From 3c2a769de7955ff81818b49d388dd771bf6ae29d Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 14 Dec 2016 15:08:37 -0200 Subject: [PATCH 0001/1084] Documentation, x86, resctrl: Recommend locking for resctrlfs Concurrent write or read/write access from applications to the resctrlfs directory can result in incorrect readouts or setups. Recommend a standard locking scheme for applications to use. Signed-off-by: Marcelo Tosatti Cc: Fenghua Yu Link: http://lkml.kernel.org/r/20161214170835.GA16924@amt.cnet Signed-off-by: Thomas Gleixner --- Documentation/x86/intel_rdt_ui.txt | 114 +++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt index d918d268cd72..51cf6fa5591f 100644 --- a/Documentation/x86/intel_rdt_ui.txt +++ b/Documentation/x86/intel_rdt_ui.txt @@ -212,3 +212,117 @@ Finally we move core 4-7 over to the new group and make sure that the kernel and the tasks running there get 50% of the cache. # echo C0 > p0/cpus + +4) Locking between applications + +Certain operations on the resctrl filesystem, composed of read/writes +to/from multiple files, must be atomic. + +As an example, the allocation of an exclusive reservation of L3 cache +involves: + + 1. Read the cbmmasks from each directory + 2. Find a contiguous set of bits in the global CBM bitmask that is clear + in all of the directory cbmmasks + 3. Create a new directory + 4. Set the bits found in step 2 to the new directory "schemata" file + +If two applications attempt to allocate space concurrently then they can +end up allocating the same bits so the reservations are shared instead of +exclusive. + +To coordinate atomic operations on the resctrlfs and to avoid the problem +above, the following locking procedure is recommended: + +Locking is based on flock, which is available in libc and also as a shell +script command. + +Write lock: + + A) Take flock(LOCK_EX) on /sys/fs/resctrl + B) Read/write the directory structure. + C) flock(LOCK_UN) + +Read lock: + + A) Take flock(LOCK_SH) on /sys/fs/resctrl + B) If successful, read the directory structure.
+ C) flock(LOCK_UN) + +Example with bash: + +# Atomically read directory structure +$ flock -s /sys/fs/resctrl/ find /sys/fs/resctrl + +# Read directory contents and create new subdirectory + +$ cat create-dir.sh +find /sys/fs/resctrl/ > output.txt +mask=$(function-of output.txt) +mkdir /sys/fs/resctrl/newres/ +echo $mask > /sys/fs/resctrl/newres/schemata + +$ flock /sys/fs/resctrl/ ./create-dir.sh + +Example with C: + +/* + * Example code to take advisory locks + * before accessing resctrl filesystem + */ +#include <sys/file.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> + +void resctrl_take_shared_lock(int fd) +{ + int ret; + + /* take shared lock on resctrl filesystem */ + ret = flock(fd, LOCK_SH); + if (ret) { + perror("flock"); + exit(-1); + } +} + +void resctrl_take_exclusive_lock(int fd) +{ + int ret; + + /* take exclusive lock on resctrl filesystem */ + ret = flock(fd, LOCK_EX); + if (ret) { + perror("flock"); + exit(-1); + } +} + +void resctrl_release_lock(int fd) +{ + int ret; + + /* release lock on resctrl filesystem */ + ret = flock(fd, LOCK_UN); + if (ret) { + perror("flock"); + exit(-1); + } +} + +int main(void) +{ + int fd, ret; + + fd = open("/sys/fs/resctrl", O_DIRECTORY); + if (fd == -1) { + perror("open"); + exit(-1); + } + resctrl_take_shared_lock(fd); + /* code to read directory contents */ + resctrl_release_lock(fd); + + resctrl_take_exclusive_lock(fd); + /* code to read and write directory contents */ + resctrl_release_lock(fd); +} -- GitLab From 79ab80beb792fc56141ca9bc5675b2109b729955 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 17:06:38 -0500 Subject: [PATCH 0002/1084] [drbd] use sock_sendmsg() ... and keep ->msg_iter through the loop Signed-off-by: Al Viro --- drivers/block/drbd/drbd_main.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 83482721bc01..d96a41a7e257 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1846,7 +1846,7 @@ int drbd_send_out_of_sync(struct drbd_peer_device *peer_device, struct drbd_requ int drbd_send(struct drbd_connection *connection, struct socket *sock, void *buf, size_t size, unsigned msg_flags) { - struct kvec iov; + struct kvec iov = {.iov_base = buf, .iov_len = size}; struct msghdr msg; int rv, sent = 0; @@ -1855,15 +1855,14 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, /* THINK if (signal_pending) return ... ?
*/ - iov.iov_base = buf; - iov.iov_len = size; - msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size); + if (sock == connection->data.socket) { rcu_read_lock(); connection->ko_count = rcu_dereference(connection->net_conf)->ko_count; @@ -1871,7 +1870,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, drbd_update_congested(connection); } do { - rv = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); + rv = sock_sendmsg(sock, &msg); if (rv == -EAGAIN) { if (we_should_drop_the_connection(connection, sock)) break; @@ -1885,8 +1884,6 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, if (rv < 0) break; sent += rv; - iov.iov_base += rv; - iov.iov_len -= rv; } while (sent < size); if (sock == connection->data.socket) -- GitLab From c1696cab700588f8493df7b51e096abf5bfb1d40 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Nov 2015 04:51:19 -0500 Subject: [PATCH 0003/1084] [nbd] switch sock_xmit() to sock_{send,recv}msg() Step 1 - don't reinintialize ->msg_iter on each iteration. Signed-off-by: Al Viro --- drivers/block/nbd.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 38c576f76d36..8e63caecdd00 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -215,7 +215,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, struct socket *sock = nbd->socks[index]->sock; int result; struct msghdr msg; - struct kvec iov; + struct kvec iov = {.iov_base = buf, .iov_len = size}; unsigned long pflags = current->flags; if (unlikely(!sock)) { @@ -225,11 +225,12 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, return -EINVAL; } + iov_iter_kvec(&msg.msg_iter, (send ? WRITE : READ) | ITER_KVEC, + &iov, 1, size); + current->flags |= PF_MEMALLOC; do { sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; - iov.iov_base = buf; - iov.iov_len = size; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = NULL; @@ -237,19 +238,16 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, msg.msg_flags = msg_flags | MSG_NOSIGNAL; if (send) - result = kernel_sendmsg(sock, &msg, &iov, 1, size); + result = sock_sendmsg(sock, &msg); else - result = kernel_recvmsg(sock, &msg, &iov, 1, size, - msg.msg_flags); + result = sock_recvmsg(sock, &msg, msg.msg_flags); if (result <= 0) { if (result == 0) result = -EPIPE; /* short read */ break; } - size -= result; - buf += result; - } while (size > 0); + } while (msg_data_left(&msg)); tsk_restore_flags(current, pflags, PF_MEMALLOC); -- GitLab From c9f2b6aeb92286f15ffc80d2ba16bc24e530f560 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Nov 2015 05:09:35 -0500 Subject: [PATCH 0004/1084] [nbd] pass iov_iter to nbd_xmit() ... and don't mess with kmap() - just use BVEC_ITER for those parts. Signed-off-by: Al Viro --- drivers/block/nbd.c | 66 +++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 8e63caecdd00..3c2dbe412c02 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -209,13 +209,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, /* * Send or receive packet. 
*/ -static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, - int size, int msg_flags) +static int sock_xmit(struct nbd_device *nbd, int index, int send, + struct iov_iter *iter, int msg_flags) { struct socket *sock = nbd->socks[index]->sock; int result; struct msghdr msg; - struct kvec iov = {.iov_base = buf, .iov_len = size}; unsigned long pflags = current->flags; if (unlikely(!sock)) { @@ -225,8 +224,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, return -EINVAL; } - iov_iter_kvec(&msg.msg_iter, (send ? WRITE : READ) | ITER_KVEC, - &iov, 1, size); + msg.msg_iter = *iter; current->flags |= PF_MEMALLOC; do { @@ -254,28 +252,21 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, return result; } -static inline int sock_send_bvec(struct nbd_device *nbd, int index, - struct bio_vec *bvec, int flags) -{ - int result; - void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, index, 1, kaddr + bvec->bv_offset, - bvec->bv_len, flags); - kunmap(bvec->bv_page); - return result; -} - /* always call with the tx_lock held */ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); int result, flags; - struct nbd_request request; + struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; + struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; + struct iov_iter from; unsigned long size = blk_rq_bytes(req); struct bio *bio; u32 type; u32 tag = blk_mq_unique_tag(req); + iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + if (req_op(req) == REQ_OP_DISCARD) type = NBD_CMD_TRIM; else if (req_op(req) == REQ_OP_FLUSH) @@ -285,8 +276,6 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) else type = NBD_CMD_READ; - memset(&request, 0, sizeof(request)); - request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(type); if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); @@ -297,7 +286,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", cmd, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); - result = sock_xmit(nbd, index, 1, &request, sizeof(request), + result = sock_xmit(nbd, index, 1, &from, (type == NBD_CMD_WRITE) ? 
MSG_MORE : 0); if (result <= 0) { dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -322,7 +311,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) flags = MSG_MORE; dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", cmd, bvec.bv_len); - result = sock_send_bvec(nbd, index, &bvec, flags); + iov_iter_bvec(&from, ITER_BVEC | WRITE, + &bvec, 1, bvec.bv_len); + result = sock_xmit(nbd, index, 1, &from, flags); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", @@ -343,17 +334,6 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) return 0; } -static inline int sock_recv_bvec(struct nbd_device *nbd, int index, - struct bio_vec *bvec) -{ - int result; - void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, index, 0, kaddr + bvec->bv_offset, - bvec->bv_len, MSG_WAITALL); - kunmap(bvec->bv_page); - return result; -} - /* NULL returned = something went wrong, inform userspace */ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) { @@ -363,9 +343,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct request *req = NULL; u16 hwq; u32 tag; + struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; + struct iov_iter to; reply.magic = 0; - result = sock_xmit(nbd, index, 0, &reply, sizeof(reply), MSG_WAITALL); + iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); if (result <= 0) { if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) @@ -405,7 +388,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) struct bio_vec bvec; rq_for_each_segment(bvec, req, iter) { - result = sock_recv_bvec(nbd, index, &bvec); + iov_iter_bvec(&to, ITER_BVEC | READ, + &bvec, 1, bvec.bv_len); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); @@ -645,14 +630,17 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) static void send_disconnects(struct nbd_device *nbd) { - struct nbd_request request = {}; + struct nbd_request request = { + .magic = htonl(NBD_REQUEST_MAGIC), + .type = htonl(NBD_CMD_DISC), + }; + struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; + struct iov_iter from; int i, ret; - request.magic = htonl(NBD_REQUEST_MAGIC); - request.type = htonl(NBD_CMD_DISC); - for (i = 0; i < nbd->num_connections; i++) { - ret = sock_xmit(nbd, i, 1, &request, sizeof(request), 0); + iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); + ret = sock_xmit(nbd, i, 1, &from, 0); if (ret <= 0) dev_err(disk_to_dev(nbd->disk), "Send disconnect failed %d\n", ret); -- GitLab From 4113e47b3d0931879d75dc5ad7c4c294651b3c1d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Dec 2014 02:14:47 -0500 Subject: [PATCH 0005/1084] iscsi_target: deal with short writes on the tx side Signed-off-by: Al Viro --- drivers/target/iscsi/iscsi_target_util.c | 64 +++++++++--------------- 1 file changed, 24 insertions(+), 40 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b5a1b4ccba12..a9ba2479374f 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1304,39 +1304,6 @@ static int iscsit_do_rx_data( return total_rx; } -static int iscsit_do_tx_data( - struct iscsi_conn 
*conn, - struct iscsi_data_count *count) -{ - int ret, iov_len; - struct kvec *iov_p; - struct msghdr msg; - - if (!conn || !conn->sock || !conn->conn_ops) - return -1; - - if (count->data_length <= 0) { - pr_err("Data length is: %d\n", count->data_length); - return -1; - } - - memset(&msg, 0, sizeof(struct msghdr)); - - iov_p = count->iov; - iov_len = count->iov_count; - - ret = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, - count->data_length); - if (ret != count->data_length) { - pr_err("Unexpected ret: %d send data %d\n", - ret, count->data_length); - return -EPIPE; - } - pr_debug("ret: %d, sent data: %d\n", ret, count->data_length); - - return ret; -} - int rx_data( struct iscsi_conn *conn, struct kvec *iov, @@ -1363,18 +1330,35 @@ int tx_data( int iov_count, int data) { - struct iscsi_data_count c; + struct msghdr msg; + int total_tx = 0; if (!conn || !conn->sock || !conn->conn_ops) return -1; - memset(&c, 0, sizeof(struct iscsi_data_count)); - c.iov = iov; - c.iov_count = iov_count; - c.data_length = data; - c.type = ISCSI_TX_DATA; + if (data <= 0) { + pr_err("Data length is: %d\n", data); + return -1; + } + + memset(&msg, 0, sizeof(struct msghdr)); + + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, + iov, iov_count, data); + + while (msg_data_left(&msg)) { + int tx_loop = sock_sendmsg(conn->sock, &msg); + if (tx_loop <= 0) { + pr_debug("tx_loop: %d total_tx %d\n", + tx_loop, total_tx); + return tx_loop; + } + total_tx += tx_loop; + pr_debug("tx_loop: %d, total_tx: %d, data: %d\n", + tx_loop, total_tx, data); + } - return iscsit_do_tx_data(conn, &c); + return total_tx; } static bool sockaddr_equal(struct sockaddr_storage *x, struct sockaddr_storage *y) -- GitLab From 3995d1610713c1a62af687872e460c3dca82d17c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Dec 2014 03:53:10 -0500 Subject: [PATCH 0006/1084] usbip_recv(): switch to sock_recvmsg() Signed-off-by: Al Viro --- drivers/usb/usbip/usbip_common.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index 8b232290be6b..f123bfee5d1b 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -327,13 +327,11 @@ EXPORT_SYMBOL_GPL(usbip_dump_header); int usbip_recv(struct socket *sock, void *buf, int size) { int result; - struct msghdr msg; - struct kvec iov; + struct kvec iov = {.iov_base = buf, .iov_len = size}; + struct msghdr msg = {.msg_flags = MSG_NOSIGNAL}; int total = 0; - /* for blocks of if (usbip_dbg_flag_xmit) */ - char *bp = buf; - int osize = size; + iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size); usbip_dbg_xmit("enter\n"); @@ -344,26 +342,18 @@ int usbip_recv(struct socket *sock, void *buf, int size) } do { + int sz = msg_data_left(&msg); sock->sk->sk_allocation = GFP_NOIO; - iov.iov_base = buf; - iov.iov_len = size; - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = MSG_NOSIGNAL; - - result = kernel_recvmsg(sock, &msg, &iov, 1, size, MSG_WAITALL); + + result = sock_recvmsg(sock, &msg, MSG_WAITALL); if (result <= 0) { pr_debug("receive sock %p buf %p size %u ret %d total %d\n", - sock, buf, size, result, total); + sock, buf + total, sz, result, total); goto err; } - size -= result; - buf += result; total += result; - } while (size > 0); + } while (msg_data_left(&msg)); if (usbip_dbg_flag_xmit) { if (!in_interrupt()) @@ -372,9 +362,9 @@ int usbip_recv(struct socket *sock, void *buf, int 
size) pr_debug("interrupt :"); pr_debug("receiving....\n"); - usbip_dump_buffer(bp, osize); - pr_debug("received, osize %d ret %d size %d total %d\n", - osize, result, size, total); + usbip_dump_buffer(buf, size); + pr_debug("received, osize %d ret %d size %zd total %d\n", + size, result, msg_data_left(&msg), total); } return total; -- GitLab From 100803a84d3cb84bd3ee36e8ec4274019ad667ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 15 Nov 2015 00:12:48 -0500 Subject: [PATCH 0007/1084] ceph: switch to sock_recvmsg() ... and use ITER_BVEC instead of playing with kmap() Signed-off-by: Al Viro --- net/ceph/messenger.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 770c52701efa..9e46db7e5968 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -520,7 +520,8 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; int r; - r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len); + r = sock_recvmsg(sock, &msg, msg.msg_flags); if (r == -EAGAIN) r = 0; return r; @@ -529,17 +530,20 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) static int ceph_tcp_recvpage(struct socket *sock, struct page *page, int page_offset, size_t length) { - void *kaddr; - int ret; + struct bio_vec bvec = { + .bv_page = page, + .bv_offset = page_offset, + .bv_len = length + }; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; + int r; BUG_ON(page_offset + length > PAGE_SIZE); - - kaddr = kmap(page); - BUG_ON(!kaddr); - ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length); - kunmap(page); - - return ret; + iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length); + r = sock_recvmsg(sock, &msg, msg.msg_flags); + if (r == -EAGAIN) + r = 0; + return r; } /* -- GitLab From be6e4d66f01335827d734ee9bde79862ed8a235b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 15 Nov 2015 15:05:32 -0500 Subject: [PATCH 0008/1084] rds: remove dead code Signed-off-by: Al Viro --- net/rds/page.c | 29 ----------------------------- net/rds/rds.h | 7 ------- 2 files changed, 36 deletions(-) diff --git a/net/rds/page.c b/net/rds/page.c index e2b5a5832d3d..7cc57e098ddb 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -45,35 +45,6 @@ struct rds_page_remainder { static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders); -/* - * returns 0 on success or -errno on failure. - * - * We don't have to worry about flush_dcache_page() as this only works - * with private pages. If, say, we were to do directed receive to pinned - * user pages we'd have to worry more about cache coherence. (Though - * the flush_dcache_page() in get_user_pages() would probably be enough). - */ -int rds_page_copy_user(struct page *page, unsigned long offset, - void __user *ptr, unsigned long bytes, - int to_user) -{ - unsigned long ret; - void *addr; - - addr = kmap(page); - if (to_user) { - rds_stats_add(s_copy_to_user, bytes); - ret = copy_to_user(ptr, addr + offset, bytes); - } else { - rds_stats_add(s_copy_from_user, bytes); - ret = copy_from_user(addr + offset, ptr, bytes); - } - kunmap(page); - - return ret ? -EFAULT : 0; -} -EXPORT_SYMBOL_GPL(rds_page_copy_user); - /** * rds_page_remainder_alloc - build up regions of a message. 
* diff --git a/net/rds/rds.h b/net/rds/rds.h index ebbf909b87ec..8635993066ae 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -781,13 +781,6 @@ static inline int rds_message_verify_checksum(const struct rds_header *hdr) /* page.c */ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, gfp_t gfp); -int rds_page_copy_user(struct page *page, unsigned long offset, - void __user *ptr, unsigned long bytes, - int to_user); -#define rds_page_copy_to_user(page, offset, ptr, bytes) \ - rds_page_copy_user(page, offset, ptr, bytes, 1) -#define rds_page_copy_from_user(page, offset, ptr, bytes) \ - rds_page_copy_user(page, offset, ptr, bytes, 0) void rds_page_exit(void); /* recv.c */ -- GitLab From 39c6aceae961776a11a3767553b0e295fc9d413b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jan 2016 20:36:51 -0500 Subject: [PATCH 0009/1084] afs_send_pages(): use ITER_BVEC Signed-off-by: Al Viro --- fs/afs/rxrpc.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 25f05a8d21b1..bfba8f0c3d43 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -249,8 +249,7 @@ void afs_flat_call_destructor(struct afs_call *call) /* * attach the data from a bunch of pages on an inode to a call */ -static int afs_send_pages(struct afs_call *call, struct msghdr *msg, - struct kvec *iov) +static int afs_send_pages(struct afs_call *call, struct msghdr *msg) { struct page *pages[8]; unsigned count, n, loop, offset, to; @@ -273,20 +272,21 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, loop = 0; do { + struct bio_vec bvec = {.bv_page = pages[loop], + .bv_offset = offset}; msg->msg_flags = 0; to = PAGE_SIZE; if (first + loop >= last) to = call->last_to; else msg->msg_flags = MSG_MORE; - iov->iov_base = kmap(pages[loop]) + offset; - iov->iov_len = to - offset; + bvec.bv_len = to - offset; offset = 0; _debug("- range %u-%u%s", offset, to, msg->msg_flags ? 
" [more]" : ""); - iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, - iov, 1, to - offset); + iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, + &bvec, 1, to - offset); /* have to change the state *before* sending the last * packet as RxRPC might give us the reply before it @@ -295,7 +295,6 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg, to - offset); - kunmap(pages[loop]); if (ret < 0) break; } while (++loop < count); @@ -379,7 +378,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, goto error_do_abort; if (call->send_pages) { - ret = afs_send_pages(call, &msg, iov); + ret = afs_send_pages(call, &msg); if (ret < 0) goto error_do_abort; } -- GitLab From 61ff6e9b452d6158e0fd659c6d987f47cfcfbd7b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jan 2016 20:55:45 -0500 Subject: [PATCH 0010/1084] ceph_tcp_sendpage(): use ITER_BVEC sendmsg Signed-off-by: Al Viro --- net/ceph/messenger.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 9e46db7e5968..6f3b5754cc7e 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -583,18 +583,28 @@ static int __ceph_tcp_sendpage(struct socket *sock, struct page *page, static int ceph_tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, bool more) { + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; + struct bio_vec bvec; int ret; - struct kvec iov; /* sendpage cannot properly handle pages with page_count == 0, * we need to fallback to sendmsg if that's the case */ if (page_count(page) >= 1) return __ceph_tcp_sendpage(sock, page, offset, size, more); - iov.iov_base = kmap(page) + offset; - iov.iov_len = size; - ret = ceph_tcp_sendmsg(sock, &iov, 1, size, more); - kunmap(page); + bvec.bv_page = page; + bvec.bv_offset = offset; + bvec.bv_len = size; + + if (more) + msg.msg_flags |= MSG_MORE; + else + msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ + + iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size); + ret = sock_sendmsg(sock, &msg); + if (ret == -EAGAIN) + ret = 0; return ret; } -- GitLab From 4b4fbad37f70f1dca26e060cf8dc71371b01899c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jan 2016 21:12:37 -0500 Subject: [PATCH 0011/1084] ncpfs: sendmsg does *not* bugger iovec these days Signed-off-by: Al Viro --- fs/ncpfs/sock.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index f32f272ee501..97cfccefccc5 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -205,10 +205,7 @@ static inline void __ncptcp_abort(struct ncp_server *server) static int ncpdgram_send(struct socket *sock, struct ncp_request_reply *req) { - struct kvec vec[3]; - /* sock_sendmsg updates iov pointers for us :-( */ - memcpy(vec, req->tx_ciov, req->tx_iovlen * sizeof(vec[0])); - return do_send(sock, vec, req->tx_iovlen, + return do_send(sock, req->tx_ciov, req->tx_iovlen, req->tx_totallen, MSG_DONTWAIT); } @@ -216,16 +213,13 @@ static void __ncptcp_try_send(struct ncp_server *server) { struct ncp_request_reply *rq; struct kvec *iov; - struct kvec iovc[3]; int result; rq = server->tx.creq; if (!rq) return; - /* sock_sendmsg updates iov pointers for us :-( */ - memcpy(iovc, rq->tx_ciov, rq->tx_iovlen * sizeof(iov[0])); - result = do_send(server->ncp_sock, iovc, rq->tx_iovlen, + result = do_send(server->ncp_sock, 
rq->tx_ciov, rq->tx_iovlen, rq->tx_totallen, MSG_NOSIGNAL | MSG_DONTWAIT); if (result == -EAGAIN) -- GitLab From 2cebcc78fab5f56c9a9795fc49546d2436491312 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jan 2016 22:01:58 -0500 Subject: [PATCH 0012/1084] ncpfs: don't mess with manually advancing iovec on send just keep iov_iter in req Signed-off-by: Al Viro --- fs/ncpfs/sock.c | 69 +++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 42 deletions(-) diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index 97cfccefccc5..a13c0b54f078 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -63,9 +63,7 @@ struct ncp_request_reply { size_t datalen; int result; enum { RQ_DONE, RQ_INPROGRESS, RQ_QUEUED, RQ_IDLE, RQ_ABANDONED } status; - struct kvec* tx_ciov; - size_t tx_totallen; - size_t tx_iovlen; + struct iov_iter from; struct kvec tx_iov[3]; u_int16_t tx_type; u_int32_t sign[6]; @@ -205,22 +203,22 @@ static inline void __ncptcp_abort(struct ncp_server *server) static int ncpdgram_send(struct socket *sock, struct ncp_request_reply *req) { - return do_send(sock, req->tx_ciov, req->tx_iovlen, - req->tx_totallen, MSG_DONTWAIT); + struct msghdr msg = { .msg_iter = req->from, .msg_flags = MSG_DONTWAIT }; + return sock_sendmsg(sock, &msg); } static void __ncptcp_try_send(struct ncp_server *server) { struct ncp_request_reply *rq; - struct kvec *iov; + struct msghdr msg = { .msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT }; int result; rq = server->tx.creq; if (!rq) return; - result = do_send(server->ncp_sock, rq->tx_ciov, rq->tx_iovlen, - rq->tx_totallen, MSG_NOSIGNAL | MSG_DONTWAIT); + msg.msg_iter = rq->from; + result = sock_sendmsg(server->ncp_sock, &msg); if (result == -EAGAIN) return; @@ -230,21 +228,12 @@ static void __ncptcp_try_send(struct ncp_server *server) __ncp_abort_request(server, rq, result); return; } - if (result >= rq->tx_totallen) { + if (!msg_data_left(&msg)) { server->rcv.creq = rq; server->tx.creq = NULL; return; } - rq->tx_totallen -= result; - iov = rq->tx_ciov; - while (iov->iov_len <= result) { - result -= iov->iov_len; - iov++; - rq->tx_iovlen--; - } - iov->iov_base += result; - iov->iov_len -= result; - rq->tx_ciov = iov; + rq->from = msg.msg_iter; } static inline void ncp_init_header(struct ncp_server *server, struct ncp_request_reply *req, struct ncp_request_header *h) @@ -257,22 +246,21 @@ static inline void ncp_init_header(struct ncp_server *server, struct ncp_request static void ncpdgram_start_request(struct ncp_server *server, struct ncp_request_reply *req) { - size_t signlen; - struct ncp_request_header* h; + size_t signlen, len = req->tx_iov[1].iov_len; + struct ncp_request_header *h = req->tx_iov[1].iov_base; - req->tx_ciov = req->tx_iov + 1; - - h = req->tx_iov[1].iov_base; ncp_init_header(server, req, h); - signlen = sign_packet(server, req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1, - req->tx_iov[1].iov_len - sizeof(struct ncp_request_header) + 1, - cpu_to_le32(req->tx_totallen), req->sign); + signlen = sign_packet(server, + req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1, + len - sizeof(struct ncp_request_header) + 1, + cpu_to_le32(len), req->sign); if (signlen) { - req->tx_ciov[1].iov_base = req->sign; - req->tx_ciov[1].iov_len = signlen; - req->tx_iovlen += 1; - req->tx_totallen += signlen; + /* NCP over UDP appends signature */ + req->tx_iov[2].iov_base = req->sign; + req->tx_iov[2].iov_len = signlen; } + iov_iter_kvec(&req->from, WRITE | ITER_KVEC, + req->tx_iov + 1, signlen ? 
2 : 1, len + signlen); server->rcv.creq = req; server->timeout_last = server->m.time_out; server->timeout_retries = server->m.retry_count; @@ -286,24 +274,23 @@ static void ncpdgram_start_request(struct ncp_server *server, struct ncp_request static void ncptcp_start_request(struct ncp_server *server, struct ncp_request_reply *req) { - size_t signlen; - struct ncp_request_header* h; + size_t signlen, len = req->tx_iov[1].iov_len; + struct ncp_request_header *h = req->tx_iov[1].iov_base; - req->tx_ciov = req->tx_iov; - h = req->tx_iov[1].iov_base; ncp_init_header(server, req, h); signlen = sign_packet(server, req->tx_iov[1].iov_base + sizeof(struct ncp_request_header) - 1, - req->tx_iov[1].iov_len - sizeof(struct ncp_request_header) + 1, - cpu_to_be32(req->tx_totallen + 24), req->sign + 4) + 16; + len - sizeof(struct ncp_request_header) + 1, + cpu_to_be32(len + 24), req->sign + 4) + 16; req->sign[0] = htonl(NCP_TCP_XMIT_MAGIC); - req->sign[1] = htonl(req->tx_totallen + signlen); + req->sign[1] = htonl(len + signlen); req->sign[2] = htonl(NCP_TCP_XMIT_VERSION); req->sign[3] = htonl(req->datalen + 8); + /* NCP over TCP prepends signature */ req->tx_iov[0].iov_base = req->sign; req->tx_iov[0].iov_len = signlen; - req->tx_iovlen += 1; - req->tx_totallen += signlen; + iov_iter_kvec(&req->from, WRITE | ITER_KVEC, + req->tx_iov, 2, len + signlen); server->tx.creq = req; __ncptcp_try_send(server); @@ -705,8 +692,6 @@ static int do_ncp_rpc_call(struct ncp_server *server, int size, req->datalen = max_reply_size; req->tx_iov[1].iov_base = server->packet; req->tx_iov[1].iov_len = size; - req->tx_iovlen = 1; - req->tx_totallen = size; req->tx_type = *(u_int16_t*)server->packet; result = ncp_add_request(server, req); -- GitLab From b8e2df1f812add27bfdb1e9d4fdf1051cc51fe1b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jan 2016 22:12:55 -0500 Subject: [PATCH 0013/1084] ncpfs: switch to sock_sendmsg() Signed-off-by: Al Viro --- fs/ncpfs/sock.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index a13c0b54f078..f013c92bb5d8 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -40,19 +40,12 @@ static int _recv(struct socket *sock, void *buf, int size, unsigned flags) return kernel_recvmsg(sock, &msg, &iov, 1, size, flags); } -static inline int do_send(struct socket *sock, struct kvec *vec, int count, - int len, unsigned flags) -{ - struct msghdr msg = { .msg_flags = flags }; - return kernel_sendmsg(sock, &msg, vec, count, len); -} - static int _send(struct socket *sock, const void *buff, int len) { - struct kvec vec; - vec.iov_base = (void *) buff; - vec.iov_len = len; - return do_send(sock, &vec, 1, len, 0); + struct msghdr msg = { .msg_flags = 0 }; + struct kvec vec = {.iov_base = (void *)buff, .iov_len = len}; + iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &vec, 1, len); + return sock_sendmsg(sock, &msg); } struct ncp_request_reply { @@ -345,18 +338,17 @@ static void __ncp_next_request(struct ncp_server *server) static void info_server(struct ncp_server *server, unsigned int id, const void * data, size_t len) { if (server->info_sock) { - struct kvec iov[2]; - __be32 hdr[2]; - - hdr[0] = cpu_to_be32(len + 8); - hdr[1] = cpu_to_be32(id); - - iov[0].iov_base = hdr; - iov[0].iov_len = 8; - iov[1].iov_base = (void *) data; - iov[1].iov_len = len; + struct msghdr msg = { .msg_flags = MSG_NOSIGNAL }; + __be32 hdr[2] = {cpu_to_be32(len + 8), cpu_to_be32(id)}; + struct kvec iov[2] = { + {.iov_base = hdr, .iov_len = 
8}, + {.iov_base = (void *)data, .iov_len = len}, + }; + + iov_iter_kvec(&msg.msg_iter, ITER_KVEC | WRITE, + iov, 2, len + 8); - do_send(server->info_sock, iov, 2, len + 8, MSG_NOSIGNAL); + sock_sendmsg(server->info_sock, &msg); } } -- GitLab From b31ef8285b19ec5563274c574fcfe7a5993125ce Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 21 Dec 2016 09:47:03 -0800 Subject: [PATCH 0014/1084] thermal core: convert ID allocation to IDA The thermal core does not use the ability to look up pointers by ID, so convert it from using an IDR to the more space-efficient IDA. Signed-off-by: Matthew Wilcox Signed-off-by: Zhang Rui --- drivers/thermal/thermal_core.c | 75 ++++++++++++---------------------- include/linux/thermal.h | 4 +- 2 files changed, 28 insertions(+), 51 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 641faab6e24b..f2a0cd119e22 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -36,9 +36,8 @@ MODULE_AUTHOR("Zhang Rui"); MODULE_DESCRIPTION("Generic thermal management sysfs support"); MODULE_LICENSE("GPL v2"); -static DEFINE_IDR(thermal_tz_idr); -static DEFINE_IDR(thermal_cdev_idr); -static DEFINE_MUTEX(thermal_idr_lock); +static DEFINE_IDA(thermal_tz_ida); +static DEFINE_IDA(thermal_cdev_ida); static LIST_HEAD(thermal_tz_list); static LIST_HEAD(thermal_cdev_list); @@ -589,29 +588,6 @@ void thermal_zone_device_unbind_exception(struct thermal_zone_device *tz, * - thermal zone devices lifecycle: registration, unregistration, * binding, and unbinding. */ -static int get_idr(struct idr *idr, struct mutex *lock, int *id) -{ - int ret; - - if (lock) - mutex_lock(lock); - ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); - if (lock) - mutex_unlock(lock); - if (unlikely(ret < 0)) - return ret; - *id = ret; - return 0; -} - -static void release_idr(struct idr *idr, struct mutex *lock, int id) -{ - if (lock) - mutex_lock(lock); - idr_remove(idr, id); - if (lock) - mutex_unlock(lock); -} /** * thermal_zone_bind_cooling_device() - bind a cooling device to a thermal zone @@ -685,15 +661,16 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, dev->target = THERMAL_NO_TARGET; dev->weight = weight; - result = get_idr(&tz->idr, &tz->lock, &dev->id); - if (result) + result = ida_simple_get(&tz->ida, 0, 0, GFP_KERNEL); + if (result < 0) goto free_mem; + dev->id = result; sprintf(dev->name, "cdev%d", dev->id); result = sysfs_create_link(&tz->device.kobj, &cdev->device.kobj, dev->name); if (result) - goto release_idr; + goto release_ida; sprintf(dev->attr_name, "cdev%d_trip_point", dev->id); sysfs_attr_init(&dev->attr.attr); @@ -737,8 +714,8 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, device_remove_file(&tz->device, &dev->attr); remove_symbol_link: sysfs_remove_link(&tz->device.kobj, dev->name); -release_idr: - release_idr(&tz->idr, &tz->lock, dev->id); +release_ida: + ida_simple_remove(&tz->ida, dev->id); free_mem: kfree(dev); return result; @@ -785,7 +762,7 @@ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, device_remove_file(&tz->device, &pos->weight_attr); device_remove_file(&tz->device, &pos->attr); sysfs_remove_link(&tz->device.kobj, pos->name); - release_idr(&tz->idr, &tz->lock, pos->id); + ida_simple_remove(&tz->ida, pos->id); kfree(pos); return 0; } @@ -920,12 +897,13 @@ __thermal_cooling_device_register(struct device_node *np, if (!cdev) return ERR_PTR(-ENOMEM); - result = get_idr(&thermal_cdev_idr, &thermal_idr_lock, &cdev->id); - if (result) { + 
result = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL); + if (result < 0) { kfree(cdev); return ERR_PTR(result); } + cdev->id = result; strlcpy(cdev->type, type ? : "", sizeof(cdev->type)); mutex_init(&cdev->lock); INIT_LIST_HEAD(&cdev->thermal_instances); @@ -938,7 +916,7 @@ __thermal_cooling_device_register(struct device_node *np, dev_set_name(&cdev->device, "cooling_device%d", cdev->id); result = device_register(&cdev->device); if (result) { - release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id); + ida_simple_remove(&thermal_cdev_ida, cdev->id); kfree(cdev); return ERR_PTR(result); } @@ -1065,7 +1043,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev) mutex_unlock(&thermal_list_lock); - release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id); + ida_simple_remove(&thermal_cdev_ida, cdev->id); device_unregister(&cdev->device); } EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister); @@ -1167,14 +1145,15 @@ thermal_zone_device_register(const char *type, int trips, int mask, return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&tz->thermal_instances); - idr_init(&tz->idr); + ida_init(&tz->ida); mutex_init(&tz->lock); - result = get_idr(&thermal_tz_idr, &thermal_idr_lock, &tz->id); - if (result) { + result = ida_simple_get(&thermal_tz_ida, 0, 0, GFP_KERNEL); + if (result < 0) { kfree(tz); return ERR_PTR(result); } + tz->id = result; strlcpy(tz->type, type, sizeof(tz->type)); tz->ops = ops; tz->tzp = tzp; @@ -1196,7 +1175,7 @@ thermal_zone_device_register(const char *type, int trips, int mask, dev_set_name(&tz->device, "thermal_zone%d", tz->id); result = device_register(&tz->device); if (result) { - release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id); + ida_simple_remove(&thermal_tz_ida, tz->id); kfree(tz); return ERR_PTR(result); } @@ -1250,7 +1229,7 @@ thermal_zone_device_register(const char *type, int trips, int mask, return tz; unregister: - release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id); + ida_simple_remove(&thermal_tz_ida, tz->id); device_unregister(&tz->device); return ERR_PTR(result); } @@ -1312,8 +1291,8 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) thermal_set_governor(tz, NULL); thermal_remove_hwmon_sysfs(tz); - release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id); - idr_destroy(&tz->idr); + ida_simple_remove(&thermal_tz_ida, tz->id); + ida_destroy(&tz->ida); mutex_destroy(&tz->lock); device_unregister(&tz->device); kfree(tz->device.groups); @@ -1514,9 +1493,8 @@ static int __init thermal_init(void) unregister_governors: thermal_unregister_governors(); error: - idr_destroy(&thermal_tz_idr); - idr_destroy(&thermal_cdev_idr); - mutex_destroy(&thermal_idr_lock); + ida_destroy(&thermal_tz_ida); + ida_destroy(&thermal_cdev_ida); mutex_destroy(&thermal_list_lock); mutex_destroy(&thermal_governor_lock); return result; @@ -1529,9 +1507,8 @@ static void __exit thermal_exit(void) genetlink_exit(); class_unregister(&thermal_class); thermal_unregister_governors(); - idr_destroy(&thermal_tz_idr); - idr_destroy(&thermal_cdev_idr); - mutex_destroy(&thermal_idr_lock); + ida_destroy(&thermal_tz_ida); + ida_destroy(&thermal_cdev_ida); mutex_destroy(&thermal_list_lock); mutex_destroy(&thermal_governor_lock); } diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e275e98bdceb..dab11f97e1c6 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -194,7 +194,7 @@ struct thermal_attr { * @governor: pointer to the governor for this thermal zone * @governor_data: private pointer for governor data * 
@thermal_instances: list of &struct thermal_instance of this thermal zone - * @idr: &struct idr to generate unique id for this zone's cooling + * @ida: &struct ida to generate unique id for this zone's cooling * devices * @lock: lock to protect thermal_instances list * @node: node in thermal_tz_list (in thermal_core.c) @@ -227,7 +227,7 @@ struct thermal_zone_device { struct thermal_governor *governor; void *governor_data; struct list_head thermal_instances; - struct idr idr; + struct ida ida; struct mutex lock; struct list_head node; struct delayed_work poll_queue; -- GitLab From 7a6639dca61bf14e3aad8fd31d1f16ed0acf0a60 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 21 Dec 2016 09:47:04 -0800 Subject: [PATCH 0015/1084] thermal: convert clock cooling to use an IDA thermal clock cooling does not use the ability to look up pointers by ID, so convert it from using an IDR to the more space-efficient IDA. Signed-off-by: Matthew Wilcox Signed-off-by: Zhang Rui --- drivers/thermal/clock_cooling.c | 50 ++++++--------------------------- 1 file changed, 8 insertions(+), 42 deletions(-) diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c index ed5dd0e88657..56711c25584d 100644 --- a/drivers/thermal/clock_cooling.c +++ b/drivers/thermal/clock_cooling.c @@ -65,42 +65,7 @@ struct clock_cooling_device { }; #define to_clock_cooling_device(x) \ container_of(x, struct clock_cooling_device, clk_rate_change_nb) -static DEFINE_IDR(clock_idr); -static DEFINE_MUTEX(cooling_clock_lock); - -/** - * clock_cooling_get_idr - function to get an unique id. - * @id: int * value generated by this function. - * - * This function will populate @id with an unique - * id, using the idr API. - * - * Return: 0 on success, an error code on failure. - */ -static int clock_cooling_get_idr(int *id) -{ - int ret; - - mutex_lock(&cooling_clock_lock); - ret = idr_alloc(&clock_idr, NULL, 0, 0, GFP_KERNEL); - mutex_unlock(&cooling_clock_lock); - if (unlikely(ret < 0)) - return ret; - *id = ret; - - return 0; -} - -/** - * release_idr - function to free the unique id. - * @id: int value representing the unique id. 
- */ -static void release_idr(int id) -{ - mutex_lock(&cooling_clock_lock); - idr_remove(&clock_idr, id); - mutex_unlock(&cooling_clock_lock); -} +static DEFINE_IDA(clock_ida); /* Below code defines functions to be used for clock as cooling device */ @@ -432,16 +397,17 @@ clock_cooling_register(struct device *dev, const char *clock_name) if (IS_ERR(ccdev->clk)) return ERR_CAST(ccdev->clk); - ret = clock_cooling_get_idr(&ccdev->id); - if (ret) - return ERR_PTR(-EINVAL); + ret = ida_simple_get(&clock_ida, 0, 0, GFP_KERNEL); + if (ret < 0) + return ERR_PTR(ret); + ccdev->id = ret; snprintf(dev_name, sizeof(dev_name), "thermal-clock-%d", ccdev->id); cdev = thermal_cooling_device_register(dev_name, ccdev, &clock_cooling_ops); if (IS_ERR(cdev)) { - release_idr(ccdev->id); + ida_simple_remove(&clock_ida, ccdev->id); return ERR_PTR(-EINVAL); } ccdev->cdev = cdev; @@ -450,7 +416,7 @@ clock_cooling_register(struct device *dev, const char *clock_name) /* Assuming someone has already filled the opp table for this device */ ret = dev_pm_opp_init_cpufreq_table(dev, &ccdev->freq_table); if (ret) { - release_idr(ccdev->id); + ida_simple_remove(&clock_ida, ccdev->id); return ERR_PTR(ret); } ccdev->clock_state = 0; @@ -481,6 +447,6 @@ void clock_cooling_unregister(struct thermal_cooling_device *cdev) dev_pm_opp_free_cpufreq_table(ccdev->dev, &ccdev->freq_table); thermal_cooling_device_unregister(ccdev->cdev); - release_idr(ccdev->id); + ida_simple_remove(&clock_ida, ccdev->id); } EXPORT_SYMBOL_GPL(clock_cooling_unregister); -- GitLab From ae606089621ef0349402cfcbeca33a82abbd0fd0 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 21 Dec 2016 09:47:05 -0800 Subject: [PATCH 0016/1084] thermal: convert cpu_cooling to use an IDA thermal cpu cooling does not use the ability to look up pointers by ID, so convert it from using an IDR to the more space-efficient IDA. The cooling_cpufreq_lock was being used to protect cpufreq_dev_count as well as the IDR. Rather than keep the mutex to protect a single integer, I expanded the scope of cooling_list_lock to also cover cpufreq_dev_count. We could also convert cpufreq_dev_count into an atomic. Signed-off-by: Matthew Wilcox Signed-off-by: Zhang Rui --- drivers/thermal/cpu_cooling.c | 63 ++++++----------------------------- 1 file changed, 11 insertions(+), 52 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 9ce0e9eef923..ea3cf7b5cb5b 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -104,50 +105,13 @@ struct cpufreq_cooling_device { struct device *cpu_dev; get_static_t plat_get_static_power; }; -static DEFINE_IDR(cpufreq_idr); -static DEFINE_MUTEX(cooling_cpufreq_lock); +static DEFINE_IDA(cpufreq_ida); static unsigned int cpufreq_dev_count; static DEFINE_MUTEX(cooling_list_lock); static LIST_HEAD(cpufreq_dev_list); -/** - * get_idr - function to get a unique id. - * @idr: struct idr * handle used to create a id. - * @id: int * value generated by this function. - * - * This function will populate @id with an unique - * id, using the idr API. - * - * Return: 0 on success, an error code on failure. - */ -static int get_idr(struct idr *idr, int *id) -{ - int ret; - - mutex_lock(&cooling_cpufreq_lock); - ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); - mutex_unlock(&cooling_cpufreq_lock); - if (unlikely(ret < 0)) - return ret; - *id = ret; - - return 0; -} - -/** - * release_idr - function to free the unique id. 
- * @idr: struct idr * handle used for creating the id. - * @id: int value representing the unique id. - */ -static void release_idr(struct idr *idr, int id) -{ - mutex_lock(&cooling_cpufreq_lock); - idr_remove(idr, id); - mutex_unlock(&cooling_cpufreq_lock); -} - /* Below code defines functions to be used for cpufreq as cooling device */ /** @@ -874,11 +838,12 @@ __cpufreq_cooling_register(struct device_node *np, cooling_ops = &cpufreq_cooling_ops; } - ret = get_idr(&cpufreq_idr, &cpufreq_dev->id); - if (ret) { + ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL); + if (ret < 0) { cool_dev = ERR_PTR(ret); goto free_power_table; } + cpufreq_dev->id = ret; /* Fill freq-table in descending order of frequencies */ for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) { @@ -898,27 +863,24 @@ __cpufreq_cooling_register(struct device_node *np, cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev, cooling_ops); if (IS_ERR(cool_dev)) - goto remove_idr; + goto remove_ida; cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0]; cpufreq_dev->cool_dev = cool_dev; - mutex_lock(&cooling_cpufreq_lock); - mutex_lock(&cooling_list_lock); list_add(&cpufreq_dev->node, &cpufreq_dev_list); - mutex_unlock(&cooling_list_lock); /* Register the notifier for first cpufreq cooling device */ if (!cpufreq_dev_count++) cpufreq_register_notifier(&thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); - mutex_unlock(&cooling_cpufreq_lock); + mutex_unlock(&cooling_list_lock); goto put_policy; -remove_idr: - release_idr(&cpufreq_idr, cpufreq_dev->id); +remove_ida: + ida_simple_remove(&cpufreq_ida, cpufreq_dev->id); free_power_table: kfree(cpufreq_dev->dyn_power_table); free_table: @@ -1059,20 +1021,17 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) cpufreq_dev = cdev->devdata; + mutex_lock(&cooling_list_lock); /* Unregister the notifier for the last cpufreq cooling device */ - mutex_lock(&cooling_cpufreq_lock); if (!--cpufreq_dev_count) cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); - mutex_lock(&cooling_list_lock); list_del(&cpufreq_dev->node); mutex_unlock(&cooling_list_lock); - mutex_unlock(&cooling_cpufreq_lock); - thermal_cooling_device_unregister(cpufreq_dev->cool_dev); - release_idr(&cpufreq_idr, cpufreq_dev->id); + ida_simple_remove(&cpufreq_ida, cpufreq_dev->id); kfree(cpufreq_dev->dyn_power_table); kfree(cpufreq_dev->time_in_idle_timestamp); kfree(cpufreq_dev->time_in_idle); -- GitLab From 2f96c035fbd143695eec5b465c98bcc52732626e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 21 Dec 2016 09:47:06 -0800 Subject: [PATCH 0017/1084] thermal: convert devfreq_cooling to use an IDA thermal devfreq cooling does not use the ability to look up pointers by ID, so convert it from using an IDR to the more space-efficient IDA. 
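The thermal IDR-to-IDA conversions in this group (thermal core, clock_cooling, cpu_cooling, devfreq_cooling) all apply the same transformation. A minimal sketch of that shared pattern, using invented names (struct foo, foo_ida, foo_create) rather than code taken from any one of the patches:

#include <linux/idr.h>
#include <linux/slab.h>

static DEFINE_IDA(foo_ida);	/* replaces DEFINE_IDR() plus the mutex that only guarded it */

struct foo {
	int id;
};

static struct foo *foo_create(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);
	int ret;

	if (!f)
		return NULL;

	/* ida_simple_get() returns the new id (>= 0) or a negative errno,
	 * so callers test "ret < 0" instead of "ret" */
	ret = ida_simple_get(&foo_ida, 0, 0, GFP_KERNEL);
	if (ret < 0) {
		kfree(f);
		return NULL;
	}
	f->id = ret;
	return f;
}

static void foo_destroy(struct foo *f)
{
	ida_simple_remove(&foo_ida, f->id);
	kfree(f);
}

Because the IDA interface does its own internal locking, the per-subsystem mutex that existed only to serialize idr_alloc()/idr_remove() can go away together with the get_idr()/release_idr() helpers, which is what each of these patches does.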
Signed-off-by: Matthew Wilcox Signed-off-by: Zhang Rui --- drivers/thermal/devfreq_cooling.c | 53 ++++++------------------------- 1 file changed, 9 insertions(+), 44 deletions(-) diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 5a737fd5f1aa..a667ba779a43 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -21,14 +21,14 @@ #include #include #include +#include #include #include #include #include -static DEFINE_MUTEX(devfreq_lock); -static DEFINE_IDR(devfreq_idr); +static DEFINE_IDA(devfreq_ida); /** * struct devfreq_cooling_device - Devfreq cooling device @@ -57,42 +57,6 @@ struct devfreq_cooling_device { struct devfreq_cooling_power *power_ops; }; -/** - * get_idr - function to get a unique id. - * @idr: struct idr * handle used to create a id. - * @id: int * value generated by this function. - * - * This function will populate @id with an unique - * id, using the idr API. - * - * Return: 0 on success, an error code on failure. - */ -static int get_idr(struct idr *idr, int *id) -{ - int ret; - - mutex_lock(&devfreq_lock); - ret = idr_alloc(idr, NULL, 0, 0, GFP_KERNEL); - mutex_unlock(&devfreq_lock); - if (unlikely(ret < 0)) - return ret; - *id = ret; - - return 0; -} - -/** - * release_idr - function to free the unique id. - * @idr: struct idr * handle used for creating the id. - * @id: int value representing the unique id. - */ -static void release_idr(struct idr *idr, int id) -{ - mutex_lock(&devfreq_lock); - idr_remove(idr, id); - mutex_unlock(&devfreq_lock); -} - /** * partition_enable_opps() - disable all opps above a given state * @dfc: Pointer to devfreq we are operating on @@ -496,9 +460,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, if (err) goto free_dfc; - err = get_idr(&devfreq_idr, &dfc->id); - if (err) + err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); + if (err < 0) goto free_tables; + dfc->id = err; snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); @@ -509,15 +474,15 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, dev_err(df->dev.parent, "Failed to register devfreq cooling device (%d)\n", err); - goto release_idr; + goto release_ida; } dfc->cdev = cdev; return cdev; -release_idr: - release_idr(&devfreq_idr, dfc->id); +release_ida: + ida_simple_remove(&devfreq_ida, dfc->id); free_tables: kfree(dfc->power_table); kfree(dfc->freq_table); @@ -565,7 +530,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) dfc = cdev->devdata; thermal_cooling_device_unregister(dfc->cdev); - release_idr(&devfreq_idr, dfc->id); + ida_simple_remove(&devfreq_ida, dfc->id); kfree(dfc->power_table); kfree(dfc->freq_table); -- GitLab From 247bde13b91aad0e27446eb356420052c474e4c3 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 4 Jan 2017 09:37:56 +0100 Subject: [PATCH 0018/1084] leds: pwm: Remove atomic code paths PWM devices have all been marked as "might sleep" since v4.5. It no longer makes sense to keep the alternative code paths around because it is effectively dead code. 
Signed-off-by: Thierry Reding --- drivers/leds/leds-pwm.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c index a9145aa7f36a..8d456dc6c5bf 100644 --- a/drivers/leds/leds-pwm.c +++ b/drivers/leds/leds-pwm.c @@ -29,7 +29,6 @@ struct led_pwm_data { unsigned int active_low; unsigned int period; int duty; - bool can_sleep; }; struct led_pwm_priv { @@ -49,8 +48,8 @@ static void __led_pwm_set(struct led_pwm_data *led_dat) pwm_enable(led_dat->pwm); } -static void led_pwm_set(struct led_classdev *led_cdev, - enum led_brightness brightness) +static int led_pwm_set(struct led_classdev *led_cdev, + enum led_brightness brightness) { struct led_pwm_data *led_dat = container_of(led_cdev, struct led_pwm_data, cdev); @@ -66,12 +65,7 @@ static void led_pwm_set(struct led_classdev *led_cdev, led_dat->duty = duty; __led_pwm_set(led_dat); -} -static int led_pwm_set_blocking(struct led_classdev *led_cdev, - enum led_brightness brightness) -{ - led_pwm_set(led_cdev, brightness); return 0; } @@ -112,11 +106,7 @@ static int led_pwm_add(struct device *dev, struct led_pwm_priv *priv, return ret; } - led_data->can_sleep = pwm_can_sleep(led_data->pwm); - if (!led_data->can_sleep) - led_data->cdev.brightness_set = led_pwm_set; - else - led_data->cdev.brightness_set_blocking = led_pwm_set_blocking; + led_data->cdev.brightness_set_blocking = led_pwm_set; /* * FIXME: pwm_apply_args() should be removed when switching to the -- GitLab From fe2858c8c6d167df33a839591ebe63ea05a69d06 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 4 Jan 2017 09:39:52 +0100 Subject: [PATCH 0019/1084] pwm: Remove pwm_can_sleep() The last user of this function has been removed, so it is no longer needed. Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 12 ------------ include/linux/pwm.h | 7 ------- 2 files changed, 19 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 172ef8245811..78e114a11c4f 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -960,18 +960,6 @@ void devm_pwm_put(struct device *dev, struct pwm_device *pwm) } EXPORT_SYMBOL_GPL(devm_pwm_put); -/** - * pwm_can_sleep() - report whether PWM access will sleep - * @pwm: PWM device - * - * Returns: True if accessing the PWM can sleep, false otherwise. 
- */ -bool pwm_can_sleep(struct pwm_device *pwm) -{ - return true; -} -EXPORT_SYMBOL_GPL(pwm_can_sleep); - #ifdef CONFIG_DEBUG_FS static void pwm_dbg_show(struct pwm_chip *chip, struct seq_file *s) { diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 2c6c5114c089..e15fd3ce6502 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -451,8 +451,6 @@ struct pwm_device *devm_pwm_get(struct device *dev, const char *con_id); struct pwm_device *devm_of_pwm_get(struct device *dev, struct device_node *np, const char *con_id); void devm_pwm_put(struct device *dev, struct pwm_device *pwm); - -bool pwm_can_sleep(struct pwm_device *pwm); #else static inline struct pwm_device *pwm_request(int pwm_id, const char *label) { @@ -566,11 +564,6 @@ static inline struct pwm_device *devm_of_pwm_get(struct device *dev, static inline void devm_pwm_put(struct device *dev, struct pwm_device *pwm) { } - -static inline bool pwm_can_sleep(struct pwm_device *pwm) -{ - return false; -} #endif static inline void pwm_apply_args(struct pwm_device *pwm) -- GitLab From 8c0216f377406c7613b67bd18755889026284192 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 4 Jan 2017 09:40:54 +0100 Subject: [PATCH 0020/1084] pwm: Remove .can_sleep from struct pwm_chip All PWM devices have been marked as "might sleep" since v4.5, there is no longer a need to differentiate on a per-chip basis. Signed-off-by: Thierry Reding --- drivers/pwm/pwm-atmel-hlcdc.c | 1 - drivers/pwm/pwm-atmel.c | 1 - drivers/pwm/pwm-bcm-kona.c | 1 - drivers/pwm/pwm-berlin.c | 1 - drivers/pwm/pwm-brcmstb.c | 1 - drivers/pwm/pwm-fsl-ftm.c | 1 - drivers/pwm/pwm-imx.c | 1 - drivers/pwm/pwm-lp3943.c | 1 - drivers/pwm/pwm-mxs.c | 2 +- drivers/pwm/pwm-pca9685.c | 1 - drivers/pwm/pwm-sti.c | 1 - drivers/pwm/pwm-sun4i.c | 1 - drivers/pwm/pwm-twl-led.c | 1 - drivers/pwm/pwm-twl.c | 1 - drivers/staging/greybus/pwm.c | 1 - include/linux/pwm.h | 3 --- 16 files changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c index 14fc011faa32..999187277ea5 100644 --- a/drivers/pwm/pwm-atmel-hlcdc.c +++ b/drivers/pwm/pwm-atmel-hlcdc.c @@ -270,7 +270,6 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev) chip->chip.npwm = 1; chip->chip.of_xlate = of_pwm_xlate_with_flags; chip->chip.of_pwm_n_cells = 3; - chip->chip.can_sleep = 1; ret = pwmchip_add_with_polarity(&chip->chip, PWM_POLARITY_INVERSED); if (ret) { diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c index e6b8b1b7e6ba..67a7023be5c2 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -385,7 +385,6 @@ static int atmel_pwm_probe(struct platform_device *pdev) atmel_pwm->chip.base = -1; atmel_pwm->chip.npwm = 4; - atmel_pwm->chip.can_sleep = true; atmel_pwm->config = data->config; atmel_pwm->updated_pwms = 0; mutex_init(&atmel_pwm->isr_lock); diff --git a/drivers/pwm/pwm-bcm-kona.c b/drivers/pwm/pwm-bcm-kona.c index c63418322023..09a95aeb3a70 100644 --- a/drivers/pwm/pwm-bcm-kona.c +++ b/drivers/pwm/pwm-bcm-kona.c @@ -276,7 +276,6 @@ static int kona_pwmc_probe(struct platform_device *pdev) kp->chip.npwm = 6; kp->chip.of_xlate = of_pwm_xlate_with_flags; kp->chip.of_pwm_n_cells = 3; - kp->chip.can_sleep = true; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); kp->base = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c index 01339c152ab0..771859aca4be 100644 --- a/drivers/pwm/pwm-berlin.c +++ b/drivers/pwm/pwm-berlin.c @@ -206,7 +206,6 @@ static int 
berlin_pwm_probe(struct platform_device *pdev) pwm->chip.ops = &berlin_pwm_ops; pwm->chip.base = -1; pwm->chip.npwm = 4; - pwm->chip.can_sleep = true; pwm->chip.of_xlate = of_pwm_xlate_with_flags; pwm->chip.of_pwm_n_cells = 3; diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c index 5d5adee16886..8063cffa1c96 100644 --- a/drivers/pwm/pwm-brcmstb.c +++ b/drivers/pwm/pwm-brcmstb.c @@ -270,7 +270,6 @@ static int brcmstb_pwm_probe(struct platform_device *pdev) p->chip.ops = &brcmstb_pwm_ops; p->chip.base = -1; p->chip.npwm = 2; - p->chip.can_sleep = true; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); p->base = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c index fad968eb75f6..557b4ea16796 100644 --- a/drivers/pwm/pwm-fsl-ftm.c +++ b/drivers/pwm/pwm-fsl-ftm.c @@ -446,7 +446,6 @@ static int fsl_pwm_probe(struct platform_device *pdev) fpc->chip.of_pwm_n_cells = 3; fpc->chip.base = -1; fpc->chip.npwm = 8; - fpc->chip.can_sleep = true; ret = pwmchip_add(&fpc->chip); if (ret < 0) { diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index d600fd5cd4ba..1223187ad354 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -304,7 +304,6 @@ static int imx_pwm_probe(struct platform_device *pdev) imx->chip.dev = &pdev->dev; imx->chip.base = -1; imx->chip.npwm = 1; - imx->chip.can_sleep = true; r = platform_get_resource(pdev, IORESOURCE_MEM, 0); imx->mmio_base = devm_ioremap_resource(&pdev->dev, r); diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c index 872ea76a4f19..52584e9962ed 100644 --- a/drivers/pwm/pwm-lp3943.c +++ b/drivers/pwm/pwm-lp3943.c @@ -278,7 +278,6 @@ static int lp3943_pwm_probe(struct platform_device *pdev) lp3943_pwm->chip.dev = &pdev->dev; lp3943_pwm->chip.ops = &lp3943_pwm_ops; lp3943_pwm->chip.npwm = LP3943_NUM_PWMS; - lp3943_pwm->chip.can_sleep = true; platform_set_drvdata(pdev, lp3943_pwm); diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index 9a596324ebef..a6017ad9926c 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -151,7 +151,7 @@ static int mxs_pwm_probe(struct platform_device *pdev) mxs->chip.dev = &pdev->dev; mxs->chip.ops = &mxs_pwm_ops; mxs->chip.base = -1; - mxs->chip.can_sleep = true; + ret = of_property_read_u32(np, "fsl,pwm-number", &mxs->chip.npwm); if (ret < 0) { dev_err(&pdev->dev, "failed to get pwm number: %d\n", ret); diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 117fccf7934a..c8282a2650be 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -343,7 +343,6 @@ static int pca9685_pwm_probe(struct i2c_client *client, pca->chip.dev = &client->dev; pca->chip.base = -1; - pca->chip.can_sleep = true; return pwmchip_add(&pca->chip); } diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c index dd82dc840af9..2b7c31c9d1ab 100644 --- a/drivers/pwm/pwm-sti.c +++ b/drivers/pwm/pwm-sti.c @@ -635,7 +635,6 @@ static int sti_pwm_probe(struct platform_device *pdev) pc->chip.ops = &sti_pwm_ops; pc->chip.base = -1; pc->chip.npwm = pc->cdata->pwm_num_devs; - pc->chip.can_sleep = true; ret = pwmchip_add(&pc->chip); if (ret < 0) { diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c index b0803f6c64d9..1284ffa05921 100644 --- a/drivers/pwm/pwm-sun4i.c +++ b/drivers/pwm/pwm-sun4i.c @@ -340,7 +340,6 @@ static int sun4i_pwm_probe(struct platform_device *pdev) pwm->chip.ops = &sun4i_pwm_ops; pwm->chip.base = -1; pwm->chip.npwm = pwm->data->npwm; - pwm->chip.can_sleep = true; 
pwm->chip.of_xlate = of_pwm_xlate_with_flags; pwm->chip.of_pwm_n_cells = 3; diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c index b964470025c5..21eff991d0e3 100644 --- a/drivers/pwm/pwm-twl-led.c +++ b/drivers/pwm/pwm-twl-led.c @@ -303,7 +303,6 @@ static int twl_pwmled_probe(struct platform_device *pdev) twl->chip.dev = &pdev->dev; twl->chip.base = -1; - twl->chip.can_sleep = true; mutex_init(&twl->mutex); diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c index 7a993b056638..9de617b76680 100644 --- a/drivers/pwm/pwm-twl.c +++ b/drivers/pwm/pwm-twl.c @@ -323,7 +323,6 @@ static int twl_pwm_probe(struct platform_device *pdev) twl->chip.dev = &pdev->dev; twl->chip.base = -1; twl->chip.npwm = 2; - twl->chip.can_sleep = true; mutex_init(&twl->mutex); diff --git a/drivers/staging/greybus/pwm.c b/drivers/staging/greybus/pwm.c index c4bf3298ba07..f0404bc37123 100644 --- a/drivers/staging/greybus/pwm.c +++ b/drivers/staging/greybus/pwm.c @@ -284,7 +284,6 @@ static int gb_pwm_probe(struct gbphy_device *gbphy_dev, pwm->ops = &gb_pwm_ops; pwm->base = -1; /* Allocate base dynamically */ pwm->npwm = pwmc->pwm_max + 1; - pwm->can_sleep = true; /* FIXME */ ret = pwmchip_add(pwm); if (ret) { diff --git a/include/linux/pwm.h b/include/linux/pwm.h index e15fd3ce6502..eae215ef1b2c 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -287,8 +287,6 @@ struct pwm_ops { * @pwms: array of PWM devices allocated by the framework * @of_xlate: request a PWM device given a device tree PWM specifier * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier - * @can_sleep: must be true if the .config(), .enable() or .disable() - * operations may sleep */ struct pwm_chip { struct device *dev; @@ -302,7 +300,6 @@ struct pwm_chip { struct pwm_device * (*of_xlate)(struct pwm_chip *pc, const struct of_phandle_args *args); unsigned int of_pwm_n_cells; - bool can_sleep; }; /** -- GitLab From c4158ff536439619fa342810cc575ae2c809f03f Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Wed, 4 Jan 2017 12:20:33 +0100 Subject: [PATCH 0021/1084] x86/irq, trace: Add __irq_entry annotation to x86's platform IRQ handlers This patch adds the __irq_entry annotation to the default x86 platform IRQ handlers. ftrace's function_graph tracer uses the __irq_entry annotation to notify the entry and return of IRQ handlers. For example, before the patch: 354549.667252 | 3) d..1 | default_idle_call() { 354549.667252 | 3) d..1 | arch_cpu_idle() { 354549.667253 | 3) d..1 | default_idle() { 354549.696886 | 3) d..1 | smp_trace_reschedule_interrupt() { 354549.696886 | 3) d..1 | irq_enter() { 354549.696886 | 3) d..1 | rcu_irq_enter() { After the patch: 366416.254476 | 3) d..1 | arch_cpu_idle() { 366416.254476 | 3) d..1 | default_idle() { 366416.261566 | 3) d..1 ==========> | 366416.261566 | 3) d..1 | smp_trace_reschedule_interrupt() { 366416.261566 | 3) d..1 | irq_enter() { 366416.261566 | 3) d..1 | rcu_irq_enter() { KASAN also uses this annotation. The smp_apic_timer_interrupt() was already annotated. 
Signed-off-by: Daniel Bristot de Oliveira Acked-by: Steven Rostedt (VMware) Cc: Aaron Lu Cc: Andrew Morton Cc: Baoquan He Cc: Borislav Petkov Cc: Claudio Fontana Cc: Denys Vlasenko Cc: Dou Liyang Cc: Gu Zheng Cc: Hidehiro Kawai Cc: Linus Torvalds Cc: Nicolai Stange Cc: Peter Zijlstra (Intel) Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/059fdf437c2f0c09b13c18c8fe4e69999d3ffe69.1483528431.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 8 ++++---- arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd.c | 4 ++-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 6 ++++-- arch/x86/kernel/cpu/mcheck/threshold.c | 4 ++-- arch/x86/kernel/irq.c | 4 ++-- arch/x86/kernel/irq_work.c | 5 +++-- arch/x86/kernel/smp.c | 15 +++++++++------ 8 files changed, 27 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5b7e43eff139..30b122987906 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1864,14 +1864,14 @@ static void __smp_spurious_interrupt(u8 vector) "should never happen.\n", vector, smp_processor_id()); } -__visible void smp_spurious_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) { entering_irq(); __smp_spurious_interrupt(~regs->orig_ax); exiting_irq(); } -__visible void smp_trace_spurious_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs) { u8 vector = ~regs->orig_ax; @@ -1922,14 +1922,14 @@ static void __smp_error_interrupt(struct pt_regs *regs) } -__visible void smp_error_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) { entering_irq(); __smp_error_interrupt(regs); exiting_irq(); } -__visible void smp_trace_error_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs) { entering_irq(); trace_error_apic_entry(ERROR_APIC_VECTOR); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 5d30c5e42bb1..f3557a1eb562 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -559,7 +559,7 @@ void send_cleanup_vector(struct irq_cfg *cfg) __send_cleanup_vector(data); } -asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) +asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a5fd137417a2..9e655292cf10 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -814,14 +814,14 @@ static inline void __smp_deferred_error_interrupt(void) deferred_error_int_vector(); } -asmlinkage __visible void smp_deferred_error_interrupt(void) +asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void) { entering_irq(); __smp_deferred_error_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_deferred_error_interrupt(void) +asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void) { entering_irq(); trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 465aca8be009..772d9400930d 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -404,14 +404,16 @@ static inline 
void __smp_thermal_interrupt(void) smp_thermal_vector(); } -asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void __irq_entry +smp_thermal_interrupt(struct pt_regs *regs) { entering_irq(); __smp_thermal_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void __irq_entry +smp_trace_thermal_interrupt(struct pt_regs *regs) { entering_irq(); trace_thermal_apic_entry(THERMAL_APIC_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 9beb092d68a5..bb0e75eed10a 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -23,14 +23,14 @@ static inline void __smp_threshold_interrupt(void) mce_threshold_vector(); } -asmlinkage __visible void smp_threshold_interrupt(void) +asmlinkage __visible void __irq_entry smp_threshold_interrupt(void) { entering_irq(); __smp_threshold_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_threshold_interrupt(void) +asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void) { entering_irq(); trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 7c6e9ffe4424..4d8183b5f113 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -264,7 +264,7 @@ void __smp_x86_platform_ipi(void) x86_platform_ipi_callback(); } -__visible void smp_x86_platform_ipi(struct pt_regs *regs) +__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -315,7 +315,7 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs) } #endif -__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) +__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 3512ba607361..275487872be2 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -9,6 +9,7 @@ #include #include #include +#include static inline void __smp_irq_work_interrupt(void) { @@ -16,14 +17,14 @@ static inline void __smp_irq_work_interrupt(void) irq_work_run(); } -__visible void smp_irq_work_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_irq_work_interrupt(); exiting_irq(); } -__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 68f8cc222f25..d3c66a15bbde 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -259,7 +259,7 @@ static inline void __smp_reschedule_interrupt(void) scheduler_ipi(); } -__visible void smp_reschedule_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); __smp_reschedule_interrupt(); @@ -268,7 +268,7 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs) */ } -__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs) { /* * Need to call irq_enter() before calling the trace point. 
@@ -292,14 +292,15 @@ static inline void __smp_call_function_interrupt(void) inc_irq_stat(irq_call_count); } -__visible void smp_call_function_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_call_function_interrupt(); exiting_irq(); } -__visible void smp_trace_call_function_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_trace_call_function_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); @@ -314,14 +315,16 @@ static inline void __smp_call_function_single_interrupt(void) inc_irq_stat(irq_call_count); } -__visible void smp_call_function_single_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_call_function_single_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_call_function_single_interrupt(); exiting_irq(); } -__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_trace_call_function_single_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); -- GitLab From 08f6cd01fe77712fda512472346647021a8a3c25 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Sun, 18 Dec 2016 21:11:54 +0200 Subject: [PATCH 0022/1084] Documentation: Fix a typo in IPMI.txt. This patch fixes a trivial typo in IPMI.txt. Signed-off-by: Rami Rosen Signed-off-by: Corey Minyard --- Documentation/IPMI.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt index 72292308d0f5..6962cab997ef 100644 --- a/Documentation/IPMI.txt +++ b/Documentation/IPMI.txt @@ -257,7 +257,7 @@ and tell you when they come and go. Creating the User -To user the message handler, you must first create a user using +To use the message handler, you must first create a user using ipmi_create_user. The interface number specifies which SMI you want to connect to, and you must supply callback functions to be called when data comes in. The callback function can run at interrupt level, -- GitLab From 210af2a5f12403a8968d6014742886cc7e9823b4 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 5 Jan 2017 10:52:10 -0600 Subject: [PATCH 0023/1084] ipmi: make ipmi_usr_hndl const It's only function pointers. Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_devintf.c | 2 +- drivers/char/ipmi/ipmi_msghandler.c | 4 ++-- drivers/char/ipmi/ipmi_watchdog.c | 2 +- include/linux/ipmi.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c index a21407de46ae..f45119c5337d 100644 --- a/drivers/char/ipmi/ipmi_devintf.c +++ b/drivers/char/ipmi/ipmi_devintf.c @@ -108,7 +108,7 @@ static int ipmi_fasync(int fd, struct file *file, int on) return (result); } -static struct ipmi_user_hndl ipmi_hndlrs = +static const struct ipmi_user_hndl ipmi_hndlrs = { .ipmi_recv_hndl = file_receive_handler, }; diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 92e53acf2cd2..9f699951b75a 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -102,7 +102,7 @@ struct ipmi_user { struct kref refcount; /* The upper layer that handles receive messages. */ - struct ipmi_user_hndl *handler; + const struct ipmi_user_hndl *handler; void *handler_data; /* The interface this user is bound to.
*/ @@ -919,7 +919,7 @@ static int intf_err_seq(ipmi_smi_t intf, int ipmi_create_user(unsigned int if_num, - struct ipmi_user_hndl *handler, + const struct ipmi_user_hndl *handler, void *handler_data, ipmi_user_t *user) { diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 4035495f3a86..30b9e83bf1bf 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -985,7 +985,7 @@ static void ipmi_wdog_pretimeout_handler(void *handler_data) pretimeout_since_last_heartbeat = 1; } -static struct ipmi_user_hndl ipmi_hndlrs = { +static const struct ipmi_user_hndl ipmi_hndlrs = { .ipmi_recv_hndl = ipmi_wdog_msg_handler, .ipmi_watchdog_pretimeout = ipmi_wdog_pretimeout_handler }; diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 78c5d5ae3857..f1045b2c6a00 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -100,7 +100,7 @@ struct ipmi_user_hndl { /* Create a new user of the IPMI layer on the given interface number. */ int ipmi_create_user(unsigned int if_num, - struct ipmi_user_hndl *handler, + const struct ipmi_user_hndl *handler, void *handler_data, ipmi_user_t *user); -- GitLab From ad1633a151df9869a222bd99ba04643dc2e0052b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 8 Jan 2017 22:35:31 -0500 Subject: [PATCH 0024/1084] namei: fold unlazy_link() into its sole caller Signed-off-by: Al Viro --- fs/namei.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index ad74877e1442..f08eca2b788b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -749,24 +749,6 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq return -ECHILD; } -static int unlazy_link(struct nameidata *nd, struct path *link, unsigned seq) -{ - if (unlikely(!legitimize_path(nd, link, seq))) { - drop_links(nd); - nd->depth = 0; - nd->flags &= ~LOOKUP_RCU; - nd->path.mnt = NULL; - nd->path.dentry = NULL; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - rcu_read_unlock(); - } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) { - return 0; - } - path_put(link); - return -ECHILD; -} - static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { return dentry->d_op->d_revalidate(dentry, flags); @@ -1706,9 +1688,17 @@ static int pick_link(struct nameidata *nd, struct path *link, error = nd_alloc_stack(nd); if (unlikely(error)) { if (error == -ECHILD) { - if (unlikely(unlazy_link(nd, link, seq))) - return -ECHILD; - error = nd_alloc_stack(nd); + if (unlikely(!legitimize_path(nd, link, seq))) { + drop_links(nd); + nd->depth = 0; + nd->flags &= ~LOOKUP_RCU; + nd->path.mnt = NULL; + nd->path.dentry = NULL; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + rcu_read_unlock(); + } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) + error = nd_alloc_stack(nd); } if (error) { path_put(link); -- GitLab From 209a7fb2104f2724f651870306c65f86850ee953 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2017 01:35:39 -0500 Subject: [PATCH 0025/1084] lookup_fast(): clean up the logics around the fallback to non-rcu mode Signed-off-by: Al Viro --- fs/namei.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f08eca2b788b..dfe6e32aeec6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1558,12 +1558,7 @@ static int lookup_fast(struct nameidata *nd, *seqp = seq; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) status = d_revalidate(dentry, nd->flags); - if (unlikely(status <= 0)) { - 
if (unlazy_walk(nd, dentry, seq)) - return -ECHILD; - if (status == -ECHILD) - status = d_revalidate(dentry, nd->flags); - } else { + if (likely(status > 0)) { /* * Note: do negative dentry check after revalidation in * case that drops it. @@ -1574,9 +1569,12 @@ static int lookup_fast(struct nameidata *nd, path->dentry = dentry; if (likely(__follow_mount_rcu(nd, path, inode, seqp))) return 1; - if (unlazy_walk(nd, dentry, seq)) - return -ECHILD; } + if (unlazy_walk(nd, dentry, seq)) + return -ECHILD; + if (unlikely(status == -ECHILD)) + /* we'd been told to redo it in non-rcu mode */ + status = d_revalidate(dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) -- GitLab From a89f833737e6c75df0091ccf6c767b94745463c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2017 22:25:28 -0500 Subject: [PATCH 0026/1084] namei.c: fold the check for DCACHE_OP_REVALIDATE into d_revalidate() Signed-off-by: Al Viro --- fs/namei.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index dfe6e32aeec6..2ed2701a74b5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -751,7 +751,10 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { - return dentry->d_op->d_revalidate(dentry, flags); + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) + return dentry->d_op->d_revalidate(dentry, flags); + else + return 1; } /** @@ -1454,19 +1457,14 @@ static struct dentry *lookup_dcache(const struct qstr *name, struct dentry *dir, unsigned int flags) { - struct dentry *dentry; - int error; - - dentry = d_lookup(dir, name); + struct dentry *dentry = d_lookup(dir, name); if (dentry) { - if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - error = d_revalidate(dentry, flags); - if (unlikely(error <= 0)) { - if (!error) - d_invalidate(dentry); - dput(dentry); - return ERR_PTR(error); - } + int error = d_revalidate(dentry, flags); + if (unlikely(error <= 0)) { + if (!error) + d_invalidate(dentry); + dput(dentry); + return ERR_PTR(error); } } return dentry; @@ -1556,8 +1554,7 @@ static int lookup_fast(struct nameidata *nd, return -ECHILD; *seqp = seq; - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(dentry, nd->flags); if (likely(status > 0)) { /* * Note: do negative dentry check after revalidation in @@ -1579,8 +1576,7 @@ static int lookup_fast(struct nameidata *nd, dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return 0; - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) @@ -1619,8 +1615,7 @@ static struct dentry *lookup_slow(const struct qstr *name, if (IS_ERR(dentry)) goto out; if (unlikely(!d_in_lookup(dentry))) { - if ((dentry->d_flags & DCACHE_OP_REVALIDATE) && - !(flags & LOOKUP_NO_REVAL)) { + if (!(flags & LOOKUP_NO_REVAL)) { int error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (!error) { @@ -3057,9 +3052,6 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (d_in_lookup(dentry)) break; - if (!(dentry->d_flags & DCACHE_OP_REVALIDATE)) - break; - error = d_revalidate(dentry, nd->flags); if (likely(error > 0)) break; -- GitLab From 4675ac39b5dd5ff08dd8cb2be9ddd3cba778aa39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Jan 2017 22:29:15 -0500 
Subject: [PATCH 0027/1084] namei.c: split unlazy_walk() In all but one case, the last two arguments are NULL and 0 resp.; almost everyone just wants to switch nameidata to non-RCU mode. The only exception is lookup_fast(), where we have a child dentry we want to legitimize as well. Split these two cases. Signed-off-by: Al Viro --- fs/namei.c | 105 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 39 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 2ed2701a74b5..76cfeb641e97 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -672,52 +672,83 @@ static bool legitimize_links(struct nameidata *nd) /** * unlazy_walk - try to switch to ref-walk mode. * @nd: nameidata pathwalk data - * @dentry: child of nd->path.dentry or NULL - * @seq: seq number to check dentry against * Returns: 0 on success, -ECHILD on failure * - * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry - * for ref-walk mode. @dentry must be a path found by a do_lookup call on - * @nd or NULL. Must be called from rcu-walk context. + * unlazy_walk attempts to legitimize the current nd->path and nd->root + * for ref-walk mode. + * Must be called from rcu-walk context. * Nothing should touch nameidata between unlazy_walk() failure and * terminate_walk(). */ -static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq) +static int unlazy_walk(struct nameidata *nd) { struct dentry *parent = nd->path.dentry; BUG_ON(!(nd->flags & LOOKUP_RCU)); + nd->flags &= ~LOOKUP_RCU; + if (unlikely(!legitimize_links(nd))) + goto out2; + if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) + goto out1; + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { + if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq))) + goto out; + } + rcu_read_unlock(); + BUG_ON(nd->inode != parent->d_inode); + return 0; + +out2: + nd->path.mnt = NULL; + nd->path.dentry = NULL; +out1: + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; +out: + rcu_read_unlock(); + return -ECHILD; +} + +/** + * unlazy_child - try to switch to ref-walk mode. + * @nd: nameidata pathwalk data + * @dentry: child of nd->path.dentry + * @seq: seq number to check dentry against + * Returns: 0 on success, -ECHILD on failure + * + * unlazy_child attempts to legitimize the current nd->path, nd->root and dentry + * for ref-walk mode. @dentry must be a path found by a do_lookup call on + * @nd. Must be called from rcu-walk context. + * Nothing should touch nameidata between unlazy_child() failure and + * terminate_walk(). + */ +static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned seq) +{ + BUG_ON(!(nd->flags & LOOKUP_RCU)); + nd->flags &= ~LOOKUP_RCU; if (unlikely(!legitimize_links(nd))) goto out2; if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq))) goto out2; - if (unlikely(!lockref_get_not_dead(&parent->d_lockref))) + if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref))) goto out1; /* - * For a negative lookup, the lookup sequence point is the parents - * sequence point, and it only needs to revalidate the parent dentry. - * - * For a positive lookup, we need to move both the parent and the - * dentry from the RCU domain to be properly refcounted. And the - * sequence number in the dentry validates *both* dentry counters, - * since we checked the sequence number of the parent after we got - * the child sequence number. So we know the parent must still - * be valid if the child sequence number is still valid. 
+ * We need to move both the parent and the dentry from the RCU domain + * to be properly refcounted. And the sequence number in the dentry + * validates *both* dentry counters, since we checked the sequence + * number of the parent after we got the child sequence number. So we + * know the parent must still be valid if the child sequence number is */ - if (!dentry) { - if (read_seqcount_retry(&parent->d_seq, nd->seq)) - goto out; - BUG_ON(nd->inode != parent->d_inode); - } else { - if (!lockref_get_not_dead(&dentry->d_lockref)) - goto out; - if (read_seqcount_retry(&dentry->d_seq, seq)) - goto drop_dentry; + if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) + goto out; + if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) { + rcu_read_unlock(); + dput(dentry); + goto drop_root_mnt; } - /* * Sequence counts matched. Now make sure that the root is * still valid and get it if required. @@ -733,10 +764,6 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq rcu_read_unlock(); return 0; -drop_dentry: - rcu_read_unlock(); - dput(dentry); - goto drop_root_mnt; out2: nd->path.mnt = NULL; out1: @@ -775,7 +802,7 @@ static int complete_walk(struct nameidata *nd) if (nd->flags & LOOKUP_RCU) { if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return -ECHILD; } @@ -1001,7 +1028,7 @@ const char *get_link(struct nameidata *nd) touch_atime(&last->link); cond_resched(); } else if (atime_needs_update_rcu(&last->link, inode)) { - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return ERR_PTR(-ECHILD); touch_atime(&last->link); } @@ -1020,7 +1047,7 @@ const char *get_link(struct nameidata *nd) if (nd->flags & LOOKUP_RCU) { res = get(NULL, inode, &last->done); if (res == ERR_PTR(-ECHILD)) { - if (unlikely(unlazy_walk(nd, NULL, 0))) + if (unlikely(unlazy_walk(nd))) return ERR_PTR(-ECHILD); res = get(dentry, inode, &last->done); } @@ -1529,7 +1556,7 @@ static int lookup_fast(struct nameidata *nd, bool negative; dentry = __d_lookup_rcu(parent, &nd->last, &seq); if (unlikely(!dentry)) { - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; return 0; } @@ -1567,7 +1594,7 @@ static int lookup_fast(struct nameidata *nd, if (likely(__follow_mount_rcu(nd, path, inode, seqp))) return 1; } - if (unlazy_walk(nd, dentry, seq)) + if (unlazy_child(nd, dentry, seq)) return -ECHILD; if (unlikely(status == -ECHILD)) /* we'd been told to redo it in non-rcu mode */ @@ -1646,7 +1673,7 @@ static inline int may_lookup(struct nameidata *nd) int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (err != -ECHILD) return err; - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; } return inode_permission(nd->inode, MAY_EXEC); @@ -1690,7 +1717,7 @@ static int pick_link(struct nameidata *nd, struct path *link, if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) + } else if (likely(unlazy_walk(nd)) == 0) error = nd_alloc_stack(nd); } if (error) { @@ -2108,7 +2135,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) } if (unlikely(!d_can_lookup(nd->path.dentry))) { if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd, NULL, 0)) + if (unlazy_walk(nd)) return -ECHILD; } return -ENOTDIR; @@ -2565,7 +2592,7 @@ mountpoint_last(struct nameidata *nd) /* If we're in rcuwalk, drop out of it to handle last component */ if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd, NULL, 
0)) + if (unlazy_walk(nd)) return -ECHILD; } -- GitLab From 47961f1353b8195cb258ee3068d0969ef87acd20 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 3 Dec 2016 00:08:48 +0100 Subject: [PATCH 0028/1084] ARM64: dts: meson-gx: move the SCPI and SRAM nodes to meson-gx SCPI and SRAM are identical on GXBB and GXL. Moving the corresponding nodes to meson-gx adds support for the thermal sensor on GXL based devices. Signed-off-by: Martin Blumenstingl Tested-by: Neil Armstrong Acked-by: Neil Armstrong [khilman: add scpi_clocks label] Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 45 ++++++++++++++++ arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 57 --------------------- 2 files changed, 45 insertions(+), 57 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index eada0b58ba1c..be56b9fae2aa 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -65,6 +65,7 @@ cpu0: cpu@0 { reg = <0x0 0x0>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 0>; }; cpu1: cpu@1 { @@ -73,6 +74,7 @@ cpu1: cpu@1 { reg = <0x0 0x1>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 0>; }; cpu2: cpu@2 { @@ -81,6 +83,7 @@ cpu2: cpu@2 { reg = <0x0 0x2>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 0>; }; cpu3: cpu@3 { @@ -89,6 +92,7 @@ cpu3: cpu@3 { reg = <0x0 0x3>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 0>; }; l2: l2-cache0 { @@ -153,6 +157,28 @@ bid: bid@46 { }; }; + scpi { + compatible = "amlogic,meson-gxbb-scpi", "arm,scpi-pre-1.0"; + mboxes = <&mailbox 1 &mailbox 2>; + shmem = <&cpu_scp_lpri &cpu_scp_hpri>; + + scpi_clocks: clocks { + compatible = "arm,scpi-clocks"; + + scpi_dvfs: scpi_clocks@0 { + compatible = "arm,scpi-dvfs-clocks"; + #clock-cells = <1>; + clock-indices = <0>; + clock-output-names = "vcpu"; + }; + }; + + scpi_sensors: sensors { + compatible = "arm,scpi-sensors"; + #thermal-sensor-cells = <1>; + }; + }; + soc { compatible = "simple-bus"; #address-cells = <2>; @@ -264,6 +290,25 @@ gic: interrupt-controller@c4301000 { #address-cells = <0>; }; + sram: sram@c8000000 { + compatible = "amlogic,meson-gxbb-sram", "mmio-sram"; + reg = <0x0 0xc8000000 0x0 0x14000>; + + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x0 0xc8000000 0x14000>; + + cpu_scp_lpri: scp-shmem@0 { + compatible = "amlogic,meson-gxbb-scp-shmem"; + reg = <0x13000 0x400>; + }; + + cpu_scp_hpri: scp-shmem@200 { + compatible = "amlogic,meson-gxbb-scp-shmem"; + reg = <0x13400 0x400>; + }; + }; + aobus: aobus@c8100000 { compatible = "simple-bus"; reg = <0x0 0xc8100000 0x0 0x100000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 596240c38a9c..5d686334f692 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -50,28 +50,6 @@ / { compatible = "amlogic,meson-gxbb"; - scpi { - compatible = "amlogic,meson-gxbb-scpi", "arm,scpi-pre-1.0"; - mboxes = <&mailbox 1 &mailbox 2>; - shmem = <&cpu_scp_lpri &cpu_scp_hpri>; - - clocks { - compatible = "arm,scpi-clocks"; - - scpi_dvfs: scpi_clocks@0 { - compatible = "arm,scpi-dvfs-clocks"; - #clock-cells = <1>; - clock-indices = <0>; - clock-output-names = "vcpu"; - }; - }; - - scpi_sensors: sensors { - compatible = "arm,scpi-sensors"; - #thermal-sensor-cells = <1>; - }; - }; - soc { usb0_phy: phy@c0000000 { compatible = 
"amlogic,meson-gxbb-usb2-phy"; @@ -93,25 +71,6 @@ usb1_phy: phy@c0000020 { status = "disabled"; }; - sram: sram@c8000000 { - compatible = "amlogic,meson-gxbb-sram", "mmio-sram"; - reg = <0x0 0xc8000000 0x0 0x14000>; - - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 0x0 0xc8000000 0x14000>; - - cpu_scp_lpri: scp-shmem@0 { - compatible = "amlogic,meson-gxbb-scp-shmem"; - reg = <0x13000 0x400>; - }; - - cpu_scp_hpri: scp-shmem@200 { - compatible = "amlogic,meson-gxbb-scp-shmem"; - reg = <0x13400 0x400>; - }; - }; - usb0: usb@c9000000 { compatible = "amlogic,meson-gxbb-usb", "snps,dwc2"; reg = <0x0 0xc9000000 0x0 0x40000>; @@ -138,22 +97,6 @@ usb1: usb@c9100000 { }; }; -&cpu0 { - clocks = <&scpi_dvfs 0>; -}; - -&cpu1 { - clocks = <&scpi_dvfs 0>; -}; - -&cpu2 { - clocks = <&scpi_dvfs 0>; -}; - -&cpu3 { - clocks = <&scpi_dvfs 0>; -}; - &cbus { spifc: spi@8c80 { compatible = "amlogic,meson-gxbb-spifc"; -- GitLab From bd97abc0d04930d0369411824d437f19e72e13cc Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 3 Dec 2016 00:08:49 +0100 Subject: [PATCH 0029/1084] ARM64: dts: meson-gxm: add SCPI configuration for GXM This adds the SCPI DVFS clock index and configures the CPU cores accordingly. Signed-off-by: Martin Blumenstingl Tested-by: Neil Armstrong Acked-by: Neil Armstrong Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxm.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi index eb2f0c3e5e53..4c55665a253f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi @@ -85,6 +85,7 @@ cpu4: cpu@100 { reg = <0x0 0x100>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 1>; }; cpu5: cpu@101 { @@ -93,6 +94,7 @@ cpu5: cpu@101 { reg = <0x0 0x101>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 1>; }; cpu6: cpu@102 { @@ -101,6 +103,7 @@ cpu6: cpu@102 { reg = <0x0 0x102>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 1>; }; cpu7: cpu@103 { @@ -109,10 +112,17 @@ cpu7: cpu@103 { reg = <0x0 0x103>; enable-method = "psci"; next-level-cache = <&l2>; + clocks = <&scpi_dvfs 1>; }; }; }; +&scpi_dvfs { + clock-indices = <0 1>; + clock-output-names = "vbig", "vlittle"; +}; + &vpu { compatible = "amlogic,meson-gxm-vpu", "amlogic,meson-gx-vpu"; }; + -- GitLab From 00a19f3e25c0c40e0ec77f52d4841d23ad269169 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 8 Nov 2016 09:21:19 +0100 Subject: [PATCH 0030/1084] ARM: 8627/1: avoid cache flushing in flush_dcache_page() When the data cache is PIPT or VIPT non-aliasing, and cache operations are broadcast by the hardware, we can always postpone the flush in flush_dcache_page(). A similar change was done for ARM64 in commit b5b6c9e9149d ("arm64: Avoid cache flushing in flush_dcache_page()"). 
Reviewed-by: Catalin Marinas Signed-off-by: Rabin Vincent Signed-off-by: Russell King --- arch/arm/mm/flush.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 3cced8455727..f1e6190aa7ea 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -327,6 +327,12 @@ void flush_dcache_page(struct page *page) if (page == ZERO_PAGE(0)) return; + if (!cache_ops_need_broadcast() && cache_is_vipt_nonaliasing()) { + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); + return; + } + mapping = page_mapping(page); if (!cache_ops_need_broadcast() && -- GitLab From 79964a1c2972ca7ecc231e2d2ac7593a1af73f63 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Mon, 12 Dec 2016 09:31:18 +0100 Subject: [PATCH 0031/1084] ARM: 8633/1: nommu: allow mmap when !CONFIG_MMU Commit ab6494f0c96f ("nommu: Add noMMU support to the DMA API") added a CONFIG_MMU compilation flag, but that prohibits the use of dma_mmap_wc() when the platform doesn't have an MMU. This patch calls vm_iomap_memory() in the noMMU case to check that the addresses are correct and to set vma->vm_flags, rather than always returning an error. Signed-off-by: Benjamin Gaignard Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ab7710002ba6..afa64ed5e36b 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -868,6 +868,9 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_end - vma->vm_start, vma->vm_page_prot); } +#else + ret = vm_iomap_memory(vma, vma->vm_start, + (vma->vm_end - vma->vm_start)); #endif /* CONFIG_MMU */ return ret; -- GitLab From 8a792e9afbce84a0fdaf213fe42bb97382487094 Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Sat, 7 Jan 2017 17:48:10 +0100 Subject: [PATCH 0032/1084] ARM: 8635/1: nommu: allow enabling REMAP_VECTORS_TO_RAM REMAP_VECTORS_TO_RAM depends on DRAM_BASE, but since DRAM_BASE is a hex option, REMAP_VECTORS_TO_RAM could never get enabled. Also, depending on DRAM_BASE is redundant, as whenever REMAP_VECTORS_TO_RAM makes itself available to Kconfig, DRAM_BASE is also available, since the Kconfig gets sourced on !MMU. Signed-off-by: Afzal Mohammed Reviewed-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/Kconfig-nommu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index aed66d5df7f1..b7576349528c 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -34,8 +34,7 @@ config PROCESSOR_ID used instead of the auto-probing which utilizes the register. config REMAP_VECTORS_TO_RAM - bool 'Install vectors to the beginning of RAM' if DRAM_BASE - depends on DRAM_BASE + bool 'Install vectors to the beginning of RAM' help The kernel needs to change the hardware exception vectors.
In nommu mode, the hardware exception vectors are normally -- GitLab From 7880b43bdfc9580700ee4568c75c383a5bcdd2ca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Jan 2017 04:01:17 -0500 Subject: [PATCH 0033/1084] 9p: constify ->d_name handling Signed-off-by: Al Viro --- fs/9p/fid.c | 10 +++++----- fs/9p/vfs_inode.c | 10 +++++----- fs/9p/vfs_inode_dotl.c | 20 ++++++++++---------- include/net/9p/9p.h | 8 ++++---- include/net/9p/client.h | 18 +++++++++--------- net/9p/client.c | 18 +++++++++--------- 6 files changed, 42 insertions(+), 42 deletions(-) diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 60fb47469c86..ed4f8519b627 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -91,10 +91,10 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) * dentry names. */ static int build_path_from_dentry(struct v9fs_session_info *v9ses, - struct dentry *dentry, char ***names) + struct dentry *dentry, const unsigned char ***names) { int n = 0, i; - char **wnames; + const unsigned char **wnames; struct dentry *ds; for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent) @@ -105,7 +105,7 @@ static int build_path_from_dentry(struct v9fs_session_info *v9ses, goto err_out; for (ds = dentry, i = (n-1); i >= 0; i--, ds = ds->d_parent) - wnames[i] = (char *)ds->d_name.name; + wnames[i] = ds->d_name.name; *names = wnames; return n; @@ -117,7 +117,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, kuid_t uid, int any) { struct dentry *ds; - char **wnames, *uname; + const unsigned char **wnames, *uname; int i, n, l, clone, access; struct v9fs_session_info *v9ses; struct p9_fid *fid, *old_fid = NULL; @@ -137,7 +137,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, fid = v9fs_fid_find(ds, uid, any); if (fid) { /* Found the parent fid do a lookup with that */ - fid = p9_client_walk(fid, 1, (char **)&dentry->d_name.name, 1); + fid = p9_client_walk(fid, 1, &dentry->d_name.name, 1); goto fid_out; } up_read(&v9ses->rename_sem); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f4f4450119e4..e3e0d6581d4c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -643,7 +643,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, struct dentry *dentry, char *extension, u32 perm, u8 mode) { int err; - char *name; + const unsigned char *name; struct p9_fid *dfid, *ofid, *fid; struct inode *inode; @@ -652,7 +652,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, err = 0; ofid = NULL; fid = NULL; - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); @@ -788,7 +788,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, struct v9fs_session_info *v9ses; struct p9_fid *dfid, *fid; struct inode *inode; - char *name; + const unsigned char *name; p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%pd) %p flags: %x\n", dir, dentry, dentry, flags); @@ -802,7 +802,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, if (IS_ERR(dfid)) return ERR_CAST(dfid); - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { if (fid == ERR_PTR(-ENOENT)) { @@ -1012,7 +1012,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, } v9fs_blank_wstat(&wstat); wstat.muid = v9ses->uname; - wstat.name = (char *) new_dentry->d_name.name; + wstat.name = new_dentry->d_name.name; retval = p9_client_wstat(oldfid, &wstat); clunk_newdir: diff --git 
a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 5999bd050678..28130b1e53e4 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -244,7 +244,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, int err = 0; kgid_t gid; umode_t mode; - char *name = NULL; + const unsigned char *name = NULL; struct p9_qid qid; struct inode *inode; struct p9_fid *fid = NULL; @@ -269,7 +269,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, v9ses = v9fs_inode2v9ses(dir); - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n", name, flags, omode); @@ -385,7 +385,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; kgid_t gid; - char *name; + const unsigned char *name; umode_t mode; struct inode *inode; struct p9_qid qid; @@ -416,7 +416,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, err); goto error; } - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid); if (err < 0) goto error; @@ -678,14 +678,14 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, { int err; kgid_t gid; - char *name; + const unsigned char *name; struct p9_qid qid; struct inode *inode; struct p9_fid *dfid; struct p9_fid *fid = NULL; struct v9fs_session_info *v9ses; - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname); v9ses = v9fs_inode2v9ses(dir); @@ -699,7 +699,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, gid = v9fs_get_fsgid_for_create(dir); /* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. 
*/ - err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid); + err = p9_client_symlink(dfid, name, symname, gid, &qid); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_symlink failed %d\n", err); @@ -775,7 +775,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, if (IS_ERR(oldfid)) return PTR_ERR(oldfid); - err = p9_client_link(dfid, oldfid, (char *)dentry->d_name.name); + err = p9_client_link(dfid, oldfid, dentry->d_name.name); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err); @@ -812,7 +812,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, { int err; kgid_t gid; - char *name; + const unsigned char *name; umode_t mode; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; @@ -842,7 +842,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, err); goto error; } - name = (char *) dentry->d_name.name; + name = dentry->d_name.name; err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid); if (err < 0) diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 27dfe85772b1..b8eb51a661e5 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -402,10 +402,10 @@ struct p9_wstat { u32 atime; u32 mtime; u64 length; - char *name; - char *uid; - char *gid; - char *muid; + const char *name; + const char *uid; + const char *gid; + const char *muid; char *extension; /* 9p2000.u extensions */ kuid_t n_uid; /* 9p2000.u extensions */ kgid_t n_gid; /* 9p2000.u extensions */ diff --git a/include/net/9p/client.h b/include/net/9p/client.h index c6b97e58cf84..b582339ccef5 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -223,16 +223,16 @@ void p9_client_destroy(struct p9_client *clnt); void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, kuid_t n_uname, char *aname); + const char *uname, kuid_t n_uname, const char *aname); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, - char **wnames, int clone); + const unsigned char * const *wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); -int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, +int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, char *extension); -int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, char *newname); -int p9_client_symlink(struct p9_fid *fid, char *name, char *symname, kgid_t gid, - struct p9_qid *qid); -int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, +int p9_client_link(struct p9_fid *fid, struct p9_fid *oldfid, const char *newname); +int p9_client_symlink(struct p9_fid *fid, const char *name, const char *symname, + kgid_t gid, struct p9_qid *qid); +int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode, kgid_t gid, struct p9_qid *qid); int p9_client_clunk(struct p9_fid *fid); int p9_client_fsync(struct p9_fid *fid, int datasync); @@ -250,9 +250,9 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *attr); struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, u64 request_mask); -int p9_client_mknod_dotl(struct p9_fid *oldfid, char *name, int mode, +int p9_client_mknod_dotl(struct p9_fid *oldfid, const char *name, int mode, dev_t rdev, kgid_t gid, struct p9_qid *); -int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, +int 
p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl); diff --git a/net/9p/client.c b/net/9p/client.c index 3fc94a49ccd5..5a0c3a64af14 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1101,7 +1101,7 @@ void p9_client_begin_disconnect(struct p9_client *clnt) EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, kuid_t n_uname, char *aname) + const char *uname, kuid_t n_uname, const char *aname) { int err = 0; struct p9_req_t *req; @@ -1149,7 +1149,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, EXPORT_SYMBOL(p9_client_attach); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, - char **wnames, int clone) + const unsigned char * const *wnames, int clone) { int err; struct p9_client *clnt; @@ -1271,7 +1271,7 @@ int p9_client_open(struct p9_fid *fid, int mode) } EXPORT_SYMBOL(p9_client_open); -int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, +int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode, kgid_t gid, struct p9_qid *qid) { int err = 0; @@ -1316,7 +1316,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, } EXPORT_SYMBOL(p9_client_create_dotl); -int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, +int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, char *extension) { int err; @@ -1361,8 +1361,8 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, } EXPORT_SYMBOL(p9_client_fcreate); -int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid, - struct p9_qid *qid) +int p9_client_symlink(struct p9_fid *dfid, const char *name, + const char *symtgt, kgid_t gid, struct p9_qid *qid) { int err = 0; struct p9_client *clnt; @@ -1395,7 +1395,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid, } EXPORT_SYMBOL(p9_client_symlink); -int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname) +int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newname) { struct p9_client *clnt; struct p9_req_t *req; @@ -2117,7 +2117,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) } EXPORT_SYMBOL(p9_client_readdir); -int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, +int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, dev_t rdev, kgid_t gid, struct p9_qid *qid) { int err; @@ -2148,7 +2148,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, } EXPORT_SYMBOL(p9_client_mknod_dotl); -int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, +int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *qid) { int err; -- GitLab From 5a582cff47c90af29ecb293caa2f667bd4d45e54 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 17 Jan 2017 13:08:48 +0100 Subject: [PATCH 0034/1084] clk: meson-gxbb: Export HDMI clocks Export HDMI clock from internal to dt-bindings. 
Signed-off-by: Neil Armstrong Acked-by: Stephen Boyd Signed-off-by: Kevin Hilman --- drivers/clk/meson/gxbb.h | 4 ++-- include/dt-bindings/clock/gxbb-clkc.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h index 0252939ba58f..2139e97f5e39 100644 --- a/drivers/clk/meson/gxbb.h +++ b/drivers/clk/meson/gxbb.h @@ -231,7 +231,7 @@ #define CLKID_AHB_DATA_BUS 60 #define CLKID_AHB_CTRL_BUS 61 #define CLKID_HDMI_INTR_SYNC 62 -#define CLKID_HDMI_PCLK 63 +/* CLKID_HDMI_PCLK */ /* CLKID_USB1_DDR_BRIDGE */ /* CLKID_USB0_DDR_BRIDGE */ #define CLKID_MMC_PCLK 66 @@ -245,7 +245,7 @@ #define CLKID_VCLK2_VENCI1 74 #define CLKID_VCLK2_VENCP0 75 #define CLKID_VCLK2_VENCP1 76 -#define CLKID_GCLK_VENCI_INT0 77 +/* CLKID_GCLK_VENCI_INT0 */ #define CLKID_GCLK_VENCI_INT 78 #define CLKID_DAC_CLK 79 #define CLKID_AOCLK_GATE 80 diff --git a/include/dt-bindings/clock/gxbb-clkc.h b/include/dt-bindings/clock/gxbb-clkc.h index baade6f429d0..da1d473a5a3a 100644 --- a/include/dt-bindings/clock/gxbb-clkc.h +++ b/include/dt-bindings/clock/gxbb-clkc.h @@ -18,8 +18,10 @@ #define CLKID_USB0 50 #define CLKID_USB1 51 #define CLKID_USB 55 +#define CLKID_HDMI_PCLK 63 #define CLKID_USB1_DDR_BRIDGE 64 #define CLKID_USB0_DDR_BRIDGE 65 +#define CLKID_GCLK_VENCI_INT0 77 #define CLKID_AO_I2C 93 #define CLKID_SD_EMMC_A 94 #define CLKID_SD_EMMC_B 95 -- GitLab From 890a96a257b497e8361055ffbf66e2fd08833074 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 15 Jan 2017 23:20:29 +0100 Subject: [PATCH 0035/1084] ARM64: dts: meson-gx: add the missing uart_AO_B This adds the missing node for the uart_AO_B port to the meson-gx.dtsi (as this is supported by GXBB, GXL and GXM) along with the required pinctrl pins. This is required as some boards are using it (the boards from the Khadas VIM series for example have it exposed on the pin headers). 
Signed-off-by: Martin Blumenstingl Acked-by: Linus Walleij Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 8 ++++++++ arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 7 +++++++ arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 7 +++++++ 3 files changed, 22 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index be56b9fae2aa..9212fca6eb94 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -324,6 +324,14 @@ uart_AO: serial@4c0 { status = "disabled"; }; + uart_AO_B: serial@4e0 { + compatible = "amlogic,meson-uart"; + reg = <0x0 0x004e0 0x0 0x14>; + interrupts = ; + clocks = <&xtal>; + status = "disabled"; + }; + ir: ir@580 { compatible = "amlogic,meson-gxbb-ir"; reg = <0x0 0x00580 0x0 0x40>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 5d686334f692..474435e21759 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -138,6 +138,13 @@ mux { }; }; + uart_ao_b_pins: uart_ao_b { + mux { + groups = "uart_tx_ao_b", "uart_rx_ao_b"; + function = "uart_ao_b"; + }; + }; + remote_input_ao_pins: remote_input_ao { mux { groups = "remote_input_ao"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 69216246275d..6d815f9930c9 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -88,6 +88,13 @@ mux { }; }; + uart_ao_b_pins: uart_ao_b { + mux { + groups = "uart_tx_ao_b", "uart_rx_ao_b"; + function = "uart_ao_b"; + }; + }; + remote_input_ao_pins: remote_input_ao { mux { groups = "remote_input_ao"; -- GitLab From 261e1d5cc5ee994f17089d8f0d4f8443743d732d Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 15 Jan 2017 23:32:53 +0100 Subject: [PATCH 0036/1084] ARM64: dts: meson-gx: add the serial CTS and RTS pin groups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds pinctrl group nodes for the CTS and RTS pins of each serial controller. This makes it possible to enable the CTS and RTS pins which are controlled by the serial controller hardware (through the meson_uart driver). 
Signed-off-by: Martin Blumenstingl Tested-by: Andreas Färber Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 40 +++++++++++++++++++++ arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 40 +++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 474435e21759..f001c4d007bc 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -138,6 +138,14 @@ mux { }; }; + uart_ao_a_cts_rts_pins: uart_ao_a_cts_rts { + mux { + groups = "uart_cts_ao_a", + "uart_rts_ao_a"; + function = "uart_ao"; + }; + }; + uart_ao_b_pins: uart_ao_b { mux { groups = "uart_tx_ao_b", "uart_rx_ao_b"; @@ -145,6 +153,14 @@ mux { }; }; + uart_ao_b_cts_rts_pins: uart_ao_b_cts_rts { + mux { + groups = "uart_cts_ao_b", + "uart_rts_ao_b"; + function = "uart_ao_b"; + }; + }; + remote_input_ao_pins: remote_input_ao { mux { groups = "remote_input_ao"; @@ -290,6 +306,14 @@ mux { }; }; + uart_a_cts_rts_pins: uart_a_cts_rts { + mux { + groups = "uart_cts_a", + "uart_rts_a"; + function = "uart_a"; + }; + }; + uart_b_pins: uart_b { mux { groups = "uart_tx_b", @@ -298,6 +322,14 @@ mux { }; }; + uart_b_cts_rts_pins: uart_b_cts_rts { + mux { + groups = "uart_cts_b", + "uart_rts_b"; + function = "uart_b"; + }; + }; + uart_c_pins: uart_c { mux { groups = "uart_tx_c", @@ -306,6 +338,14 @@ mux { }; }; + uart_c_cts_rts_pins: uart_c_cts_rts { + mux { + groups = "uart_cts_c", + "uart_rts_c"; + function = "uart_c"; + }; + }; + i2c_a_pins: i2c_a { mux { groups = "i2c_sck_a", diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 6d815f9930c9..f3d1566c0a09 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -88,6 +88,14 @@ mux { }; }; + uart_ao_a_cts_rts_pins: uart_ao_a_cts_rts { + mux { + groups = "uart_cts_ao_a", + "uart_rts_ao_a"; + function = "uart_ao"; + }; + }; + uart_ao_b_pins: uart_ao_b { mux { groups = "uart_tx_ao_b", "uart_rx_ao_b"; @@ -95,6 +103,14 @@ mux { }; }; + uart_ao_b_cts_rts_pins: uart_ao_b_cts_rts { + mux { + groups = "uart_cts_ao_b", + "uart_rts_ao_b"; + function = "uart_ao_b"; + }; + }; + remote_input_ao_pins: remote_input_ao { mux { groups = "remote_input_ao"; @@ -170,6 +186,14 @@ mux { }; }; + uart_a_cts_rts_pins: uart_a_cts_rts { + mux { + groups = "uart_cts_a", + "uart_rts_a"; + function = "uart_a"; + }; + }; + uart_b_pins: uart_b { mux { groups = "uart_tx_b", @@ -178,6 +202,14 @@ mux { }; }; + uart_b_cts_rts_pins: uart_b_cts_rts { + mux { + groups = "uart_cts_b", + "uart_rts_b"; + function = "uart_b"; + }; + }; + uart_c_pins: uart_c { mux { groups = "uart_tx_c", @@ -186,6 +218,14 @@ mux { }; }; + uart_c_cts_rts_pins: uart_c_cts_rts { + mux { + groups = "uart_cts_c", + "uart_rts_c"; + function = "uart_c"; + }; + }; + i2c_a_pins: i2c_a { mux { groups = "i2c_sck_a", -- GitLab From 2fbbc4bf69f293df317559a267f4120f290b8fc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20F=C3=A4rber?= Date: Tue, 17 Jan 2017 02:17:53 +0100 Subject: [PATCH 0037/1084] ARM64: dts: meson-gxbb-vega-s95: Add LED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is one blue LED on the front of the device. Keep it lit and configure it as panic indicator. 
Signed-off-by: Andreas Färber Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi index e59ad308192f..86709929fd20 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi @@ -53,6 +53,17 @@ chosen { stdout-path = "serial0:115200n8"; }; + leds { + compatible = "gpio-leds"; + + blue { + label = "vega-s95:blue:on"; + gpios = <&gpio_ao GPIOAO_13 GPIO_ACTIVE_HIGH>; + default-state = "on"; + panic-indicator; + }; + }; + usb_vbus: regulator-usb0-vbus { compatible = "regulator-fixed"; -- GitLab From b949165c86cdd230fbb7997a2b2966e860caf372 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 17 Jan 2017 13:05:38 +0100 Subject: [PATCH 0038/1084] ARM64: dts: meson-gx: Add HDMI HPD/DDC pinctrl nodes Add pinctrl nodes for HDMI HPD and DDC pins modes for Amlogic Meson GXL and GXBB SoCs. Signed-off-by: Neil Armstrong Acked-by: Linus Walleij Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 14 ++++++++++++++ arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index f001c4d007bc..39a774ad83ce 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -453,6 +453,20 @@ mux { function = "pwm_f_y"; }; }; + + hdmi_hpd_pins: hdmi_hpd { + mux { + groups = "hdmi_hpd"; + function = "hdmi_hpd"; + }; + }; + + hdmi_i2c_pins: hdmi_i2c { + mux { + groups = "hdmi_sda", "hdmi_scl"; + function = "hdmi_i2c"; + }; + }; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index f3d1566c0a09..bdf2305a2e25 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -276,6 +276,20 @@ mux { function = "pwm_e"; }; }; + + hdmi_hpd_pins: hdmi_hpd { + mux { + groups = "hdmi_hpd"; + function = "hdmi_hpd"; + }; + }; + + hdmi_i2c_pins: hdmi_i2c { + mux { + groups = "hdmi_sda", "hdmi_scl"; + function = "hdmi_i2c"; + }; + }; }; eth-phy-mux { -- GitLab From bccec89f0a35f65302734d1cdb01479df0f33ac9 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 20 Sep 2016 17:40:56 +0300 Subject: [PATCH 0039/1084] pwm: pca9685: Allow any of the 16 PWMs to be used as a GPIO The PCA9685 controller has full on/off bit for each PWM channel. Setting this bit bypasses the PWM control and the line works just as it would be a GPIO. Furthermore in Intel Galileo it is actually used as GPIO output for discreet muxes on the board. This patch adds GPIO output only support for the driver so that we can control the muxes on Galileo using standard GPIO interfaces available in the kernel. GPIO and PWM functionality is exclusive so only one can be active at a time on a single PWM channel. 
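For illustration, a minimal consumer sketch of how such a channel can then be driven through the standard GPIO descriptor API (not part of this patch; the "mux" connection ID, the board wiring and the surrounding driver are assumptions):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/gpio/consumer.h>

/*
 * Hypothetical consumer: switch a board-level mux wired to one of the
 * PCA9685 outputs exposed by the gpiochip registered by this patch.
 */
static int example_set_board_mux(struct device *dev, bool on)
{
	struct gpio_desc *mux;

	/* "mux" is an assumed connection ID provided by the board description */
	mux = devm_gpiod_get(dev, "mux", GPIOD_OUT_LOW);
	if (IS_ERR(mux))
		return PTR_ERR(mux);

	/* the chip sets can_sleep (I2C access), so use the _cansleep variant */
	gpiod_set_value_cansleep(mux, on);

	return 0;
}

Once a channel is requested this way, the corresponding PWM channel is unavailable until the GPIO is released again, matching the exclusivity described above.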
Signed-off-by: Mika Westerberg Acked-by: Linus Walleij Signed-off-by: Thierry Reding --- drivers/pwm/pwm-pca9685.c | 164 +++++++++++++++++++++++++++++++++++++- 1 file changed, 163 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 117fccf7934a..621656b9aa41 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -20,8 +20,10 @@ */ #include +#include #include #include +#include #include #include #include @@ -81,6 +83,10 @@ struct pca9685 { int active_cnt; int duty_ns; int period_ns; +#if IS_ENABLED(CONFIG_GPIOLIB) + struct mutex lock; + struct gpio_chip gpio; +#endif }; static inline struct pca9685 *to_pca(struct pwm_chip *chip) @@ -88,6 +94,151 @@ static inline struct pca9685 *to_pca(struct pwm_chip *chip) return container_of(chip, struct pca9685, chip); } +#if IS_ENABLED(CONFIG_GPIOLIB) +static int pca9685_pwm_gpio_request(struct gpio_chip *gpio, unsigned int offset) +{ + struct pca9685 *pca = gpiochip_get_data(gpio); + struct pwm_device *pwm; + + mutex_lock(&pca->lock); + + pwm = &pca->chip.pwms[offset]; + + if (pwm->flags & (PWMF_REQUESTED | PWMF_EXPORTED)) { + mutex_unlock(&pca->lock); + return -EBUSY; + } + + pwm_set_chip_data(pwm, (void *)1); + + mutex_unlock(&pca->lock); + return 0; +} + +static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset) +{ + struct pca9685 *pca = gpiochip_get_data(gpio); + struct pwm_device *pwm; + + mutex_lock(&pca->lock); + pwm = &pca->chip.pwms[offset]; + pwm_set_chip_data(pwm, NULL); + mutex_unlock(&pca->lock); +} + +static bool pca9685_pwm_is_gpio(struct pca9685 *pca, struct pwm_device *pwm) +{ + bool is_gpio = false; + + mutex_lock(&pca->lock); + + if (pwm->hwpwm >= PCA9685_MAXCHAN) { + unsigned int i; + + /* + * Check if any of the GPIOs are requested and in that case + * prevent using the "all LEDs" channel. + */ + for (i = 0; i < pca->gpio.ngpio; i++) + if (gpiochip_is_requested(&pca->gpio, i)) { + is_gpio = true; + break; + } + } else if (pwm_get_chip_data(pwm)) { + is_gpio = true; + } + + mutex_unlock(&pca->lock); + return is_gpio; +} + +static int pca9685_pwm_gpio_get(struct gpio_chip *gpio, unsigned int offset) +{ + struct pca9685 *pca = gpiochip_get_data(gpio); + struct pwm_device *pwm = &pca->chip.pwms[offset]; + unsigned int value; + + regmap_read(pca->regmap, LED_N_ON_H(pwm->hwpwm), &value); + + return value & LED_FULL; +} + +static void pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset, + int value) +{ + struct pca9685 *pca = gpiochip_get_data(gpio); + struct pwm_device *pwm = &pca->chip.pwms[offset]; + unsigned int on = value ? LED_FULL : 0; + + /* Clear both OFF registers */ + regmap_write(pca->regmap, LED_N_OFF_L(pwm->hwpwm), 0); + regmap_write(pca->regmap, LED_N_OFF_H(pwm->hwpwm), 0); + + /* Set the full ON bit */ + regmap_write(pca->regmap, LED_N_ON_H(pwm->hwpwm), on); +} + +static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip, + unsigned int offset) +{ + /* Always out */ + return 0; +} + +static int pca9685_pwm_gpio_direction_input(struct gpio_chip *gpio, + unsigned int offset) +{ + return -EINVAL; +} + +static int pca9685_pwm_gpio_direction_output(struct gpio_chip *gpio, + unsigned int offset, int value) +{ + pca9685_pwm_gpio_set(gpio, offset, value); + + return 0; +} + +/* + * The PCA9685 has a bit for turning the PWM output full off or on. Some + * boards like Intel Galileo actually uses these as normal GPIOs so we + * expose a GPIO chip here which can exclusively take over the underlying + * PWM channel. 
+ */ +static int pca9685_pwm_gpio_probe(struct pca9685 *pca) +{ + struct device *dev = pca->chip.dev; + + mutex_init(&pca->lock); + + pca->gpio.label = dev_name(dev); + pca->gpio.parent = dev; + pca->gpio.request = pca9685_pwm_gpio_request; + pca->gpio.free = pca9685_pwm_gpio_free; + pca->gpio.get_direction = pca9685_pwm_gpio_get_direction; + pca->gpio.direction_input = pca9685_pwm_gpio_direction_input; + pca->gpio.direction_output = pca9685_pwm_gpio_direction_output; + pca->gpio.get = pca9685_pwm_gpio_get; + pca->gpio.set = pca9685_pwm_gpio_set; + pca->gpio.base = -1; + pca->gpio.ngpio = PCA9685_MAXCHAN; + pca->gpio.can_sleep = true; + + return devm_gpiochip_add_data(dev, &pca->gpio, pca); +} +#else +static inline bool pca9685_pwm_is_gpio(struct pca9685 *pca, + struct pwm_device *pwm) +{ + return false; +} + +static inline int pca9685_pwm_gpio_probe(struct pca9685 *pca) +{ + return 0; +} +#endif + static int pca9685_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { @@ -264,6 +415,9 @@ static int pca9685_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) { struct pca9685 *pca = to_pca(chip); + if (pca9685_pwm_is_gpio(pca, pwm)) + return -EBUSY; + if (pca->active_cnt++ == 0) return regmap_update_bits(pca->regmap, PCA9685_MODE1, MODE1_SLEEP, 0x0); @@ -345,7 +499,15 @@ static int pca9685_pwm_probe(struct i2c_client *client, pca->chip.base = -1; pca->chip.can_sleep = true; - return pwmchip_add(&pca->chip); + ret = pwmchip_add(&pca->chip); + if (ret < 0) + return ret; + + ret = pca9685_pwm_gpio_probe(pca); + if (ret < 0) + pwmchip_remove(&pca->chip); + + return ret; } static int pca9685_pwm_remove(struct i2c_client *client) -- GitLab From 5bf22ff32e736beb8ba91a99a3e116d12e7eabd0 Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Sat, 8 Oct 2016 13:54:05 -0700 Subject: [PATCH 0040/1084] pwm: bcm-iproc: Update dependencies for compile-test Add dependency on COMMON_CLK and allow COMPILE_TEST for broader compile coverage. Default to Y for IPROC SoCs. This allows the driver to simply be enabled by selecting PWM. Signed-off-by: Scott Branden Reviewed-by: Ray Jui [thierry.reding@gmail.com: reword commit message] Signed-off-by: Thierry Reding --- drivers/pwm/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index f92dd41b0395..704b0b8f65c1 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -76,7 +76,9 @@ config PWM_ATMEL_TCB config PWM_BCM_IPROC tristate "iProc PWM support" - depends on ARCH_BCM_IPROC + depends on ARCH_BCM_IPROC || COMPILE_TEST + depends on COMMON_CLK + default ARCH_BCM_IPROC help Generic PWM framework driver for Broadcom iProc PWM block. This block is used in Broadcom iProc SoC's. -- GitLab From b2ec9efc1f4fb1dc13a599db88019d5472deef9b Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Tue, 10 Jan 2017 23:42:06 +0530 Subject: [PATCH 0041/1084] pwm: constify pwm_ops structures Declare pwm_ops structures as const as they are only stored in the ops field of a pwm_chip structure. This field is of type const struct pwm_ops *, so pwm_ops structures having this property can be declared as const. 
Done using Coccinelle: @r1 disable optional_qualifier@ identifier i; position p; @@ static struct pwm_ops i@p={...}; @ok1@ identifier r1.i; position p; struct pxa_pwm_chip pwm; struct bfin_pwm_chip bwm; struct vt8500_chip vp; struct imx_chip icp; @@ ( pwm.chip.ops=&i@p | bwm.chip.ops=&i@p | vp.chip.ops=&i@p | icp.chip.ops=&i@p ) @bad@ position p!={r1.p,ok1.p}; identifier r1.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r1.i; @@ +const struct pwm_ops i; File size details: text data bss dec hex filename 1646 328 0 1974 7b6 drivers/pwm/pwm-imx.o 1742 224 0 1966 7ae drivers/pwm/pwm-imx.o 1941 296 0 2237 8bd drivers/pwm/pwm-pxa.o 2037 192 0 2229 8b5 drivers/pwm/pwm-pxa.o 1946 296 0 2242 8c2 drivers/pwm/pwm-vt8500.o 2050 192 0 2242 8c2 drivers/pwm/pwm-vt8500.o The drivers/pwm/pwm-bfin.o file did not compile. Signed-off-by: Bhumika Goyal Signed-off-by: Thierry Reding --- drivers/pwm/pwm-bfin.c | 2 +- drivers/pwm/pwm-imx.c | 2 +- drivers/pwm/pwm-pxa.c | 2 +- drivers/pwm/pwm-vt8500.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pwm/pwm-bfin.c b/drivers/pwm/pwm-bfin.c index 7631ef194de7..d2ed0a2a18e8 100644 --- a/drivers/pwm/pwm-bfin.c +++ b/drivers/pwm/pwm-bfin.c @@ -103,7 +103,7 @@ static void bfin_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) disable_gptimer(priv->pin); } -static struct pwm_ops bfin_pwm_ops = { +static const struct pwm_ops bfin_pwm_ops = { .request = bfin_pwm_request, .free = bfin_pwm_free, .config = bfin_pwm_config, diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index d600fd5cd4ba..177fb81c916d 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -240,7 +240,7 @@ static void imx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) clk_disable_unprepare(imx->clk_per); } -static struct pwm_ops imx_pwm_ops = { +static const struct pwm_ops imx_pwm_ops = { .enable = imx_pwm_enable, .disable = imx_pwm_disable, .config = imx_pwm_config, diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c index 58b709f29130..4143a46684d2 100644 --- a/drivers/pwm/pwm-pxa.c +++ b/drivers/pwm/pwm-pxa.c @@ -118,7 +118,7 @@ static void pxa_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) clk_disable_unprepare(pc->clk); } -static struct pwm_ops pxa_pwm_ops = { +static const struct pwm_ops pxa_pwm_ops = { .config = pxa_pwm_config, .enable = pxa_pwm_enable, .disable = pxa_pwm_disable, diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c index cdb58fd4619d..8141a4984126 100644 --- a/drivers/pwm/pwm-vt8500.c +++ b/drivers/pwm/pwm-vt8500.c @@ -184,7 +184,7 @@ static int vt8500_pwm_set_polarity(struct pwm_chip *chip, return 0; } -static struct pwm_ops vt8500_pwm_ops = { +static const struct pwm_ops vt8500_pwm_ops = { .enable = vt8500_pwm_enable, .disable = vt8500_pwm_disable, .config = vt8500_pwm_config, -- GitLab From 4e6118974c7ab5d65e77fe8c47c87a85e9c7ed7d Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 19 Jan 2017 09:21:57 +0100 Subject: [PATCH 0042/1084] ARM64: dts: meson-gxm: Rename q200 and q201 DT files for consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to keep consistency naming with the Nexbox A1 DTS file, remove the S912 SoC name in the GXM DT files. 
Suggested-by: Andreas Färber Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/Makefile | 4 ++-- .../amlogic/{meson-gxm-s912-q200.dts => meson-gxm-q200.dts} | 0 .../amlogic/{meson-gxm-s912-q201.dts => meson-gxm-q201.dts} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename arch/arm64/boot/dts/amlogic/{meson-gxm-s912-q200.dts => meson-gxm-q200.dts} (100%) rename arch/arm64/boot/dts/amlogic/{meson-gxm-s912-q201.dts => meson-gxm-q201.dts} (100%) diff --git a/arch/arm64/boot/dts/amlogic/Makefile b/arch/arm64/boot/dts/amlogic/Makefile index 0d7bfbf7d922..1aca8e58ad2b 100644 --- a/arch/arm64/boot/dts/amlogic/Makefile +++ b/arch/arm64/boot/dts/amlogic/Makefile @@ -9,8 +9,8 @@ dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905x-p212.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p230.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p231.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-nexbox-a95x.dtb -dtb-$(CONFIG_ARCH_MESON) += meson-gxm-s912-q200.dtb -dtb-$(CONFIG_ARCH_MESON) += meson-gxm-s912-q201.dtb +dtb-$(CONFIG_ARCH_MESON) += meson-gxm-q200.dtb +dtb-$(CONFIG_ARCH_MESON) += meson-gxm-q201.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxm-nexbox-a1.dtb always := $(dtb-y) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-s912-q200.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts similarity index 100% rename from arch/arm64/boot/dts/amlogic/meson-gxm-s912-q200.dts rename to arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-s912-q201.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-q201.dts similarity index 100% rename from arch/arm64/boot/dts/amlogic/meson-gxm-s912-q201.dts rename to arch/arm64/boot/dts/amlogic/meson-gxm-q201.dts -- GitLab From b022e9b9d0e67f4cba62bc790bd387e23c29dc6c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 22 Dec 2016 11:38:20 +0100 Subject: [PATCH 0043/1084] thermal: rcar_gen3_thermal: Document the R-Car Gen3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Hien Dang Signed-off-by: Khiem Nguyen Signed-off-by: Wolfram Sang Signed-off-by: Niklas Söderlund Signed-off-by: Eduardo Valentin --- .../bindings/thermal/rcar-gen3-thermal.txt | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt diff --git a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt new file mode 100644 index 000000000000..07a9713ae6a7 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt @@ -0,0 +1,56 @@ +* DT bindings for Renesas R-Car Gen3 Thermal Sensor driver + +On R-Car Gen3 SoCs, the thermal sensor controllers (TSC) control the thermal +sensors (THS) which are the analog circuits for measuring temperature (Tj) +inside the LSI. + +Required properties: +- compatible : "renesas,-thermal", + Examples with soctypes are: + - "renesas,r8a7795-thermal" (R-Car H3) + - "renesas,r8a7796-thermal" (R-Car M3-W) +- reg : Address ranges of the thermal registers. Each sensor + needs one address range. Sorting must be done in + increasing order according to datasheet, i.e. + TSC1, TSC2, ... +- clocks : Must contain a reference to the functional clock. +- #thermal-sensor-cells : must be <1>. + +Optional properties: + +- interrupts : interrupts routed to the TSC (3 for H3 and M3-W) +- power-domain : Must contain a reference to the power domain. 
This + property is mandatory if the thermal sensor instance + is part of a controllable power domain. + +Example: + + tsc: thermal@e6198000 { + compatible = "renesas,r8a7795-thermal"; + reg = <0 0xe6198000 0 0x68>, + <0 0xe61a0000 0 0x5c>, + <0 0xe61a8000 0 0x5c>; + interrupts = , + , + ; + clocks = <&cpg CPG_MOD 522>; + power-domains = <&sysc R8A7795_PD_ALWAYS_ON>; + #thermal-sensor-cells = <1>; + status = "okay"; + }; + + thermal-zones { + sensor_thermal1: sensor-thermal1 { + polling-delay-passive = <250>; + polling-delay = <1000>; + thermal-sensors = <&tsc 0>; + + trips { + sensor1_crit: sensor1-crit { + temperature = <90000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + }; + }; -- GitLab From 564e73d283af9d4c1d642079d5c7ac601876162f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 22 Dec 2016 11:38:21 +0100 Subject: [PATCH 0044/1084] thermal: rcar_gen3_thermal: Add R-Car Gen3 thermal driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for R-Car Gen3 thermal sensors. Polling only for now, interrupts will be added incrementally. Same goes for reading fuses. This is documented already, but no hardware available for now. Signed-off-by: Hien Dang Signed-off-by: Thao Nguyen Signed-off-by: Khiem Nguyen Signed-off-by: Wolfram Sang [Niklas: document and rework temperature calculation] Signed-off-by: Niklas Söderlund Signed-off-by: Eduardo Valentin --- drivers/thermal/Kconfig | 9 + drivers/thermal/Makefile | 1 + drivers/thermal/rcar_gen3_thermal.c | 335 ++++++++++++++++++++++++++++ 3 files changed, 345 insertions(+) create mode 100644 drivers/thermal/rcar_gen3_thermal.c diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index c2c056cc7ea5..3912d24a07b1 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -245,6 +245,15 @@ config RCAR_THERMAL Enable this to plug the R-Car thermal sensor driver into the Linux thermal framework. +config RCAR_GEN3_THERMAL + tristate "Renesas R-Car Gen3 thermal driver" + depends on ARCH_RENESAS || COMPILE_TEST + depends on HAS_IOMEM + depends on OF + help + Enable this to plug the R-Car Gen3 thermal sensor driver into the Linux + thermal framework. + config KIRKWOOD_THERMAL tristate "Temperature sensor on Marvell Kirkwood SoCs" depends on MACH_KIRKWOOD || COMPILE_TEST diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 6a3d7b573036..adcf3cc90859 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM) += qcom-spmi-temp-alarm.o obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o obj-$(CONFIG_ROCKCHIP_THERMAL) += rockchip_thermal.o obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o +obj-$(CONFIG_RCAR_GEN3_THERMAL) += rcar_gen3_thermal.o obj-$(CONFIG_KIRKWOOD_THERMAL) += kirkwood_thermal.o obj-y += samsung/ obj-$(CONFIG_DOVE_THERMAL) += dove_thermal.o diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c new file mode 100644 index 000000000000..d33c845244b1 --- /dev/null +++ b/drivers/thermal/rcar_gen3_thermal.c @@ -0,0 +1,335 @@ +/* + * R-Car Gen3 THS thermal sensor driver + * Based on rcar_thermal.c and work from Hien Dang and Khiem Nguyen. + * + * Copyright (C) 2016 Renesas Electronics Corporation. + * Copyright (C) 2016 Sang Engineering + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Register offsets */ +#define REG_GEN3_IRQSTR 0x04 +#define REG_GEN3_IRQMSK 0x08 +#define REG_GEN3_IRQCTL 0x0C +#define REG_GEN3_IRQEN 0x10 +#define REG_GEN3_IRQTEMP1 0x14 +#define REG_GEN3_IRQTEMP2 0x18 +#define REG_GEN3_IRQTEMP3 0x1C +#define REG_GEN3_CTSR 0x20 +#define REG_GEN3_THCTR 0x20 +#define REG_GEN3_TEMP 0x28 +#define REG_GEN3_THCODE1 0x50 +#define REG_GEN3_THCODE2 0x54 +#define REG_GEN3_THCODE3 0x58 + +/* CTSR bits */ +#define CTSR_PONM BIT(8) +#define CTSR_AOUT BIT(7) +#define CTSR_THBGR BIT(5) +#define CTSR_VMEN BIT(4) +#define CTSR_VMST BIT(1) +#define CTSR_THSST BIT(0) + +/* THCTR bits */ +#define THCTR_PONM BIT(6) +#define THCTR_THSST BIT(0) + +#define CTEMP_MASK 0xFFF + +#define MCELSIUS(temp) ((temp) * 1000) +#define GEN3_FUSE_MASK 0xFFF + +#define TSC_MAX_NUM 3 + +/* Structure for thermal temperature calculation */ +struct equation_coefs { + int a1; + int b1; + int a2; + int b2; +}; + +struct rcar_gen3_thermal_tsc { + void __iomem *base; + struct thermal_zone_device *zone; + struct equation_coefs coef; + struct mutex lock; +}; + +struct rcar_gen3_thermal_priv { + struct rcar_gen3_thermal_tsc *tscs[TSC_MAX_NUM]; +}; + +struct rcar_gen3_thermal_data { + void (*thermal_init)(struct rcar_gen3_thermal_tsc *tsc); +}; + +static inline u32 rcar_gen3_thermal_read(struct rcar_gen3_thermal_tsc *tsc, + u32 reg) +{ + return ioread32(tsc->base + reg); +} + +static inline void rcar_gen3_thermal_write(struct rcar_gen3_thermal_tsc *tsc, + u32 reg, u32 data) +{ + iowrite32(data, tsc->base + reg); +} + +/* + * Linear approximation for temperature + * + * [reg] = [temp] * a + b => [temp] = ([reg] - b) / a + * + * The constants a and b are calculated using two triplets of int values PTAT + * and THCODE. PTAT and THCODE can either be read from hardware or use hard + * coded values from driver. The formula to calculate a and b are taken from + * BSP and sparsely documented and understood. + * + * Examining the linear formula and the formula used to calculate constants a + * and b while knowing that the span for PTAT and THCODE values are between + * 0x000 and 0xfff the largest integer possible is 0xfff * 0xfff == 0xffe001. + * Integer also needs to be signed so that leaves 7 bits for binary + * fixed point scaling. 
+ */ + +#define FIXPT_SHIFT 7 +#define FIXPT_INT(_x) ((_x) << FIXPT_SHIFT) +#define FIXPT_DIV(_a, _b) DIV_ROUND_CLOSEST(((_a) << FIXPT_SHIFT), (_b)) +#define FIXPT_TO_MCELSIUS(_x) ((_x) * 1000 >> FIXPT_SHIFT) + +#define RCAR3_THERMAL_GRAN 500 /* mili Celsius */ + +/* no idea where these constants come from */ +#define TJ_1 96 +#define TJ_3 -41 + +static void rcar_gen3_thermal_calc_coefs(struct equation_coefs *coef, + int *ptat, int *thcode) +{ + int tj_2; + + /* TODO: Find documentation and document constant calculation formula */ + + /* + * Division is not scaled in BSP and if scaled it might overflow + * the dividend (4095 * 4095 << 14 > INT_MAX) so keep it unscaled + */ + tj_2 = (FIXPT_INT((ptat[1] - ptat[2]) * 137) + / (ptat[0] - ptat[2])) - FIXPT_INT(41); + + coef->a1 = FIXPT_DIV(FIXPT_INT(thcode[1] - thcode[2]), + tj_2 - FIXPT_INT(TJ_3)); + coef->b1 = FIXPT_INT(thcode[2]) - coef->a1 * TJ_3; + + coef->a2 = FIXPT_DIV(FIXPT_INT(thcode[1] - thcode[0]), + tj_2 - FIXPT_INT(TJ_1)); + coef->b2 = FIXPT_INT(thcode[0]) - coef->a2 * TJ_1; +} + +static int rcar_gen3_thermal_round(int temp) +{ + int result, round_offs; + + round_offs = temp >= 0 ? RCAR3_THERMAL_GRAN / 2 : + -RCAR3_THERMAL_GRAN / 2; + result = (temp + round_offs) / RCAR3_THERMAL_GRAN; + return result * RCAR3_THERMAL_GRAN; +} + +static int rcar_gen3_thermal_get_temp(void *devdata, int *temp) +{ + struct rcar_gen3_thermal_tsc *tsc = devdata; + int mcelsius, val1, val2; + u32 reg; + + /* Read register and convert to mili Celsius */ + mutex_lock(&tsc->lock); + + reg = rcar_gen3_thermal_read(tsc, REG_GEN3_TEMP) & CTEMP_MASK; + + val1 = FIXPT_DIV(FIXPT_INT(reg) - tsc->coef.b1, tsc->coef.a1); + val2 = FIXPT_DIV(FIXPT_INT(reg) - tsc->coef.b2, tsc->coef.a2); + mcelsius = FIXPT_TO_MCELSIUS((val1 + val2) / 2); + + mutex_unlock(&tsc->lock); + + /* Make sure we are inside specifications */ + if ((mcelsius < MCELSIUS(-40)) || (mcelsius > MCELSIUS(125))) + return -EIO; + + /* Round value to device granularity setting */ + *temp = rcar_gen3_thermal_round(mcelsius); + + return 0; +} + +static struct thermal_zone_of_device_ops rcar_gen3_tz_of_ops = { + .get_temp = rcar_gen3_thermal_get_temp, +}; + +static void r8a7795_thermal_init(struct rcar_gen3_thermal_tsc *tsc) +{ + rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR, CTSR_THBGR); + rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR, 0x0); + + usleep_range(1000, 2000); + + rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR, CTSR_PONM); + rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0x3F); + rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR, + CTSR_PONM | CTSR_AOUT | CTSR_THBGR | CTSR_VMEN); + + usleep_range(100, 200); + + rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR, + CTSR_PONM | CTSR_AOUT | CTSR_THBGR | CTSR_VMEN | + CTSR_VMST | CTSR_THSST); + + usleep_range(1000, 2000); +} + +static void r8a7796_thermal_init(struct rcar_gen3_thermal_tsc *tsc) +{ + u32 reg_val; + + reg_val = rcar_gen3_thermal_read(tsc, REG_GEN3_THCTR); + reg_val &= ~THCTR_PONM; + rcar_gen3_thermal_write(tsc, REG_GEN3_THCTR, reg_val); + + usleep_range(1000, 2000); + + rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0x3F); + reg_val = rcar_gen3_thermal_read(tsc, REG_GEN3_THCTR); + reg_val |= THCTR_THSST; + rcar_gen3_thermal_write(tsc, REG_GEN3_THCTR, reg_val); +} + +static const struct rcar_gen3_thermal_data r8a7795_data = { + .thermal_init = r8a7795_thermal_init, +}; + +static const struct rcar_gen3_thermal_data r8a7796_data = { + .thermal_init = r8a7796_thermal_init, +}; + +static const struct of_device_id rcar_gen3_thermal_dt_ids[] = { + { .compatible = 
"renesas,r8a7795-thermal", .data = &r8a7795_data}, + { .compatible = "renesas,r8a7796-thermal", .data = &r8a7796_data}, + {}, +}; +MODULE_DEVICE_TABLE(of, rcar_gen3_thermal_dt_ids); + +static int rcar_gen3_thermal_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + pm_runtime_put(dev); + pm_runtime_disable(dev); + + return 0; +} + +static int rcar_gen3_thermal_probe(struct platform_device *pdev) +{ + struct rcar_gen3_thermal_priv *priv; + struct device *dev = &pdev->dev; + struct resource *res; + struct thermal_zone_device *zone; + int ret, i; + const struct rcar_gen3_thermal_data *match_data = + of_device_get_match_data(dev); + + /* default values if FUSEs are missing */ + /* TODO: Read values from hardware on supported platforms */ + int ptat[3] = { 2351, 1509, 435 }; + int thcode[TSC_MAX_NUM][3] = { + { 3248, 2800, 2221 }, + { 3245, 2795, 2216 }, + { 3250, 2805, 2237 }, + }; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + platform_set_drvdata(pdev, priv); + + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + + for (i = 0; i < TSC_MAX_NUM; i++) { + struct rcar_gen3_thermal_tsc *tsc; + + tsc = devm_kzalloc(dev, sizeof(*tsc), GFP_KERNEL); + if (!tsc) { + ret = -ENOMEM; + goto error_unregister; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, i); + if (!res) + break; + + tsc->base = devm_ioremap_resource(dev, res); + if (IS_ERR(tsc->base)) { + ret = PTR_ERR(tsc->base); + goto error_unregister; + } + + priv->tscs[i] = tsc; + mutex_init(&tsc->lock); + + match_data->thermal_init(tsc); + rcar_gen3_thermal_calc_coefs(&tsc->coef, ptat, thcode[i]); + + zone = devm_thermal_zone_of_sensor_register(dev, i, tsc, + &rcar_gen3_tz_of_ops); + if (IS_ERR(zone)) { + dev_err(dev, "Can't register thermal zone\n"); + ret = PTR_ERR(zone); + goto error_unregister; + } + tsc->zone = zone; + } + + return 0; + +error_unregister: + rcar_gen3_thermal_remove(pdev); + + return ret; +} + +static struct platform_driver rcar_gen3_thermal_driver = { + .driver = { + .name = "rcar_gen3_thermal", + .of_match_table = rcar_gen3_thermal_dt_ids, + }, + .probe = rcar_gen3_thermal_probe, + .remove = rcar_gen3_thermal_remove, +}; +module_platform_driver(rcar_gen3_thermal_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("R-Car Gen3 THS thermal sensor driver"); +MODULE_AUTHOR("Wolfram Sang "); -- GitLab From 8d254a340efb12b40c4c1ff25a48a4f48f7bbd6b Mon Sep 17 00:00:00 2001 From: Clemens Gruber Date: Tue, 13 Dec 2016 16:52:50 +0100 Subject: [PATCH 0045/1084] pwm: pca9685: Fix period change with same duty cycle When first implementing support for changing the output frequency, an optimization was added to continue the PWM after changing the prescaler without having to reprogram the ON and OFF registers for the duty cycle, in case the duty cycle stayed the same. This was flawed, because we compared the absolute value of the duty cycle in nanoseconds instead of the ratio to the period. Fix the problem by removing the shortcut. 
Fixes: 01ec8472009c9 ("pwm-pca9685: Support changing the output frequency") Cc: # v4.3+ Signed-off-by: Clemens Gruber Reviewed-by: Mika Westerberg Signed-off-by: Thierry Reding --- drivers/pwm/pwm-pca9685.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 621656b9aa41..5566760f7dc4 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -67,7 +67,6 @@ #define PCA9685_MAXCHAN 0x10 #define LED_FULL (1 << 4) -#define MODE1_RESTART (1 << 7) #define MODE1_SLEEP (1 << 4) #define MODE2_INVRT (1 << 4) #define MODE2_OUTDRV (1 << 2) @@ -268,16 +267,6 @@ static int pca9685_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, udelay(500); pca->period_ns = period_ns; - - /* - * If the duty cycle did not change, restart PWM with - * the same duty cycle to period ratio and return. - */ - if (duty_ns == pca->duty_ns) { - regmap_update_bits(pca->regmap, PCA9685_MODE1, - MODE1_RESTART, 0x1); - return 0; - } } else { dev_err(chip->dev, "prescaler not set: period out of bounds!\n"); -- GitLab From a8d4c8246b290ce97f88752d833804843041ac84 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Mon, 23 Jan 2017 14:48:23 +0800 Subject: [PATCH 0046/1084] x86/crash: Update the stale comment in reserve_crashkernel() CRASH_KERNEL_ADDR_MAX has been missing for a long time, update it with a more detailed explanation. Signed-off-by: Xunlei Pang Cc: Andrew Morton Cc: Baoquan He Cc: Borislav Petkov Cc: Dave Young Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert LeBlanc Cc: Thomas Gleixner Cc: kexec@lists.infradead.org Link: http://lkml.kernel.org/r/1485154103-18426-1-git-send-email-xlpang@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4cfba947d774..eb69b14dbfc8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -575,7 +575,9 @@ static void __init reserve_crashkernel(void) /* 0 means: find the address automatically */ if (crash_base <= 0) { /* - * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX + * Set CRASH_ADDR_LOW_MAX upper bound for crash memory, + * as old kexec-tools loads bzImage below that, unless + * "crashkernel=size[KMG],high" is specified. */ crash_base = memblock_find_in_range(CRASH_ALIGN, high ? CRASH_ADDR_HIGH_MAX -- GitLab From 100bd9a961e368490daa8cdbb2f41b03ef50164a Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 18 Jan 2017 11:50:40 +0100 Subject: [PATCH 0047/1084] dt-bindings: vendor-prefix: Add wetek vendor prefix Add prefix for WeTek Electronics, limited, a company producing multimedia Set-Top-Boxes and supporting KODI and LibreELEC distributions. Signed-off-by: Neil Armstrong Acked-by: Rob Herring Signed-off-by: Kevin Hilman --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 16d3b5e7f5d1..0c16d8581d13 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -320,6 +320,7 @@ virtio Virtual I/O Device Specification, developed by the OASIS consortium vivante Vivante Corporation voipac Voipac Technologies s.r.o. wd Western Digital Corp. +wetek WeTek Electronics, limited. wexler Wexler winbond Winbond Electronics corp. 
wlf Wolfson Microelectronics -- GitLab From d537d289de06f57f5342106208ecf17ea83f23e2 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 18 Jan 2017 11:50:41 +0100 Subject: [PATCH 0048/1084] ARM64: dts: meson-gxbb: Add support for WeTek Hub and Play Adds support for the WeTek Hub and Play2 boards. The Hub is an extremely small IPTv Set-Top-Box and the Play2 is a more traditionnal Satellite or Terrestrial and IPTv Set-Top-Box. Both are based on the p200 Reference Design and out-of-tree support is based on LibreELEC kernel at [1]. [1] https://github.com/wetek-enigma/linux-amlogic Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/Makefile | 2 + .../boot/dts/amlogic/meson-gxbb-wetek-hub.dts | 66 +++++++++++++ .../dts/amlogic/meson-gxbb-wetek-play2.dts | 94 +++++++++++++++++++ 3 files changed, 162 insertions(+) create mode 100644 arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-hub.dts create mode 100644 arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-play2.dts diff --git a/arch/arm64/boot/dts/amlogic/Makefile b/arch/arm64/boot/dts/amlogic/Makefile index 1aca8e58ad2b..293e985e3dfa 100644 --- a/arch/arm64/boot/dts/amlogic/Makefile +++ b/arch/arm64/boot/dts/amlogic/Makefile @@ -5,6 +5,8 @@ dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-p201.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-vega-s95-pro.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-vega-s95-meta.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-vega-s95-telos.dtb +dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-wetek-hub.dtb +dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-wetek-play2.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905x-p212.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p230.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p231.dtb diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-hub.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-hub.dts new file mode 100644 index 000000000000..56f855901262 --- /dev/null +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-hub.dts @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016 BayLibre, Inc. + * Author: Neil Armstrong + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/dts-v1/; + +#include "meson-gxbb-p20x.dtsi" + +/ { + compatible = "wetek,hub", "amlogic,meson-gxbb"; + model = "WeTek Hub"; + + leds { + compatible = "gpio-leds"; + + system { + label = "wetek-play:system-status"; + gpios = <&gpio_ao GPIOAO_13 GPIO_ACTIVE_HIGH>; + default-state = "on"; + panic-indicator; + }; + }; + + cvbs-connector { + status = "disabled"; + }; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-play2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-play2.dts new file mode 100644 index 000000000000..ea79fdd2c248 --- /dev/null +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek-play2.dts @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016 BayLibre, Inc. + * Author: Neil Armstrong + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/dts-v1/; + +#include "meson-gxbb-p20x.dtsi" +#include + +/ { + compatible = "wetek,play2", "amlogic,meson-gxbb"; + model = "WeTek Play 2"; + + leds { + compatible = "gpio-leds"; + + system { + label = "wetek-play:system-status"; + gpios = <&gpio_ao GPIOAO_13 GPIO_ACTIVE_HIGH>; + default-state = "on"; + panic-indicator; + }; + + wifi { + label = "wetek-play:wifi-status"; + gpios = <&gpio GPIODV_26 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + + ethernet { + label = "wetek-play:ethernet-status"; + gpios = <&gpio GPIODV_27 GPIO_ACTIVE_HIGH>; + default-state = "off"; + }; + }; + + gpio-keys-polled { + compatible = "gpio-keys-polled"; + #address-cells = <1>; + #size-cells = <0>; + poll-interval = <100>; + + button@0 { + label = "reset"; + linux,code = ; + gpios = <&gpio_ao GPIOAO_3 GPIO_ACTIVE_LOW>; + }; + }; +}; + +&i2c_A { + status = "okay"; + pinctrl-0 = <&i2c_a_pins>; + pinctrl-names = "default"; +}; -- GitLab From 0264a88d6153e6cd5ee61239058b2002f36dde6b Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 18 Jan 2017 11:50:42 +0100 Subject: [PATCH 0049/1084] dt-bindings: amlogic: Add WeTek boards Signed-off-by: Neil Armstrong Acked-by: Rob Herring Signed-off-by: Kevin Hilman --- Documentation/devicetree/bindings/arm/amlogic.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/amlogic.txt b/Documentation/devicetree/bindings/arm/amlogic.txt index 9b2b41ab6817..c246cd2730d9 100644 --- a/Documentation/devicetree/bindings/arm/amlogic.txt +++ b/Documentation/devicetree/bindings/arm/amlogic.txt @@ -40,6 +40,8 @@ Board compatible values: - "hardkernel,odroid-c2" (Meson gxbb) - "amlogic,p200" (Meson gxbb) - "amlogic,p201" (Meson gxbb) + - "wetek,hub" (Meson gxbb) + - "wetek,play2" (Meson gxbb) - "amlogic,p212" (Meson gxl s905x) - "amlogic,p230" (Meson gxl s905d) - "amlogic,p231" (Meson gxl s905d) -- GitLab From 33d0fcdfe0e87070d96c678e554d711ae15b9fa6 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 19 Jan 2017 15:58:20 +0100 Subject: [PATCH 0050/1084] clk: gxbb: add the SAR ADC clocks and expose them The HHI_SAR_CLK_CNTL contains three SAR ADC specific clocks: - a mux clock to choose between different ADC reference clocks (this is 2-bit wide, but the datasheet only lists the parents for the first bit) - a divider for the input/reference clock - a gate which enables the ADC clock Additionally this exposes the ADC core clock (CLKID_SAR_ADC) and CLKID_SANA (which seems to enable the analog inputs, but unfortunately there is no documentation for this - we just mimic what the vendor driver does). 
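A rough sketch of how an ADC driver is expected to consume the new gate clock is shown below (illustrative only; the "adc_clk" connection name and the enclosing driver are assumptions, not part of this patch):

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

/* Hypothetical ADC driver fragment using the gate clock exposed above. */
static int example_enable_adc_clock(struct device *dev, unsigned long rate_hz)
{
	struct clk *adc_clk;
	int ret;

	adc_clk = devm_clk_get(dev, "adc_clk");	/* assumed clock-names entry */
	if (IS_ERR(adc_clk))
		return PTR_ERR(adc_clk);

	/*
	 * CLK_SET_RATE_PARENT on the gate forwards the rate request to the
	 * divider, so the consumer only deals with the leaf clock.
	 */
	ret = clk_set_rate(adc_clk, rate_hz);
	if (ret)
		return ret;

	return clk_prepare_enable(adc_clk);
}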
Signed-off-by: Martin Blumenstingl Tested-by: Neil Armstrong Acked-by: Stephen Boyd Signed-off-by: Kevin Hilman --- drivers/clk/meson/gxbb.c | 48 +++++++++++++++++++++++++++ drivers/clk/meson/gxbb.h | 9 +++-- include/dt-bindings/clock/gxbb-clkc.h | 4 +++ 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 9d9af446bafc..1c1ec137a3cc 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -564,6 +564,46 @@ static struct clk_gate gxbb_clk81 = { }, }; +static struct clk_mux gxbb_sar_adc_clk_sel = { + .reg = (void *)HHI_SAR_CLK_CNTL, + .mask = 0x3, + .shift = 9, + .lock = &clk_lock, + .hw.init = &(struct clk_init_data){ + .name = "sar_adc_clk_sel", + .ops = &clk_mux_ops, + /* NOTE: The datasheet doesn't list the parents for bit 10 */ + .parent_names = (const char *[]){ "xtal", "clk81", }, + .num_parents = 2, + }, +}; + +static struct clk_divider gxbb_sar_adc_clk_div = { + .reg = (void *)HHI_SAR_CLK_CNTL, + .shift = 0, + .width = 8, + .lock = &clk_lock, + .hw.init = &(struct clk_init_data){ + .name = "sar_adc_clk_div", + .ops = &clk_divider_ops, + .parent_names = (const char *[]){ "sar_adc_clk_sel" }, + .num_parents = 1, + }, +}; + +static struct clk_gate gxbb_sar_adc_clk = { + .reg = (void *)HHI_SAR_CLK_CNTL, + .bit_idx = 8, + .lock = &clk_lock, + .hw.init = &(struct clk_init_data){ + .name = "sar_adc_clk", + .ops = &clk_gate_ops, + .parent_names = (const char *[]){ "sar_adc_clk_div" }, + .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, + }, +}; + /* Everything Else (EE) domain gates */ static MESON_GATE(gxbb_ddr, HHI_GCLK_MPEG0, 0); static MESON_GATE(gxbb_dos, HHI_GCLK_MPEG0, 1); @@ -754,6 +794,9 @@ static struct clk_hw_onecell_data gxbb_hw_onecell_data = { [CLKID_SD_EMMC_A] = &gxbb_emmc_a.hw, [CLKID_SD_EMMC_B] = &gxbb_emmc_b.hw, [CLKID_SD_EMMC_C] = &gxbb_emmc_c.hw, + [CLKID_SAR_ADC_CLK] = &gxbb_sar_adc_clk.hw, + [CLKID_SAR_ADC_SEL] = &gxbb_sar_adc_clk_sel.hw, + [CLKID_SAR_ADC_DIV] = &gxbb_sar_adc_clk_div.hw, }, .num = NR_CLKS, }; @@ -856,6 +899,7 @@ static struct clk_gate *gxbb_clk_gates[] = { &gxbb_emmc_a, &gxbb_emmc_b, &gxbb_emmc_c, + &gxbb_sar_adc_clk, }; static int gxbb_clkc_probe(struct platform_device *pdev) @@ -888,6 +932,10 @@ static int gxbb_clkc_probe(struct platform_device *pdev) gxbb_mpeg_clk_sel.reg = clk_base + (u64)gxbb_mpeg_clk_sel.reg; gxbb_mpeg_clk_div.reg = clk_base + (u64)gxbb_mpeg_clk_div.reg; + /* Populate the base address for the SAR ADC clks */ + gxbb_sar_adc_clk_sel.reg = clk_base + (u64)gxbb_sar_adc_clk_sel.reg; + gxbb_sar_adc_clk_div.reg = clk_base + (u64)gxbb_sar_adc_clk_div.reg; + /* Populate base address for gates */ for (i = 0; i < ARRAY_SIZE(gxbb_clk_gates); i++) gxbb_clk_gates[i]->reg = clk_base + diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h index 2139e97f5e39..dc487180f847 100644 --- a/drivers/clk/meson/gxbb.h +++ b/drivers/clk/meson/gxbb.h @@ -191,7 +191,7 @@ #define CLKID_PERIPHS 20 #define CLKID_SPICC 21 /* CLKID_I2C */ -#define CLKID_SAR_ADC 23 +/* #define CLKID_SAR_ADC */ #define CLKID_SMART_CARD 24 #define CLKID_RNG0 25 #define CLKID_UART0 26 @@ -237,7 +237,7 @@ #define CLKID_MMC_PCLK 66 #define CLKID_DVIN 67 #define CLKID_UART2 68 -#define CLKID_SANA 69 +/* #define CLKID_SANA */ #define CLKID_VPU_INTR 70 #define CLKID_SEC_AHB_AHB3_BRIDGE 71 #define CLKID_CLK81_A53 72 @@ -265,8 +265,11 @@ /* CLKID_SD_EMMC_A */ /* CLKID_SD_EMMC_B */ /* CLKID_SD_EMMC_C */ +/* CLKID_SAR_ADC_CLK */ +/* CLKID_SAR_ADC_SEL */ +#define CLKID_SAR_ADC_DIV 99 -#define NR_CLKS 
97 +#define NR_CLKS 100 /* include the CLKIDs that have been made part of the stable DT binding */ #include diff --git a/include/dt-bindings/clock/gxbb-clkc.h b/include/dt-bindings/clock/gxbb-clkc.h index da1d473a5a3a..692846c7941b 100644 --- a/include/dt-bindings/clock/gxbb-clkc.h +++ b/include/dt-bindings/clock/gxbb-clkc.h @@ -14,6 +14,7 @@ #define CLKID_MPLL2 15 #define CLKID_SPI 34 #define CLKID_I2C 22 +#define CLKID_SAR_ADC 23 #define CLKID_ETH 36 #define CLKID_USB0 50 #define CLKID_USB1 51 @@ -21,10 +22,13 @@ #define CLKID_HDMI_PCLK 63 #define CLKID_USB1_DDR_BRIDGE 64 #define CLKID_USB0_DDR_BRIDGE 65 +#define CLKID_SANA 69 #define CLKID_GCLK_VENCI_INT0 77 #define CLKID_AO_I2C 93 #define CLKID_SD_EMMC_A 94 #define CLKID_SD_EMMC_B 95 #define CLKID_SD_EMMC_C 96 +#define CLKID_SAR_ADC_CLK 97 +#define CLKID_SAR_ADC_SEL 98 #endif /* __GXBB_CLKC_H */ -- GitLab From 7eea67101b9713ae438955e8899b3c4b078419f9 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 20 Jan 2017 07:57:52 -0800 Subject: [PATCH 0051/1084] ARM64: dts: meson-gxl: rename Nexbox A95x for consistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the GXL family has S905X and S905D SoCs, we're keeping the SoC name in the DTS filename for clarity. Rename this file accordingly to be consistent with the rest of the GXL DTS files. Cc: Neil Armstrong Reviewed-by: Andreas Färber Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/Makefile | 2 +- ...eson-gxl-nexbox-a95x.dts => meson-gxl-s905x-nexbox-a95x.dts} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename arch/arm64/boot/dts/amlogic/{meson-gxl-nexbox-a95x.dts => meson-gxl-s905x-nexbox-a95x.dts} (100%) diff --git a/arch/arm64/boot/dts/amlogic/Makefile b/arch/arm64/boot/dts/amlogic/Makefile index 293e985e3dfa..3f94bce33b7f 100644 --- a/arch/arm64/boot/dts/amlogic/Makefile +++ b/arch/arm64/boot/dts/amlogic/Makefile @@ -10,7 +10,7 @@ dtb-$(CONFIG_ARCH_MESON) += meson-gxbb-wetek-play2.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905x-p212.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p230.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905d-p231.dtb -dtb-$(CONFIG_ARCH_MESON) += meson-gxl-nexbox-a95x.dtb +dtb-$(CONFIG_ARCH_MESON) += meson-gxl-s905x-nexbox-a95x.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxm-q200.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxm-q201.dtb dtb-$(CONFIG_ARCH_MESON) += meson-gxm-nexbox-a1.dtb diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-nexbox-a95x.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts similarity index 100% rename from arch/arm64/boot/dts/amlogic/meson-gxl-nexbox-a95x.dts rename to arch/arm64/boot/dts/amlogic/meson-gxl-s905x-nexbox-a95x.dts -- GitLab From 1c06552a703c6a4256976a1615d262b5a0d754e5 Mon Sep 17 00:00:00 2001 From: Jason Jin Date: Fri, 25 Nov 2016 14:40:55 +0800 Subject: [PATCH 0052/1084] powerpc/85xx: Enable display support for t1042rdb Add a diu_ops implementation for t1042rdb. 
Signed-off-by: Jason Jin [Meng Yi: Made file t1042rdb-specific] Signed-off-by: Meng Yi [scottwood: clean up commit message] Signed-off-by: Scott Wood --- arch/powerpc/platforms/85xx/Makefile | 1 + arch/powerpc/platforms/85xx/t1042rdb_diu.c | 152 +++++++++++++++++++++ 2 files changed, 153 insertions(+) create mode 100644 arch/powerpc/platforms/85xx/t1042rdb_diu.c diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 7bc86dae9517..fe19dad568e2 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_P1022_RDK) += p1022_rdk.o obj-$(CONFIG_P1023_RDB) += p1023_rdb.o obj-$(CONFIG_TWR_P102x) += twr_p102x.o obj-$(CONFIG_CORENET_GENERIC) += corenet_generic.o +obj-$(CONFIG_FB_FSL_DIU) += t1042rdb_diu.o obj-$(CONFIG_STX_GP3) += stx_gp3.o obj-$(CONFIG_TQM85xx) += tqm85xx.o obj-$(CONFIG_SBC8548) += sbc8548.o diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c new file mode 100644 index 000000000000..58fa3d319f1c --- /dev/null +++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c @@ -0,0 +1,152 @@ +/* + * T1042 platform DIU operation + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include + +#include + +/*DIU Pixel ClockCR offset in scfg*/ +#define CCSR_SCFG_PIXCLKCR 0x28 + +/* DIU Pixel Clock bits of the PIXCLKCR */ +#define PIXCLKCR_PXCKEN 0x80000000 +#define PIXCLKCR_PXCKINV 0x40000000 +#define PIXCLKCR_PXCKDLY 0x0000FF00 +#define PIXCLKCR_PXCLK_MASK 0x00FF0000 + +/* Some CPLD register definitions */ +#define CPLD_DIUCSR 0x16 +#define CPLD_DIUCSR_DVIEN 0x80 +#define CPLD_DIUCSR_BACKLIGHT 0x0f + +struct device_node *cpld_node; + +/** + * t1042rdb_set_monitor_port: switch the output to a different monitor port + */ +static void t1042rdb_set_monitor_port(enum fsl_diu_monitor_port port) +{ + static void __iomem *cpld_base; + + cpld_base = of_iomap(cpld_node, 0); + if (!cpld_base) { + pr_err("%s: Could not map cpld registers\n", __func__); + goto exit; + } + + switch (port) { + case FSL_DIU_PORT_DVI: + /* Enable the DVI(HDMI) port, disable the DFP and + * the backlight + */ + clrbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_DVIEN); + break; + case FSL_DIU_PORT_LVDS: + /* + * LVDS also needs backlight enabled, otherwise the display + * will be blank. + */ + /* Enable the DFP port, disable the DVI*/ + setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 8); + setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 4); + setbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_BACKLIGHT); + break; + default: + pr_err("%s: Unsupported monitor port %i\n", __func__, port); + } + + iounmap(cpld_base); +exit: + of_node_put(cpld_node); +} + +/** + * t1042rdb_set_pixel_clock: program the DIU's clock + * @pixclock: pixel clock in ps (pico seconds) + */ +static void t1042rdb_set_pixel_clock(unsigned int pixclock) +{ + struct device_node *scfg_np; + void __iomem *scfg; + unsigned long freq; + u64 temp; + u32 pxclk; + + scfg_np = of_find_compatible_node(NULL, NULL, "fsl,t1040-scfg"); + if (!scfg_np) { + pr_err("%s: Missing scfg node. Can not display video.\n", + __func__); + return; + } + + scfg = of_iomap(scfg_np, 0); + of_node_put(scfg_np); + if (!scfg) { + pr_err("%s: Could not map device. 
Can not display video.\n", + __func__); + return; + } + + /* Convert pixclock into frequency */ + temp = 1000000000000ULL; + do_div(temp, pixclock); + freq = temp; + + /* + * 'pxclk' is the ratio of the platform clock to the pixel clock. + * This number is programmed into the PIXCLKCR register, and the valid + * range of values is 2-255. + */ + pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq); + pxclk = clamp_t(u32, pxclk, 2, 255); + + /* Disable the pixel clock, and set it to non-inverted and no delay */ + clrbits32(scfg + CCSR_SCFG_PIXCLKCR, + PIXCLKCR_PXCKEN | PIXCLKCR_PXCKDLY | PIXCLKCR_PXCLK_MASK); + + /* Enable the clock and set the pxclk */ + setbits32(scfg + CCSR_SCFG_PIXCLKCR, PIXCLKCR_PXCKEN | (pxclk << 16)); + + iounmap(scfg); +} + +/** + * t1042rdb_valid_monitor_port: set the monitor port for sysfs + */ +static enum fsl_diu_monitor_port +t1042rdb_valid_monitor_port(enum fsl_diu_monitor_port port) +{ + switch (port) { + case FSL_DIU_PORT_DVI: + case FSL_DIU_PORT_LVDS: + return port; + default: + return FSL_DIU_PORT_DVI; /* Dual-link LVDS is not supported */ + } +} + +static int __init t1042rdb_diu_init(void) +{ + cpld_node = of_find_compatible_node(NULL, NULL, "fsl,t1042rdb-cpld"); + if (!cpld_node) + return 0; + + diu_ops.set_monitor_port = t1042rdb_set_monitor_port; + diu_ops.set_pixel_clock = t1042rdb_set_pixel_clock; + diu_ops.valid_monitor_port = t1042rdb_valid_monitor_port; + + return 0; +} + +early_initcall(t1042rdb_diu_init); -- GitLab From a674c7d470bb47e82f4eb1fa944eadeac2f6bbaf Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Thu, 15 Dec 2016 14:22:26 +0100 Subject: [PATCH 0053/1084] powerpc/corenet: explicitly disable the SDHC controller on kmcoge4 It is not implemented on the kmcoge4 hardware and if not disabled it leads to error messages with the corenet32_smp_defconfig. Signed-off-by: Valentin Longchamp Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/kmcoge4.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/boot/dts/fsl/kmcoge4.dts b/arch/powerpc/boot/dts/fsl/kmcoge4.dts index ae70a24094b0..e103c0f3f650 100644 --- a/arch/powerpc/boot/dts/fsl/kmcoge4.dts +++ b/arch/powerpc/boot/dts/fsl/kmcoge4.dts @@ -83,6 +83,10 @@ flash@2 { }; }; + sdhc@114000 { + status = "disabled"; + }; + i2c@119000 { status = "disabled"; }; -- GitLab From 7b51f8e35e11b9226f439d154f54b8ae8d128e34 Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Thu, 15 Dec 2016 14:22:27 +0100 Subject: [PATCH 0054/1084] powerpc/85xx: remove the kmp204x_defconfig It is not maintained and thus obsolete. corenet32_smp_defconfig can be used as reference for the kmcoge4/kmp204x boards. 
Signed-off-by: Valentin Longchamp Signed-off-by: Scott Wood --- arch/powerpc/configs/85xx/kmp204x_defconfig | 220 -------------------- 1 file changed, 220 deletions(-) delete mode 100644 arch/powerpc/configs/85xx/kmp204x_defconfig diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig deleted file mode 100644 index aaaaa609cd24..000000000000 --- a/arch/powerpc/configs/85xx/kmp204x_defconfig +++ /dev/null @@ -1,220 +0,0 @@ -CONFIG_PPC_85xx=y -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_AUDIT=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_CGROUPS=y -CONFIG_CGROUP_SCHED=y -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_ALL=y -CONFIG_EMBEDDED=y -CONFIG_PERF_EVENTS=y -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y -CONFIG_CORENET_GENERIC=y -CONFIG_MPIC_MSGR=y -CONFIG_HIGHMEM=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_BINFMT_MISC=m -CONFIG_KEXEC=y -CONFIG_FORCE_MAX_ZONEORDER=13 -CONFIG_PCI=y -CONFIG_PCIEPORTBUS=y -# CONFIG_PCIEASPM is not set -CONFIG_PCI_MSI=y -CONFIG_ADVANCED_OPTIONS=y -CONFIG_LOWMEM_SIZE_BOOL=y -CONFIG_LOWMEM_SIZE=0x20000000 -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=y -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_STATISTICS=y -CONFIG_NET_KEY=y -CONFIG_NET_KEY_MIGRATE=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -CONFIG_NET_IPIP=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_INET_AH=y -CONFIG_INET_ESP=y -CONFIG_INET_IPCOMP=y -CONFIG_IPV6=y -CONFIG_IP_SCTP=m -CONFIG_TIPC=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=y -CONFIG_NET_SCH_HTB=y -CONFIG_NET_SCH_HFSC=y -CONFIG_NET_SCH_PRIO=y -CONFIG_NET_SCH_MULTIQ=y -CONFIG_NET_SCH_RED=y -CONFIG_NET_SCH_SFQ=y -CONFIG_NET_SCH_TEQL=y -CONFIG_NET_SCH_TBF=y -CONFIG_NET_SCH_GRED=y -CONFIG_NET_CLS_BASIC=y -CONFIG_NET_CLS_TCINDEX=y -CONFIG_NET_CLS_U32=y -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_FLOW=y -CONFIG_NET_CLS_CGROUP=y -CONFIG_UEVENT_HELPER_PATH="/sbin/mdev" -CONFIG_DEVTMPFS=y -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_PHRAM=y -CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_ECC_BCH=y -CONFIG_MTD_NAND_FSL_ELBC=y -CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_GLUEBI=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=2 -CONFIG_BLK_DEV_RAM_SIZE=2048 -CONFIG_EEPROM_AT24=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=y -CONFIG_BLK_DEV_SR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SYM53C8XX_2=y -CONFIG_NETDEVICES=y -# CONFIG_NET_VENDOR_3COM is not set -# CONFIG_NET_VENDOR_ADAPTEC is not set -# CONFIG_NET_VENDOR_ALTEON is not set -# CONFIG_NET_VENDOR_AMD is not set -# CONFIG_NET_VENDOR_ATHEROS is not set -# CONFIG_NET_VENDOR_BROADCOM is not set -# CONFIG_NET_VENDOR_BROCADE is not set -# CONFIG_NET_VENDOR_CHELSIO is not set -# CONFIG_NET_VENDOR_CISCO is not set -# CONFIG_NET_VENDOR_DEC is not set -# CONFIG_NET_VENDOR_DLINK is not set -# CONFIG_NET_VENDOR_EMULEX is not set -# CONFIG_NET_VENDOR_EXAR is not set 
-CONFIG_FSL_PQ_MDIO=y -CONFIG_FSL_XGMAC_MDIO=y -# CONFIG_NET_VENDOR_HP is not set -# CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MARVELL is not set -# CONFIG_NET_VENDOR_MELLANOX is not set -# CONFIG_NET_VENDOR_MICREL is not set -# CONFIG_NET_VENDOR_MICROCHIP is not set -# CONFIG_NET_VENDOR_MYRI is not set -# CONFIG_NET_VENDOR_NATSEMI is not set -# CONFIG_NET_VENDOR_NVIDIA is not set -# CONFIG_NET_VENDOR_OKI is not set -# CONFIG_NET_PACKET_ENGINE is not set -# CONFIG_NET_VENDOR_QLOGIC is not set -# CONFIG_NET_VENDOR_REALTEK is not set -# CONFIG_NET_VENDOR_RDC is not set -# CONFIG_NET_VENDOR_SEEQ is not set -# CONFIG_NET_VENDOR_SILAN is not set -# CONFIG_NET_VENDOR_SIS is not set -# CONFIG_NET_VENDOR_SMSC is not set -# CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SUN is not set -# CONFIG_NET_VENDOR_TEHUTI is not set -# CONFIG_NET_VENDOR_TI is not set -# CONFIG_NET_VENDOR_VIA is not set -# CONFIG_NET_VENDOR_WIZNET is not set -# CONFIG_NET_VENDOR_XILINX is not set -CONFIG_MARVELL_PHY=y -CONFIG_VITESSE_PHY=y -CONFIG_FIXED_PHY=y -# CONFIG_WLAN is not set -# CONFIG_INPUT_MOUSEDEV is not set -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -CONFIG_SERIO_LIBPS2=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_PPC_EPAPR_HV_BYTECHAN=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_DETECT_IRQ=y -CONFIG_SERIAL_8250_RSA=y -CONFIG_NVRAM=y -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_MUX=y -CONFIG_I2C_MUX_PCA954x=y -CONFIG_I2C_MPC=y -CONFIG_SPI=y -CONFIG_SPI_FSL_SPI=y -CONFIG_SPI_FSL_ESPI=y -CONFIG_SPI_SPIDEV=m -CONFIG_PTP_1588_CLOCK=y -# CONFIG_HWMON is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y -CONFIG_EDAC_MPC85XX=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_DS3232=y -CONFIG_RTC_DRV_CMOS=y -CONFIG_UIO=y -CONFIG_STAGING=y -CONFIG_CLK_QORIQ=y -CONFIG_EXT2_FS=y -CONFIG_NTFS_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y -CONFIG_UBIFS_FS=y -CONFIG_CRAMFS=y -CONFIG_SQUASHFS=y -CONFIG_SQUASHFS_XZ=y -CONFIG_NFS_FS=y -CONFIG_NFS_V4=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_UTF8=m -CONFIG_CRC_ITU_T=m -CONFIG_DEBUG_INFO=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_SHIRQ=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_SCHEDSTATS=y -CONFIG_RCU_TRACE=y -CONFIG_UPROBE_EVENT=y -CONFIG_CRYPTO_NULL=y -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_SHA256=y -CONFIG_CRYPTO_SHA512=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRYPTO_DEV_FSL_CAAM=y -- GitLab From fdc8c4adb6a465b5873df97fcbd1cb332ade019b Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Thu, 15 Dec 2016 14:22:28 +0100 Subject: [PATCH 0055/1084] powerpc/corenet: add support for the kmcent2 board This board is built around Freescale's T1040 SoC. The peripherals used by this design are: - DDR3 RAM with SPD support - parallel NOR Flash as boot medium - 1 PCIe bus (PCIe1 x1) - 3 FMAN Ethernet devices (FMAN1 DTSEC1/2/5) - 4 IFC bus devices: - NOR flash - NAND flash - QRIO reset/power mgmt CPLD - BFTIC chassis management CPLD - 2 I2C buses - 1 SPI bus - HDLC bus with the QE's UCC1 - last but not least, the mandatory serial port The board can be used with the corenet32_smp_defconfig. 
Signed-off-by: Valentin Longchamp Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/kmcent2.dts | 303 ++++++++++++++++++ arch/powerpc/platforms/85xx/corenet_generic.c | 1 + 2 files changed, 304 insertions(+) create mode 100644 arch/powerpc/boot/dts/fsl/kmcent2.dts diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts new file mode 100644 index 000000000000..47afa438602e --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/kmcent2.dts @@ -0,0 +1,303 @@ +/* + * Keymile kmcent2 Device Tree Source, based on T1040RDB DTS + * + * (C) Copyright 2016 + * Valentin Longchamp, Keymile AG, valentin.longchamp@keymile.com + * + * Copyright 2014 - 2015 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "t104xsi-pre.dtsi" + +/ { + model = "keymile,kmcent2"; + compatible = "keymile,kmcent2"; + + aliases { + front_phy = &front_phy; + }; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + bman_fbpr: bman-fbpr { + size = <0 0x1000000>; + alignment = <0 0x1000000>; + }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x04000000 + 1 0 0xf 0xfa000000 0x00010000 + 2 0 0xf 0xfb000000 0x00010000 + 4 0 0xf 0xc0000000 0x08000000 + 6 0 0xf 0xd0000000 0x08000000 + 7 0 0xf 0xd8000000 0x08000000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x04000000>; + bank-width = <2>; + device-width = <2>; + }; + + nand@1,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x1 0x0 0x10000>; + }; + + board-control@2,0 { + compatible = "keymile,qriox"; + reg = <0x2 0x0 0x80>; + }; + + chassis-mgmt@6,0 { + compatible = "keymile,bfticu"; + reg = <6 0 0x100>; + interrupt-controller; + interrupt-parent = <&mpic>; + interrupts = <11 1 0 0>; + #interrupt-cells = <1>; + }; + + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + bportals: bman-portals@ff4000000 { + ranges = <0x0 0xf 0xf4000000 0x2000000>; + }; + + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + + spi@110000 { + network-clock@1 { + compatible = "zarlink,zl30364"; + reg = <1>; + spi-max-frequency = <1000000>; + }; + }; + + sdhc@114000 { + status = "disabled"; + }; + + i2c@118000 { + clock-frequency = <100000>; + + mux@70 { + compatible = "nxp,pca9547"; + reg = <0x70>; + #address-cells = <1>; + #size-cells = <0>; + i2c-mux-idle-disconnect; + + i2c@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + + eeprom@54 { + compatible = "24c02"; + reg = <0x54>; + pagesize = <2>; + read-only; + label = "ddr3-spd"; + }; + }; + + i2c@7 { + reg = <7>; + #address-cells = <1>; + #size-cells = <0>; + + temp-sensor@48 { + compatible = "national,lm75"; + reg = <0x48>; + label = "SENSOR_0"; + }; + temp-sensor@4a { + compatible = "national,lm75"; + reg = <0x4a>; + label = "SENSOR_2"; + }; + temp-sensor@4b { + compatible = "national,lm75"; 
+ reg = <0x4b>; + label = "SENSOR_3"; + }; + }; + }; + }; + + i2c@118100 { + clock-frequency = <100000>; + + eeprom@50 { + compatible = "atmel,24c08"; + reg = <0x50>; + pagesize = <16>; + }; + + eeprom@54 { + compatible = "atmel,24c08"; + reg = <0x54>; + pagesize = <16>; + }; + }; + + i2c@119000 { + status = "disabled"; + }; + + i2c@119100 { + status = "disabled"; + }; + + serial2: serial@11d500 { + status = "disabled"; + }; + + serial3: serial@11d600 { + status = "disabled"; + }; + + usb0: usb@210000 { + status = "disabled"; + }; + usb1: usb@211000 { + status = "disabled"; + }; + + display@180000 { + status = "disabled"; + }; + + sata@220000 { + status = "disabled"; + }; + sata@221000 { + status = "disabled"; + }; + + fman@400000 { + ethernet@e0000 { + fixed-link = <0 1 1000 0 0>; + phy-connection-type = "sgmii"; + }; + + ethernet@e2000 { + fixed-link = <1 1 1000 0 0>; + phy-connection-type = "sgmii"; + }; + + ethernet@e4000 { + status = "disabled"; + }; + + ethernet@e6000 { + status = "disabled"; + }; + + ethernet@e8000 { + phy-handle = <&front_phy>; + phy-connection-type = "rgmii"; + }; + + mdio0: mdio@fc000 { + front_phy: ethernet-phy@11 { + reg = <0x11>; + }; + }; + }; + }; + + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + status = "disabled"; + }; + + pci2: pcie@ffe260000 { + status = "disabled"; + }; + + pci3: pcie@ffe270000 { + status = "disabled"; + }; + + qe: qe@ffe140000 { + ranges = <0x0 0xf 0xfe140000 0x40000>; + reg = <0xf 0xfe140000 0 0x480>; + brg-frequency = <0>; + bus-frequency = <0>; + + si1: si@700 { + compatible = "fsl,t1040-qe-si"; + reg = <0x700 0x80>; + }; + + siram1: siram@1000 { + compatible = "fsl,t1040-qe-siram"; + reg = <0x1000 0x800>; + }; + + ucc_hdlc: ucc@2000 { + device_type = "hdlc"; + compatible = "fsl,ucc-hdlc"; + rx-clock-name = "clk9"; + tx-clock-name = "clk9"; + fsl,tx-timeslot-mask = <0xfffffffe>; + fsl,rx-timeslot-mask = <0xfffffffe>; + fsl,siram-entry-id = <0>; + }; + }; +}; + +#include "t1040si-post.dtsi" diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 6c0ba75fb256..ac191a7a1337 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -157,6 +157,7 @@ static const char * const boards[] __initconst = { "fsl,T1040RDB", "fsl,T1042RDB", "fsl,T1042RDB_PI", + "keymile,kmcent2", "keymile,kmcoge4", "varisys,CYRUS", NULL -- GitLab From fa769d3f58e6b0db4ed9f5f05ef1f251692f90c6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Nov 2016 09:52:13 +0100 Subject: [PATCH 0056/1084] powerpc/32: Enable HW_BREAKPOINT on BOOK3S BOOK3S also has DABR register and capability to handle data breakpoints, so this patch enable it on all BOOK3S, not only 64 bits. 
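For illustration only (not part of this patch), a minimal userspace sketch of what enabling HAVE_HW_BREAKPOINT on 32-bit BOOK3S provides: a data write watchpoint requested through the generic perf breakpoint interface. The watched variable is made up and error handling is reduced to the essentials.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static int watched;			/* hypothetical variable to watch */

int main(void)
{
	struct perf_event_attr attr;
	long long count = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.bp_type = HW_BREAKPOINT_W;		/* fire on writes */
	attr.bp_addr = (unsigned long)&watched;
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.exclude_kernel = 1;

	/* monitor the calling task on any CPU */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	watched = 42;				/* this write is counted */

	read(fd, &count, sizeof(count));
	printf("watchpoint hits: %lld\n", count);
	close(fd);
	return 0;
}
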
Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/processor.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a8ee573fe610..1971ed4e0336 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -113,7 +113,7 @@ config PPC select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 + select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S select ARCH_WANT_IPC_PARSE_VERSION select SPARSE_IRQ select IRQ_DOMAIN diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 1ba814436c73..2053a4b0914f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -225,6 +225,7 @@ struct thread_struct { #ifdef CONFIG_PPC64 unsigned long start_tb; /* Start purr when proc switched in */ unsigned long accum_tb; /* Total accumulated purr for process */ +#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT struct perf_event *ptrace_bps[HBP_NUM]; /* @@ -233,7 +234,6 @@ struct thread_struct { */ struct perf_event *last_hit_ubp; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */ unsigned long trap_nr; /* last trap # on this thread */ u8 load_fp; -- GitLab From 4ad8622dc54895c0072ddc919a83ea2a2f05605f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 Nov 2016 09:52:15 +0100 Subject: [PATCH 0057/1084] powerpc/8xx: Implement hw_breakpoint This patch implements HW breakpoint on the 8xx. The 8xx has capability to manage HW breakpoints, which is slightly different than BOOK3S: 1/ The breakpoint match doesn't trigger a DSI exception but a dedicated data breakpoint exception. 2/ The breakpoint happens after the instruction has completed, no need to single step or emulate the instruction, 3/ Matched address is not set in DAR but in BAR, 4/ DABR register doesn't exist, instead we have registers LCTRL1, LCTRL2 and CMPx registers, 5/ The match on one comparator is not on a double word but on a single word. The patch does: 1/ Prepare the dedicated registers in call to __set_dabr(). In order to emulate the double word handling of BOOK3S, comparator E is set to DABR address value and comparator F to address + 4. Then breakpoint 1 is set to match comparator E or F, 2/ Skip the singlestepping stage when compiled for CONFIG_PPC_8xx, 3/ Implement the exception. In that exception, the matched address is taken from SPRN_BAR and manage as if it was from SPRN_DAR. 4/ I/D TLB error exception routines perform a tlbie on bad TLBs. That tlbie triggers the breakpoint exception when performed on the breakpoint address. For this reason, the routine returns if the match is from one of those two tlbie. 
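For illustration only (not part of this patch), a rough sketch of the generic in-kernel breakpoint API whose requests end up in this 8xx code, modelled on samples/hw_breakpoint/data_breakpoint.c; the symbol chosen to watch is an arbitrary example.

#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static struct perf_event * __percpu *wp;

/* called after the access has completed (cf. point 2/ above) */
static void wp_handler(struct perf_event *bp, struct perf_sample_data *data,
		       struct pt_regs *regs)
{
	pr_info("watched location was accessed\n");
	dump_stack();
}

static int __init wp_init(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = kallsyms_lookup_name("pid_max");	/* arbitrary example */
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;

	wp = register_wide_hw_breakpoint(&attr, wp_handler, NULL);
	if (IS_ERR((void __force *)wp))
		return PTR_ERR((void __force *)wp);
	return 0;
}

static void __exit wp_exit(void)
{
	unregister_wide_hw_breakpoint(wp);
}

module_init(wp_init);
module_exit(wp_exit);
MODULE_LICENSE("GPL");
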
Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/reg_8xx.h | 7 +++++++ arch/powerpc/kernel/head_8xx.S | 28 +++++++++++++++++++++++++++- arch/powerpc/kernel/hw_breakpoint.c | 6 +++++- arch/powerpc/kernel/process.c | 22 ++++++++++++++++++++++ 5 files changed, 62 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1971ed4e0336..8b201eefdeaa 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -113,7 +113,7 @@ config PPC select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S + select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select ARCH_WANT_IPC_PARSE_VERSION select SPARSE_IRQ select IRQ_DOMAIN diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h index 1f1636124a04..c52725b7085d 100644 --- a/arch/powerpc/include/asm/reg_8xx.h +++ b/arch/powerpc/include/asm/reg_8xx.h @@ -29,6 +29,13 @@ #define SPRN_EIE 80 /* External interrupt enable (EE=1, RI=1) */ #define SPRN_EID 81 /* External interrupt disable (EE=0, RI=1) */ +/* Debug registers */ +#define SPRN_CMPE 152 +#define SPRN_CMPF 153 +#define SPRN_LCTRL1 156 +#define SPRN_LCTRL2 157 +#define SPRN_BAR 159 + /* Commands. Only the first few are available to the instruction cache. */ #define IDC_ENABLE 0x02000000 /* Cache enable */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1a9c99d3e5d8..5fcbd79a121d 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -561,6 +561,7 @@ InstructionTLBError: andis. r10,r5,0x4000 beq+ 1f tlbie r4 +itlbie: /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ 1: EXC_XFER_LITE(0x400, handle_page_fault) @@ -585,6 +586,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ andis. r10,r5,0x4000 beq+ 1f tlbie r4 +dtlbie: 1: li r10,RPN_PATTERN mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ @@ -602,7 +604,27 @@ DARFixed:/* Return from dcbx instruction bug workaround */ * support of breakpoints and such. Someday I will get around to * using them. */ - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) + . 
= 0x1c00 +DataBreakpoint: + EXCEPTION_PROLOG_0 + mfcr r10 + mfspr r11, SPRN_SRR0 + cmplwi cr0, r11, (dtlbie - PAGE_OFFSET)@l + cmplwi cr7, r11, (itlbie - PAGE_OFFSET)@l + beq- cr0, 11f + beq- cr7, 11f + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 + addi r3,r1,STACK_FRAME_OVERHEAD + mfspr r4,SPRN_BAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + EXC_XFER_EE(0x1c00, do_break) +11: + mtcr r10 + EXCEPTION_EPILOG_0 + rfi + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) @@ -977,6 +999,10 @@ initial_mmu: lis r8, IDC_ENABLE@h mtspr SPRN_DC_CST, r8 #endif + /* Disable debug mode entry on data breakpoints */ + mfspr r8, SPRN_DER + rlwinm r8, r8, 0, ~0x8 + mtspr SPRN_DER, r8 blr diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 4d3aa05e28be..146eaa9b350e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -211,9 +211,11 @@ int hw_breakpoint_handler(struct die_args *args) int rc = NOTIFY_STOP; struct perf_event *bp; struct pt_regs *regs = args->regs; +#ifndef CONFIG_PPC_8xx int stepped = 1; - struct arch_hw_breakpoint *info; unsigned int instr; +#endif + struct arch_hw_breakpoint *info; unsigned long dar = regs->dar; /* Disable breakpoints during exception handling */ @@ -255,6 +257,7 @@ int hw_breakpoint_handler(struct die_args *args) (dar - bp->attr.bp_addr < bp->attr.bp_len))) info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; +#ifndef CONFIG_PPC_8xx /* Do not emulate user-space instructions, instead single-step them */ if (user_mode(regs)) { current->thread.last_hit_ubp = bp; @@ -278,6 +281,7 @@ int hw_breakpoint_handler(struct die_args *args) perf_event_disable_inatomic(bp); goto out; } +#endif /* * As a policy, the callback is invoked in a 'trigger-after-execute' * fashion diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 04885cec24df..2dcb65fee638 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -736,6 +736,28 @@ static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) mtspr(SPRN_DABRX, dabrx); return 0; } +#elif defined(CONFIG_PPC_8xx) +static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) +{ + unsigned long addr = dabr & ~HW_BRK_TYPE_DABR; + unsigned long lctrl1 = 0x90000000; /* compare type: equal on E & F */ + unsigned long lctrl2 = 0x8e000002; /* watchpoint 1 on cmp E | F */ + + if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ) + lctrl1 |= 0xa0000; + else if ((dabr & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE) + lctrl1 |= 0xf0000; + else if ((dabr & HW_BRK_TYPE_RDWR) == 0) + lctrl2 = 0; + + mtspr(SPRN_LCTRL2, 0); + mtspr(SPRN_CMPE, addr); + mtspr(SPRN_CMPF, addr + 4); + mtspr(SPRN_LCTRL1, lctrl1); + mtspr(SPRN_LCTRL2, lctrl2); + + return 0; +} #else static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) { -- GitLab From 2add203169fd6bf419176e283f701d26944bce41 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 13 Dec 2016 17:57:38 +0100 Subject: [PATCH 0058/1084] powerpc/32: Remove FIX_SRR1 FIX_SRR1() is defined as blank. Last useful instance of FIX_SRR1() was removed by commit 40ef8cbc6d360 ("powerpc: Get 64-bit configs to compile with ARCH=powerpc") in 2005. 
Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/ppc_asm.h | 1 - arch/powerpc/kernel/entry_32.S | 4 ---- arch/powerpc/kernel/head_32.S | 3 --- 3 files changed, 8 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 025833b8df9f..359c44341761 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -505,7 +505,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) #define MTMSRD(r) mtmsrd r #define MTMSR_EERI(reg) mtmsrd reg,1 #else -#define FIX_SRR1(ra, rb) #ifndef CONFIG_40x #define RFI rfi #else diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 5742dbdbee46..980626ac34ee 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -292,7 +292,6 @@ stack_ovf: lis r9,StackOverflow@ha addi r9,r9,StackOverflow@l LOAD_MSR_KERNEL(r10,MSR_KERNEL) - FIX_SRR1(r10,r12) mtspr SPRN_SRR0,r9 mtspr SPRN_SRR1,r10 SYNC @@ -417,7 +416,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) mtlr r4 mtcr r5 lwz r7,_NIP(r1) - FIX_SRR1(r8, r0) lwz r2,GPR2(r1) lwz r1,GPR1(r1) mtspr SPRN_SRR0,r7 @@ -951,7 +949,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) .globl exc_exit_restart exc_exit_restart: lwz r12,_NIP(r1) - FIX_SRR1(r9,r10) mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r9 REST_4GPRS(9, r1) @@ -1294,7 +1291,6 @@ _GLOBAL(enter_rtas) 1: tophys(r9,r1) lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */ lwz r9,8(r9) /* original msr value */ - FIX_SRR1(r9,r0) addi r1,r1,INT_FRAME_SIZE li r0,0 mtspr SPRN_SPRG_RTAS,r0 diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 9d963547d243..1607be7c0ef2 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -869,7 +869,6 @@ __secondary_start: /* enable MMU and jump to start_secondary */ li r4,MSR_KERNEL - FIX_SRR1(r4,r5) lis r3,start_secondary@h ori r3,r3,start_secondary@l mtspr SPRN_SRR0,r3 @@ -977,7 +976,6 @@ start_here: ori r4,r4,2f@l tophys(r4,r4) li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) - FIX_SRR1(r3,r5) mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 SYNC @@ -1001,7 +999,6 @@ start_here: /* Now turn on the MMU for real! */ li r4,MSR_KERNEL - FIX_SRR1(r4,r5) lis r3,start_kernel@h ori r3,r3,start_kernel@l mtspr SPRN_SRR0,r3 -- GitLab From 75b824727680a9d12c34d78096a5ac642e53f5d0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 15 Dec 2016 13:42:18 +0100 Subject: [PATCH 0059/1084] powerpc/8xx: Perf events on PPC 8xx This patch has been reworked since RFC version. In the RFC, this patch was preceded by a patch clearing MSR RI for all PPC32 at all time at exception prologs. Now MSR RI clearing is done only when this 8xx perf events functionality is compiled in, it is therefore limited to 8xx and merged inside this patch. Other main changes have been to take into account detailed review from Peter Zijlstra. The instructions counter has been reworked to behave as a free running counter like the three other counters. The 8xx has no PMU, however some events can be emulated by other means. This patch implements the following events (as reported by 'perf list'): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] dTLB-load-misses [Hardware cache event] iTLB-load-misses [Hardware cache event] 'cycles' event is implemented using the timebase clock. Timebase clock corresponds to CPU clock divided by 16, so number of cycles is approximatly 16 times the number of TB ticks On the 8xx, TLB misses are handled by software. 
It is therefore easy to count all TLB misses each time the TLB miss exception is called. 'instructions' is calculated by using instruction watchpoint counter. This patch sets counter A to count instructions at address greater than 0, hence we count all instructions executed while MSR RI bit is set. The counter is set to the maximum which is 0xffff. Every 65535 instructions, debug instruction breakpoint exception fires. The exception handler increments a counter in memory which then represent the upper part of the instruction counter. We therefore end up with a 48 bits counter. In order to avoid unnecessary overhead while no perf event is active, this counter is started when the first event referring to this counter is added, and the counter is stopped when the last event referring to it is deleted. In order to properly support breakpoint exceptions, MSR RI bit has to be unset in exception epilogs in order to avoid breakpoint exceptions during critical sections during changes to SRR0 and SRR1 would be problematic. All counters are handled as free running counters. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/reg.h | 2 + arch/powerpc/include/asm/reg_8xx.h | 4 + arch/powerpc/kernel/entry_32.S | 15 +++ arch/powerpc/kernel/head_8xx.S | 46 ++++++- arch/powerpc/perf/8xx-pmu.c | 173 +++++++++++++++++++++++++ arch/powerpc/perf/Makefile | 2 + arch/powerpc/platforms/Kconfig.cputype | 7 + 7 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/perf/8xx-pmu.c diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 0d4531aa2052..9098b35b46a8 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -548,7 +548,9 @@ #define SPRN_IBAT7U 0x236 /* Instruction BAT 7 Upper Register */ #define SPRN_ICMP 0x3D5 /* Instruction TLB Compare Register */ #define SPRN_ICTC 0x3FB /* Instruction Cache Throttling Control Reg */ +#ifndef SPRN_ICTRL #define SPRN_ICTRL 0x3F3 /* 1011 7450 icache and interrupt ctrl */ +#endif #define ICTRL_EICE 0x08000000 /* enable icache parity errs */ #define ICTRL_EDC 0x04000000 /* enable dcache parity errs */ #define ICTRL_EICP 0x00000100 /* enable icache par. check */ diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h index c52725b7085d..ae16fef7a4d6 100644 --- a/arch/powerpc/include/asm/reg_8xx.h +++ b/arch/powerpc/include/asm/reg_8xx.h @@ -28,12 +28,16 @@ /* Special MSR manipulation registers */ #define SPRN_EIE 80 /* External interrupt enable (EE=1, RI=1) */ #define SPRN_EID 81 /* External interrupt disable (EE=0, RI=1) */ +#define SPRN_NRI 82 /* Non recoverable interrupt (EE=0, RI=0) */ /* Debug registers */ +#define SPRN_CMPA 144 +#define SPRN_COUNTA 150 #define SPRN_CMPE 152 #define SPRN_CMPF 153 #define SPRN_LCTRL1 156 #define SPRN_LCTRL2 157 +#define SPRN_ICTRL 158 #define SPRN_BAR 159 /* Commands. Only the first few are available to the instruction cache. 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 980626ac34ee..f3e4fc1c1b4d 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -205,6 +205,9 @@ transfer_to_handler_cont: mflr r9 lwz r11,0(r9) /* virtual address of handler */ lwz r9,4(r9) /* where to go when done */ +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif #ifdef CONFIG_TRACE_IRQFLAGS lis r12,reenable_mmu@h ori r12,r12,reenable_mmu@l @@ -292,6 +295,9 @@ stack_ovf: lis r9,StackOverflow@ha addi r9,r9,StackOverflow@l LOAD_MSR_KERNEL(r10,MSR_KERNEL) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR0,r9 mtspr SPRN_SRR1,r10 SYNC @@ -418,6 +424,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) lwz r7,_NIP(r1) lwz r2,GPR2(r1) lwz r1,GPR1(r1) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 SYNC @@ -701,6 +710,9 @@ fast_exception_return: lwz r10,_LINK(r11) mtlr r10 REST_GPR(10, r11) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR1,r9 mtspr SPRN_SRR0,r12 REST_GPR(9, r11) @@ -949,6 +961,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) .globl exc_exit_restart exc_exit_restart: lwz r12,_NIP(r1) +#ifdef CONFIG_PPC_8xx_PERF_EVENT + mtspr SPRN_NRI, r0 +#endif mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r9 REST_4GPRS(9, r1) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 5fcbd79a121d..c032fe8c2d26 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -329,6 +329,12 @@ InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 +#ifdef CONFIG_PPC_8xx_PERF_EVENT + lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha + lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, 1 + stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10) +#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -429,6 +435,12 @@ InstructionTLBMiss: DataStoreTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r3 EXCEPTION_PROLOG_0 +#ifdef CONFIG_PPC_8xx_PERF_EVENT + lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha + lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, 1 + stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10) +#endif mfcr r3 /* If we are faulting a kernel address, we have to use the @@ -625,7 +637,22 @@ DataBreakpoint: EXCEPTION_EPILOG_0 rfi +#ifdef CONFIG_PPC_8xx_PERF_EVENT + . 
= 0x1d00 +InstructionBreakpoint: + EXCEPTION_PROLOG_0 + lis r10, (instruction_counter - PAGE_OFFSET)@ha + lwz r11, (instruction_counter - PAGE_OFFSET)@l(r10) + addi r11, r11, -1 + stw r11, (instruction_counter - PAGE_OFFSET)@l(r10) + lis r10, 0xffff + ori r10, r10, 0x01 + mtspr SPRN_COUNTA, r10 + EXCEPTION_EPILOG_0 + rfi +#else EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) +#endif EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) @@ -999,9 +1026,13 @@ initial_mmu: lis r8, IDC_ENABLE@h mtspr SPRN_DC_CST, r8 #endif - /* Disable debug mode entry on data breakpoints */ + /* Disable debug mode entry on breakpoints */ mfspr r8, SPRN_DER +#ifdef CONFIG_PPC_8xx_PERF_EVENT + rlwinm r8, r8, 0, ~0xc +#else rlwinm r8, r8, 0, ~0x8 +#endif mtspr SPRN_DER, r8 blr @@ -1036,3 +1067,16 @@ cpu6_errata_word: .space 16 #endif +#ifdef CONFIG_PPC_8xx_PERF_EVENT + .globl itlb_miss_counter +itlb_miss_counter: + .space 4 + + .globl dtlb_miss_counter +dtlb_miss_counter: + .space 4 + + .globl instruction_counter +instruction_counter: + .space 4 +#endif diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c new file mode 100644 index 000000000000..3c39f05f0af3 --- /dev/null +++ b/arch/powerpc/perf/8xx-pmu.c @@ -0,0 +1,173 @@ +/* + * Performance event support - PPC 8xx + * + * Copyright 2016 Christophe Leroy, CS Systemes d'Information + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PERF_8xx_ID_CPU_CYCLES 1 +#define PERF_8xx_ID_HW_INSTRUCTIONS 2 +#define PERF_8xx_ID_ITLB_LOAD_MISS 3 +#define PERF_8xx_ID_DTLB_LOAD_MISS 4 + +#define C(x) PERF_COUNT_HW_CACHE_##x +#define DTLB_LOAD_MISS (C(DTLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16)) +#define ITLB_LOAD_MISS (C(ITLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16)) + +extern unsigned long itlb_miss_counter, dtlb_miss_counter; +extern atomic_t instruction_counter; + +static atomic_t insn_ctr_ref; + +static s64 get_insn_ctr(void) +{ + int ctr; + unsigned long counta; + + do { + ctr = atomic_read(&instruction_counter); + counta = mfspr(SPRN_COUNTA); + } while (ctr != atomic_read(&instruction_counter)); + + return ((s64)ctr << 16) | (counta >> 16); +} + +static int event_type(struct perf_event *event) +{ + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) + return PERF_8xx_ID_CPU_CYCLES; + if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) + return PERF_8xx_ID_HW_INSTRUCTIONS; + break; + case PERF_TYPE_HW_CACHE: + if (event->attr.config == ITLB_LOAD_MISS) + return PERF_8xx_ID_ITLB_LOAD_MISS; + if (event->attr.config == DTLB_LOAD_MISS) + return PERF_8xx_ID_DTLB_LOAD_MISS; + break; + case PERF_TYPE_RAW: + break; + default: + return -ENOENT; + } + return -EOPNOTSUPP; +} + +static int mpc8xx_pmu_event_init(struct perf_event *event) +{ + int type = event_type(event); + + if (type < 0) + return type; + return 0; +} + +static int mpc8xx_pmu_add(struct perf_event *event, int flags) +{ + int type = event_type(event); + s64 val = 0; + + if (type < 0) + return type; + + switch (type) { + case PERF_8xx_ID_CPU_CYCLES: + val = get_tb(); + break; + case PERF_8xx_ID_HW_INSTRUCTIONS: + if (atomic_inc_return(&insn_ctr_ref) 
== 1) + mtspr(SPRN_ICTRL, 0xc0080007); + val = get_insn_ctr(); + break; + case PERF_8xx_ID_ITLB_LOAD_MISS: + val = itlb_miss_counter; + break; + case PERF_8xx_ID_DTLB_LOAD_MISS: + val = dtlb_miss_counter; + break; + } + local64_set(&event->hw.prev_count, val); + return 0; +} + +static void mpc8xx_pmu_read(struct perf_event *event) +{ + int type = event_type(event); + s64 prev, val = 0, delta = 0; + + if (type < 0) + return; + + do { + prev = local64_read(&event->hw.prev_count); + switch (type) { + case PERF_8xx_ID_CPU_CYCLES: + val = get_tb(); + delta = 16 * (val - prev); + break; + case PERF_8xx_ID_HW_INSTRUCTIONS: + val = get_insn_ctr(); + delta = prev - val; + if (delta < 0) + delta += 0x1000000000000LL; + break; + case PERF_8xx_ID_ITLB_LOAD_MISS: + val = itlb_miss_counter; + delta = (s64)((s32)val - (s32)prev); + break; + case PERF_8xx_ID_DTLB_LOAD_MISS: + val = dtlb_miss_counter; + delta = (s64)((s32)val - (s32)prev); + break; + } + } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + local64_add(delta, &event->count); +} + +static void mpc8xx_pmu_del(struct perf_event *event, int flags) +{ + mpc8xx_pmu_read(event); + if (event_type(event) != PERF_8xx_ID_HW_INSTRUCTIONS) + return; + + /* If it was the last user, stop counting to avoid useles overhead */ + if (atomic_dec_return(&insn_ctr_ref) == 0) + mtspr(SPRN_ICTRL, 7); +} + +static struct pmu mpc8xx_pmu = { + .event_init = mpc8xx_pmu_event_init, + .add = mpc8xx_pmu_add, + .del = mpc8xx_pmu_del, + .read = mpc8xx_pmu_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT | + PERF_PMU_CAP_NO_NMI, +}; + +static int init_mpc8xx_pmu(void) +{ + mtspr(SPRN_ICTRL, 7); + mtspr(SPRN_CMPA, 0); + mtspr(SPRN_COUNTA, 0xffff); + + return perf_pmu_register(&mpc8xx_pmu, "cpu", PERF_TYPE_RAW); +} + +early_initcall(init_mpc8xx_pmu); diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index f102d5370101..4d606b99a5cb 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -13,5 +13,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o +obj-$(CONFIG_PPC_8xx_PERF_EVENT) += 8xx-pmu.o + obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 6e89e5a8d4fb..99b0ae8acb78 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -172,6 +172,13 @@ config PPC_FPU bool default y if PPC64 +config PPC_8xx_PERF_EVENT + bool "PPC 8xx perf events" + depends on PPC_8xx && PERF_EVENTS + help + This is Performance Events support for PPC 8xx. The 8xx doesn't + have a PMU but some events are emulated using 8xx features. + config FSL_EMB_PERFMON bool "Freescale Embedded Perfmon" depends on E500 || PPC_83xx -- GitLab From 2d3b74d3836901c8601e147b1ae24170f9fa2982 Mon Sep 17 00:00:00 2001 From: jbrunet Date: Thu, 26 Jan 2017 17:05:49 +0100 Subject: [PATCH 0060/1084] clk: gxbb: fix CLKID_ETH defined twice CLKID_ETH is define in the dt-bindings but has not be commented out in the clock driver. Just do it now. 
Fixes: 738f66d3211d ("clk: gxbb: add AmLogic GXBB clk controller driver") Signed-off-by: Jerome Brunet Acked-by: Stephen Boyd Signed-off-by: Kevin Hilman --- drivers/clk/meson/gxbb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h index dc487180f847..8ee2022ce5d5 100644 --- a/drivers/clk/meson/gxbb.h +++ b/drivers/clk/meson/gxbb.h @@ -204,7 +204,7 @@ #define CLKID_ASSIST_MISC 33 /* CLKID_SPI */ #define CLKID_I2S_SPDIF 35 -#define CLKID_ETH 36 +/* CLKID_ETH */ #define CLKID_DEMUX 37 #define CLKID_AIU_GLUE 38 #define CLKID_IEC958 39 -- GitLab From e48512244f653960cfbb002597686362558533d1 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Jan 2017 22:05:26 +0100 Subject: [PATCH 0061/1084] ARM64: dts: meson-gx: add the missing pwm_AO_ab node All Meson GX SoCs (GXBB, GXL and GXM) have a PWM controller within the AO domain. When one of the board's LEDs is connected to one of the AO PWM pins then this can be used to dim that LED (when the leds-pwm driver is used). Add the pwm_AO_ab to allow such devices to use the leds-pwm driver. Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 9212fca6eb94..9110dc1a2481 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -332,6 +332,13 @@ uart_AO_B: serial@4e0 { status = "disabled"; }; + pwm_AO_ab: pwm@550 { + compatible = "amlogic,meson-gx-pwm", "amlogic,meson-gxbb-pwm"; + reg = <0x0 0x00550 0x0 0x10>; + #pwm-cells = <3>; + status = "disabled"; + }; + ir: ir@580 { compatible = "amlogic,meson-gxbb-ir"; reg = <0x0 0x00580 0x0 0x40>; -- GitLab From 249a2243e97edcd2cb0bef01a934d4ba21fb167f Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Jan 2017 22:05:28 +0100 Subject: [PATCH 0062/1084] ARM64: dts: meson-gxl: add the pwm_ao_b pin This adds the pwm_ao_b pin to allow boards which have an LED connected to GPIOAO_9 to use the leds-pwm driver (by activating the pwm_AO_ab node and passing the pwm_ao_b_pin pinctrl-reference). Signed-off-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index bdf2305a2e25..5f1100af72b1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -117,6 +117,13 @@ mux { function = "remote_input_ao"; }; }; + + pwm_ao_b_pins: pwm_ao_b { + mux { + groups = "pwm_ao_b"; + function = "pwm_ao_b"; + }; + }; }; }; -- GitLab From 15f2e88ddd4bc9b2c6b6236162993b5caa80abb9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 16 Dec 2016 14:46:09 -0500 Subject: [PATCH 0063/1084] radix tree: Add some implicit includes We were using spinlock_t and INIT_LIST_HEAD without including spinlock.h or list.h. They were being implicitly included through some other header file, but that's fragile. 
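To illustrate the fragility being removed (hypothetical header, not from the tree): a header that uses spinlock_t and struct list_head without including the headers that declare them only builds while some unrelated header happens to pull them in first.

/* fragile: only builds if <linux/spinlock.h> and <linux/list.h> were
 * already included by whoever included us
 *
 *	struct foo {
 *		spinlock_t		lock;
 *		struct list_head	entries;
 *	};
 */

/* robust: the header states its own dependencies */
#include <linux/spinlock.h>
#include <linux/list.h>

struct foo {
	spinlock_t		lock;
	struct list_head	entries;
};
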
Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 52bda854593b..13d8d741ca34 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -22,11 +22,13 @@ #define _LINUX_RADIX_TREE_H #include -#include -#include #include #include +#include +#include #include +#include +#include /* * The bottom two bits of the slot determine how the remaining bits in the -- GitLab From 35534c869c62f59203c1822769bbef14e894a9e9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 19 Dec 2016 17:43:19 -0500 Subject: [PATCH 0064/1084] radix tree: constify some pointers If we're just getting the value of a tag, or looking up an entry, we won't modify the radix tree, so we can declare these functions as taking a const pointer. Mostly for documentation purposes, though it might help code generation. Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 22 +++++++-------- lib/radix-tree.c | 57 +++++++++++++++++++++----------------- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 13d8d741ca34..32683c7c2e0d 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -125,7 +125,7 @@ do { \ (root)->rnode = NULL; \ } while (0) -static inline bool radix_tree_empty(struct radix_tree_root *root) +static inline bool radix_tree_empty(const struct radix_tree_root *root) { return root->rnode == NULL; } @@ -294,10 +294,10 @@ static inline int radix_tree_insert(struct radix_tree_root *root, { return __radix_tree_insert(root, index, 0, entry); } -void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, +void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, struct radix_tree_node **nodep, void ***slotp); -void *radix_tree_lookup(struct radix_tree_root *, unsigned long); -void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); +void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); +void **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long); typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *); void __radix_tree_replace(struct radix_tree_root *root, struct radix_tree_node *node, @@ -316,10 +316,10 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long); void radix_tree_clear_tags(struct radix_tree_root *root, struct radix_tree_node *node, void **slot); -unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, +unsigned int radix_tree_gang_lookup(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items); -unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, +unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *, void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); @@ -330,19 +330,19 @@ void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag); void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, unsigned int tag); -int radix_tree_tag_get(struct radix_tree_root *root, +int radix_tree_tag_get(const struct radix_tree_root *, unsigned long index, unsigned int tag); void radix_tree_iter_tag_set(struct radix_tree_root *root, const struct radix_tree_iter *iter, unsigned int tag); unsigned int -radix_tree_gang_lookup_tag(struct 
radix_tree_root *root, void **results, +radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag); unsigned int -radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, +radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, void ***results, unsigned long first_index, unsigned int max_items, unsigned int tag); -int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); +int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag); static inline void radix_tree_preload_end(void) { @@ -395,7 +395,7 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) * Also it fills @iter with data about chunk: position in the tree (index), * its end (next_index), and constructs a bit mask for tagged iterating (tags). */ -void **radix_tree_next_chunk(struct radix_tree_root *root, +void **radix_tree_next_chunk(const struct radix_tree_root *, struct radix_tree_iter *iter, unsigned flags); /** diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 84812a9fb16f..6f86fbac0e36 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -83,26 +83,28 @@ static inline void *node_to_entry(void *ptr) #ifdef CONFIG_RADIX_TREE_MULTIORDER /* Sibling slots point directly to another slot in the same node */ -static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) +static inline +bool is_sibling_entry(const struct radix_tree_node *parent, void *node) { void **ptr = node; return (parent->slots <= ptr) && (ptr < parent->slots + RADIX_TREE_MAP_SIZE); } #else -static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) +static inline +bool is_sibling_entry(const struct radix_tree_node *parent, void *node) { return false; } #endif -static inline unsigned long get_slot_offset(struct radix_tree_node *parent, - void **slot) +static inline +unsigned long get_slot_offset(const struct radix_tree_node *parent, void **slot) { return slot - parent->slots; } -static unsigned int radix_tree_descend(struct radix_tree_node *parent, +static unsigned int radix_tree_descend(const struct radix_tree_node *parent, struct radix_tree_node **nodep, unsigned long index) { unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; @@ -122,7 +124,7 @@ static unsigned int radix_tree_descend(struct radix_tree_node *parent, return offset; } -static inline gfp_t root_gfp_mask(struct radix_tree_root *root) +static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) { return root->gfp_mask & __GFP_BITS_MASK; } @@ -139,13 +141,13 @@ static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, __clear_bit(offset, node->tags[tag]); } -static inline int tag_get(struct radix_tree_node *node, unsigned int tag, +static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, int offset) { return test_bit(offset, node->tags[tag]); } -static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) +static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) { root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); } @@ -160,12 +162,12 @@ static inline void root_tag_clear_all(struct radix_tree_root *root) root->gfp_mask &= __GFP_BITS_MASK; } -static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) +static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) { return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); 
} -static inline unsigned root_tags_get(struct radix_tree_root *root) +static inline unsigned root_tags_get(const struct radix_tree_root *root) { return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT; } @@ -174,7 +176,8 @@ static inline unsigned root_tags_get(struct radix_tree_root *root) * Returns 1 if any slot in the node has this tag set. * Otherwise returns 0. */ -static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) +static inline int any_tag_set(const struct radix_tree_node *node, + unsigned int tag) { unsigned idx; for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { @@ -232,7 +235,7 @@ static inline unsigned long shift_maxindex(unsigned int shift) return (RADIX_TREE_MAP_SIZE << shift) - 1; } -static inline unsigned long node_maxindex(struct radix_tree_node *node) +static inline unsigned long node_maxindex(const struct radix_tree_node *node) { return shift_maxindex(node->shift); } @@ -510,7 +513,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) return __radix_tree_preload(gfp_mask, nr_nodes); } -static unsigned radix_tree_load_root(struct radix_tree_root *root, +static unsigned radix_tree_load_root(const struct radix_tree_root *root, struct radix_tree_node **nodep, unsigned long *maxindex) { struct radix_tree_node *node = rcu_dereference_raw(root->rnode); @@ -908,8 +911,9 @@ EXPORT_SYMBOL(__radix_tree_insert); * allocated and @root->rnode is used as a direct slot instead of * pointing to a node, in which case *@nodep will be NULL. */ -void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, - struct radix_tree_node **nodep, void ***slotp) +void *__radix_tree_lookup(const struct radix_tree_root *root, + unsigned long index, struct radix_tree_node **nodep, + void ***slotp) { struct radix_tree_node *node, *parent; unsigned long maxindex; @@ -952,7 +956,8 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, * exclusive from other writers. Any dereference of the slot must be done * using radix_tree_deref_slot. */ -void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) +void **radix_tree_lookup_slot(const struct radix_tree_root *root, + unsigned long index) { void **slot; @@ -974,7 +979,7 @@ EXPORT_SYMBOL(radix_tree_lookup_slot); * them safely). No RCU barriers are required to access or modify the * returned item, however. */ -void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) +void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) { return __radix_tree_lookup(root, index, NULL, NULL); } @@ -1404,7 +1409,7 @@ EXPORT_SYMBOL(radix_tree_tag_clear); * the RCU lock is held, unless tag modification and node deletion are excluded * from concurrency. */ -int radix_tree_tag_get(struct radix_tree_root *root, +int radix_tree_tag_get(const struct radix_tree_root *root, unsigned long index, unsigned int tag) { struct radix_tree_node *node, *parent; @@ -1568,7 +1573,7 @@ EXPORT_SYMBOL(radix_tree_iter_resume); * @flags: RADIX_TREE_ITER_* flags and tag index * Returns: pointer to chunk first slot, or NULL if iteration is over */ -void **radix_tree_next_chunk(struct radix_tree_root *root, +void **radix_tree_next_chunk(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags) { unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; @@ -1679,7 +1684,7 @@ EXPORT_SYMBOL(radix_tree_next_chunk); * stored in 'results'. 
*/ unsigned int -radix_tree_gang_lookup(struct radix_tree_root *root, void **results, +radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items) { struct radix_tree_iter iter; @@ -1724,7 +1729,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * protection, radix_tree_deref_slot may fail requiring a retry. */ unsigned int -radix_tree_gang_lookup_slot(struct radix_tree_root *root, +radix_tree_gang_lookup_slot(const struct radix_tree_root *root, void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items) { @@ -1761,7 +1766,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_slot); * returns the number of items which were placed at *@results. */ unsigned int -radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, +radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag) { @@ -1802,9 +1807,9 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag); * returns the number of slots which were placed at *@results. */ unsigned int -radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, - unsigned long first_index, unsigned int max_items, - unsigned int tag) +radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, + void ***results, unsigned long first_index, + unsigned int max_items, unsigned int tag) { struct radix_tree_iter iter; void **slot; @@ -1921,7 +1926,7 @@ void radix_tree_clear_tags(struct radix_tree_root *root, * @root: radix tree root * @tag: tag to test */ -int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) +int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) { return root_tag_get(root, tag); } -- GitLab From c68a2aab3300df4106f368568bd7361d6f465993 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 16 Dec 2016 11:52:43 -0500 Subject: [PATCH 0065/1084] radix tree test suite: Remove duplicate bitops code By adding __set_bit and __clear_bit to the tools include directory, we can share the bitops code. This reveals an include loop between kernel.h, log2.h, bitmap.h and bitops.h. Break it the same way as the kernel does; by moving the kernel.h include from bitops.h to bitmap.h. 
Signed-off-by: Matthew Wilcox --- tools/include/asm-generic/bitops/atomic.h | 3 + tools/include/linux/bitmap.h | 1 + tools/include/linux/bitops.h | 1 - tools/testing/radix-tree/linux/bitops.h | 160 ------------------ tools/testing/radix-tree/linux/bitops/__ffs.h | 43 ----- tools/testing/radix-tree/linux/bitops/ffs.h | 41 ----- tools/testing/radix-tree/linux/bitops/ffz.h | 12 -- tools/testing/radix-tree/linux/bitops/find.h | 13 -- tools/testing/radix-tree/linux/bitops/fls.h | 41 ----- tools/testing/radix-tree/linux/bitops/fls64.h | 14 -- .../testing/radix-tree/linux/bitops/hweight.h | 11 -- tools/testing/radix-tree/linux/bitops/le.h | 53 ------ .../radix-tree/linux/bitops/non-atomic.h | 110 ------------ tools/testing/radix-tree/linux/kernel.h | 6 +- 14 files changed, 8 insertions(+), 501 deletions(-) delete mode 100644 tools/testing/radix-tree/linux/bitops.h delete mode 100644 tools/testing/radix-tree/linux/bitops/__ffs.h delete mode 100644 tools/testing/radix-tree/linux/bitops/ffs.h delete mode 100644 tools/testing/radix-tree/linux/bitops/ffz.h delete mode 100644 tools/testing/radix-tree/linux/bitops/find.h delete mode 100644 tools/testing/radix-tree/linux/bitops/fls.h delete mode 100644 tools/testing/radix-tree/linux/bitops/fls64.h delete mode 100644 tools/testing/radix-tree/linux/bitops/hweight.h delete mode 100644 tools/testing/radix-tree/linux/bitops/le.h delete mode 100644 tools/testing/radix-tree/linux/bitops/non-atomic.h diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h index 18663f59d72f..68b8c1516c5a 100644 --- a/tools/include/asm-generic/bitops/atomic.h +++ b/tools/include/asm-generic/bitops/atomic.h @@ -20,4 +20,7 @@ static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) (((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0; } +#define __set_bit(nr, addr) set_bit(nr, addr) +#define __clear_bit(nr, addr) clear_bit(nr, addr) + #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */ diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index eef41d500e9e..e8b9f518e36b 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -4,6 +4,7 @@ #include #include #include +#include #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h index fc446343ff41..1aecad369af5 100644 --- a/tools/include/linux/bitops.h +++ b/tools/include/linux/bitops.h @@ -2,7 +2,6 @@ #define _TOOLS_LINUX_BITOPS_H_ #include -#include #include #ifndef __WORDSIZE diff --git a/tools/testing/radix-tree/linux/bitops.h b/tools/testing/radix-tree/linux/bitops.h deleted file mode 100644 index a13e9bc76eec..000000000000 --- a/tools/testing/radix-tree/linux/bitops.h +++ /dev/null @@ -1,160 +0,0 @@ -#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ -#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ - -#include -#include -#include -#include - -#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) -#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BITS_PER_BYTE 8 -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) - -/** - * __set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. 
- */ -static inline void __set_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - - *p |= mask; -} - -static inline void __clear_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - - *p &= ~mask; -} - -/** - * __change_bit - Toggle a bit in memory - * @nr: the bit to change - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static inline void __change_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - - *p ^= mask; -} - -/** - * __test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old = *p; - - *p = old | mask; - return (old & mask) != 0; -} - -/** - * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old = *p; - - *p = old & ~mask; - return (old & mask) != 0; -} - -/* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, - volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old = *p; - - *p = old ^ mask; - return (old & mask) != 0; -} - -/** - * test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static inline int test_bit(int nr, const volatile unsigned long *addr) -{ - return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); -} - -/** - * __ffs - find first bit in word. - * @word: The word to search - * - * Undefined if no bit exists, so code should check against 0 first. - */ -static inline unsigned long __ffs(unsigned long word) -{ - int num = 0; - - if ((word & 0xffffffff) == 0) { - num += 32; - word >>= 32; - } - if ((word & 0xffff) == 0) { - num += 16; - word >>= 16; - } - if ((word & 0xff) == 0) { - num += 8; - word >>= 8; - } - if ((word & 0xf) == 0) { - num += 4; - word >>= 4; - } - if ((word & 0x3) == 0) { - num += 2; - word >>= 2; - } - if ((word & 0x1) == 0) - num += 1; - return num; -} - -unsigned long find_next_bit(const unsigned long *addr, - unsigned long size, - unsigned long offset); - -static inline unsigned long hweight_long(unsigned long w) -{ - return sizeof(w) == 4 ? 
hweight32(w) : hweight64(w); -} - -#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/__ffs.h b/tools/testing/radix-tree/linux/bitops/__ffs.h deleted file mode 100644 index 9a3274aecf83..000000000000 --- a/tools/testing/radix-tree/linux/bitops/__ffs.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef _ASM_GENERIC_BITOPS___FFS_H_ -#define _ASM_GENERIC_BITOPS___FFS_H_ - -#include - -/** - * __ffs - find first bit in word. - * @word: The word to search - * - * Undefined if no bit exists, so code should check against 0 first. - */ -static inline unsigned long __ffs(unsigned long word) -{ - int num = 0; - -#if BITS_PER_LONG == 64 - if ((word & 0xffffffff) == 0) { - num += 32; - word >>= 32; - } -#endif - if ((word & 0xffff) == 0) { - num += 16; - word >>= 16; - } - if ((word & 0xff) == 0) { - num += 8; - word >>= 8; - } - if ((word & 0xf) == 0) { - num += 4; - word >>= 4; - } - if ((word & 0x3) == 0) { - num += 2; - word >>= 2; - } - if ((word & 0x1) == 0) - num += 1; - return num; -} - -#endif /* _ASM_GENERIC_BITOPS___FFS_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/ffs.h b/tools/testing/radix-tree/linux/bitops/ffs.h deleted file mode 100644 index fbbb43af7dc0..000000000000 --- a/tools/testing/radix-tree/linux/bitops/ffs.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _ASM_GENERIC_BITOPS_FFS_H_ -#define _ASM_GENERIC_BITOPS_FFS_H_ - -/** - * ffs - find first bit set - * @x: the word to search - * - * This is defined the same way as - * the libc and compiler builtin ffs routines, therefore - * differs in spirit from the above ffz (man ffs). - */ -static inline int ffs(int x) -{ - int r = 1; - - if (!x) - return 0; - if (!(x & 0xffff)) { - x >>= 16; - r += 16; - } - if (!(x & 0xff)) { - x >>= 8; - r += 8; - } - if (!(x & 0xf)) { - x >>= 4; - r += 4; - } - if (!(x & 3)) { - x >>= 2; - r += 2; - } - if (!(x & 1)) { - x >>= 1; - r += 1; - } - return r; -} - -#endif /* _ASM_GENERIC_BITOPS_FFS_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/ffz.h b/tools/testing/radix-tree/linux/bitops/ffz.h deleted file mode 100644 index 6744bd4cdf46..000000000000 --- a/tools/testing/radix-tree/linux/bitops/ffz.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_GENERIC_BITOPS_FFZ_H_ -#define _ASM_GENERIC_BITOPS_FFZ_H_ - -/* - * ffz - find first zero in word. - * @word: The word to search - * - * Undefined if no zero exists, so code should check against ~0UL first. 
- */ -#define ffz(x) __ffs(~(x)) - -#endif /* _ASM_GENERIC_BITOPS_FFZ_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/find.h b/tools/testing/radix-tree/linux/bitops/find.h deleted file mode 100644 index 72a51e5a12ef..000000000000 --- a/tools/testing/radix-tree/linux/bitops/find.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _ASM_GENERIC_BITOPS_FIND_H_ -#define _ASM_GENERIC_BITOPS_FIND_H_ - -extern unsigned long find_next_bit(const unsigned long *addr, unsigned long - size, unsigned long offset); - -extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned - long size, unsigned long offset); - -#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) -#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) - -#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/fls.h b/tools/testing/radix-tree/linux/bitops/fls.h deleted file mode 100644 index 850859bc5069..000000000000 --- a/tools/testing/radix-tree/linux/bitops/fls.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _ASM_GENERIC_BITOPS_FLS_H_ -#define _ASM_GENERIC_BITOPS_FLS_H_ - -/** - * fls - find last (most-significant) bit set - * @x: the word to search - * - * This is defined the same way as ffs. - * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. - */ - -static inline int fls(int x) -{ - int r = 32; - - if (!x) - return 0; - if (!(x & 0xffff0000u)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xff000000u)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xf0000000u)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xc0000000u)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000u)) { - x <<= 1; - r -= 1; - } - return r; -} - -#endif /* _ASM_GENERIC_BITOPS_FLS_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/fls64.h b/tools/testing/radix-tree/linux/bitops/fls64.h deleted file mode 100644 index 1b6b17ce2428..000000000000 --- a/tools/testing/radix-tree/linux/bitops/fls64.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _ASM_GENERIC_BITOPS_FLS64_H_ -#define _ASM_GENERIC_BITOPS_FLS64_H_ - -#include - -static inline int fls64(__u64 x) -{ - __u32 h = x >> 32; - if (h) - return fls(h) + 32; - return fls(x); -} - -#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/hweight.h b/tools/testing/radix-tree/linux/bitops/hweight.h deleted file mode 100644 index fbbc383771da..000000000000 --- a/tools/testing/radix-tree/linux/bitops/hweight.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_ -#define _ASM_GENERIC_BITOPS_HWEIGHT_H_ - -#include - -extern unsigned int hweight32(unsigned int w); -extern unsigned int hweight16(unsigned int w); -extern unsigned int hweight8(unsigned int w); -extern unsigned long hweight64(__u64 w); - -#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/le.h b/tools/testing/radix-tree/linux/bitops/le.h deleted file mode 100644 index b9c7e5d2d2ad..000000000000 --- a/tools/testing/radix-tree/linux/bitops/le.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef _ASM_GENERIC_BITOPS_LE_H_ -#define _ASM_GENERIC_BITOPS_LE_H_ - -#include -#include - -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) -#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) - -#if defined(__LITTLE_ENDIAN) - -#define generic_test_le_bit(nr, addr) test_bit(nr, addr) -#define generic___set_le_bit(nr, addr) __set_bit(nr, addr) -#define generic___clear_le_bit(nr, addr) __clear_bit(nr, addr) - -#define generic_test_and_set_le_bit(nr, addr) test_and_set_bit(nr, addr) -#define generic_test_and_clear_le_bit(nr, 
addr) test_and_clear_bit(nr, addr) - -#define generic___test_and_set_le_bit(nr, addr) __test_and_set_bit(nr, addr) -#define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr) - -#define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset) - -#elif defined(__BIG_ENDIAN) - -#define generic_test_le_bit(nr, addr) \ - test_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) -#define generic___set_le_bit(nr, addr) \ - __set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) -#define generic___clear_le_bit(nr, addr) \ - __clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) - -#define generic_test_and_set_le_bit(nr, addr) \ - test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) -#define generic_test_and_clear_le_bit(nr, addr) \ - test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) - -#define generic___test_and_set_le_bit(nr, addr) \ - __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) -#define generic___test_and_clear_le_bit(nr, addr) \ - __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr)) - -extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); - -#else -#error "Please fix " -#endif - -#define generic_find_first_zero_le_bit(addr, size) \ - generic_find_next_zero_le_bit((addr), (size), 0) - -#endif /* _ASM_GENERIC_BITOPS_LE_H_ */ diff --git a/tools/testing/radix-tree/linux/bitops/non-atomic.h b/tools/testing/radix-tree/linux/bitops/non-atomic.h deleted file mode 100644 index 6a1bcb9d2c4a..000000000000 --- a/tools/testing/radix-tree/linux/bitops/non-atomic.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ -#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ - -#include - -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) - -/** - * __set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static inline void __set_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); - - *p |= mask; -} - -static inline void __clear_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); - - *p &= ~mask; -} - -/** - * __change_bit - Toggle a bit in memory - * @nr: the bit to change - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static inline void __change_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); - - *p ^= mask; -} - -/** - * __test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. 
- */ -static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); - unsigned long old = *p; - - *p = old | mask; - return (old & mask) != 0; -} - -/** - * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); - unsigned long old = *p; - - *p = old & ~mask; - return (old & mask) != 0; -} - -/* WARNING: non atomic and it can be reordered! */ -static inline int __test_and_change_bit(int nr, - volatile unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); - unsigned long old = *p; - - *p = old ^ mask; - return (old & mask) != 0; -} - -/** - * test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static inline int test_bit(int nr, const volatile unsigned long *addr) -{ - return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); -} - -#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 9b43b4975d83..1cd72a84a089 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -7,8 +7,10 @@ #include #include -#include "../../include/linux/compiler.h" -#include "../../include/linux/err.h" +#include +#include +#include +#include #include "../../../include/linux/kconfig.h" #ifdef BENCHMARK -- GitLab From b246a9d2671f4f6cfab3f98199568e0b3028f6e5 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 19 Dec 2016 11:07:17 -0500 Subject: [PATCH 0066/1084] tools: Provide a definition of WARN_ON The definition of WARN_ON being used by the radix tree test suite was deficient in two ways: it did not provide a return value, and it stopped execution instead of continuing. This version of WARN_ON tells you which file & line the assertion was triggered in. 
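A minimal usage sketch of what the new definition permits (the function below is
made up for illustration and is not part of the patch): WARN_ON() now evaluates to
the condition, reports the offending file and line, and lets execution continue, so
one failed assertion no longer aborts the whole test run.

static int insert_item(struct radix_tree_root *root, unsigned long index,
		       void *item)
{
	/* With the old assert()-based definition this would stop the test
	 * run; now it logs "assertion failed at file:line" and returns the
	 * condition value, so the caller can take an error path instead. */
	if (WARN_ON(!item))
		return -EINVAL;

	return radix_tree_insert(root, index, item);
}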
Signed-off-by: Matthew Wilcox --- tools/include/asm/bug.h | 8 ++++++++ tools/testing/radix-tree/linux/kernel.h | 1 - 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/include/asm/bug.h b/tools/include/asm/bug.h index beda1a884b50..4790f047a89c 100644 --- a/tools/include/asm/bug.h +++ b/tools/include/asm/bug.h @@ -12,6 +12,14 @@ unlikely(__ret_warn_on); \ }) +#define WARN_ON(condition) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN_printf("assertion failed at %s:%d\n", \ + __FILE__, __LINE__); \ + unlikely(__ret_warn_on); \ +}) + #define WARN_ON_ONCE(condition) ({ \ static int __warned; \ int __ret_warn_once = !!(condition); \ diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 1cd72a84a089..9681463c91e2 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -24,7 +24,6 @@ #endif #define BUG_ON(expr) assert(!(expr)) -#define WARN_ON(expr) assert(!(expr)) #define __init #define __must_check #define panic(expr) -- GitLab From fed24668482e07421b8e746a4886e7725434050a Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 13 Dec 2016 17:23:37 +0800 Subject: [PATCH 0067/1084] f2fs: remove unused values in recover_fsync_data This patch remove unused values in function recover_fsync_data Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 981a9584b62f..4fb4471a3206 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -552,10 +552,8 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) { - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); struct list_head inode_list; struct list_head dir_list; - block_t blkaddr; int err; int ret = 0; bool need_writecp = false; @@ -571,8 +569,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) /* prevent checkpoint */ mutex_lock(&sbi->cp_mutex); - blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - /* step #1: find fsynced inode numbers */ err = find_fsync_dnodes(sbi, &inode_list); if (err || list_empty(&inode_list)) -- GitLab From 5c9e418436f3445d7cc4f3ba2964f231a4b33f17 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Dec 2016 18:54:59 +0800 Subject: [PATCH 0068/1084] f2fs: don't cache nat entry if out of memory If we run out of memory, in cache_nat_entry, it's better to avoid loop for allocating memory to cache nat entry, so in low memory scenario, for read path of node block, I expect this can avoid unneeded latency. 
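The change below gives grab_nat_entry() a no_fail flag. As a simplified sketch of
the two allocation styles being distinguished (illustrative helpers, not the f2fs
code itself): the no-fail style keeps retrying until the slab allocation succeeds,
which is exactly the loop the read path wants to avoid, while the relaxed style
returns NULL and the caller simply skips caching the entry.

/* sketch: "must succeed" allocation, may keep retrying under memory pressure */
static void *alloc_entry_nofail(struct kmem_cache *slab)
{
	void *p;

	do {
		p = kmem_cache_alloc(slab, GFP_NOFS);
		if (!p)
			cond_resched();
	} while (!p);
	return p;
}

/* sketch: best-effort allocation, caller must tolerate NULL */
static void *alloc_entry_relaxed(struct kmem_cache *slab)
{
	return kmem_cache_alloc(slab, GFP_NOFS);
}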
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b9078fdb3743..03a1f9043558 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -245,12 +245,24 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) return need_update; } -static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) +static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, + bool no_fail) { struct nat_entry *new; - new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); - f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); + if (no_fail) { + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); + f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); + } else { + new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS); + if (!new) + return NULL; + if (radix_tree_insert(&nm_i->nat_root, nid, new)) { + kmem_cache_free(nat_entry_slab, new); + return NULL; + } + } + memset(new, 0, sizeof(struct nat_entry)); nat_set_nid(new, nid); nat_reset_flag(new); @@ -267,8 +279,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, e = __lookup_nat_cache(nm_i, nid); if (!e) { - e = grab_nat_entry(nm_i, nid); - node_info_from_raw_nat(&e->ni, ne); + e = grab_nat_entry(nm_i, nid, false); + if (e) + node_info_from_raw_nat(&e->ni, ne); } else { f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || nat_get_blkaddr(e) != @@ -286,7 +299,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { - e = grab_nat_entry(nm_i, ni->nid); + e = grab_nat_entry(nm_i, ni->nid, true); copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -2152,7 +2165,7 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - ne = grab_nat_entry(nm_i, nid); + ne = grab_nat_entry(nm_i, nid, true); node_info_from_raw_nat(&ne->ni, &raw_ne); } -- GitLab From 07fe8d44409f88be8f4a4e8f22b47ee709a22657 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 16 Dec 2016 11:18:15 +0300 Subject: [PATCH 0069/1084] f2fs: remove unneeded condition We checked that "inode" is not an error pointer earlier so there is no need to check again here. Signed-off-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 56c19b0610a8..290a9d7060ef 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -321,9 +321,9 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (err) goto err_out; } - if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) && - (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && - !fscrypt_has_permitted_context(dir, inode)) { + if (f2fs_encrypted_inode(dir) && + (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && + !fscrypt_has_permitted_context(dir, inode)) { bool nokey = f2fs_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode); err = nokey ? -ENOKEY : -EPERM; -- GitLab From 7855eba4d6102f811b6dd142d6c749f53b591fa3 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 19 Dec 2016 20:10:48 +0800 Subject: [PATCH 0070/1084] f2fs: fix a problem of using memory after free This patch fix a problem of using memory after free in function __try_merge_extent_node. 
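The hunk below reorders the release of prev_ex against the reads of ei. A compressed
sketch of why that ordering matters; it assumes (from the backward-merge branch of
__try_merge_extent_node, which is not shown in this hunk) that a successful backward
merge can leave ei pointing at prev_ex->ei:

/*
 * old ordering (condensed from the removed lines, illustrative only):
 *
 *	__release_extent_node(sbi, et, prev_ex);	// frees prev_ex
 *	next_ex->ei.fofs = ei->fofs;			// ei may alias prev_ex->ei,
 *	next_ex->ei.blk  = ei->blk;			// so these read freed memory
 *	next_ex->ei.len += ei->len;
 *
 * new ordering: copy ei into next_ex first, release prev_ex only afterwards.
 */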
Fixes: 0f825ee6e873 ("f2fs: add new interfaces for extent tree") Cc: Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 4db44da7ef69..e02c3d88dc9a 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -352,11 +352,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, } if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) { - if (en) - __release_extent_node(sbi, et, prev_ex); next_ex->ei.fofs = ei->fofs; next_ex->ei.blk = ei->blk; next_ex->ei.len += ei->len; + if (en) + __release_extent_node(sbi, et, prev_ex); + en = next_ex; } -- GitLab From 746e2403927efbd7c7f2e796314e3cfb3cfabaa4 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 20 Dec 2016 11:11:35 +0800 Subject: [PATCH 0071/1084] f2fs: add a case of no need to read a page in write begin If the range we write cover the whole valid data in the last page, we do not need to read it. Signed-off-by: Yunlei He [Jaegeuk Kim: nullify the remaining area (fix: xfstests/f2fs/001)] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9ac262564fa6..2c5df1dc1479 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1715,6 +1715,11 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (len == PAGE_SIZE || PageUptodate(page)) return 0; + if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) { + zero_user_segment(page, len, PAGE_SIZE); + return 0; + } + if (blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); @@ -1768,7 +1773,7 @@ static int f2fs_write_end(struct file *file, * let generic_perform_write() try to copy data again through copied=0. */ if (!PageUptodate(page)) { - if (unlikely(copied != PAGE_SIZE)) + if (unlikely(copied != len)) copied = 0; else SetPageUptodate(page); -- GitLab From ed0b56209fe79a1309653d4b03f5c3147f580f6b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 20 Dec 2016 21:57:42 +0800 Subject: [PATCH 0072/1084] f2fs: use rb_entry_safe Use rb_entry_safe() instead of open-coding it. Signed-off-by: Geliang Tang Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index e02c3d88dc9a..6ed6424807b6 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -311,28 +311,24 @@ static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et, tmp_node = parent; if (parent && fofs > en->ei.fofs) tmp_node = rb_next(parent); - *next_ex = tmp_node ? - rb_entry(tmp_node, struct extent_node, rb_node) : NULL; + *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); tmp_node = parent; if (parent && fofs < en->ei.fofs) tmp_node = rb_prev(parent); - *prev_ex = tmp_node ? - rb_entry(tmp_node, struct extent_node, rb_node) : NULL; + *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); return NULL; lookup_neighbors: if (fofs == en->ei.fofs) { /* lookup prev node for merging backward later */ tmp_node = rb_prev(&en->rb_node); - *prev_ex = tmp_node ? 
- rb_entry(tmp_node, struct extent_node, rb_node) : NULL; + *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); } if (fofs == en->ei.fofs + en->ei.len - 1) { /* lookup next node for merging frontward later */ tmp_node = rb_next(&en->rb_node); - *next_ex = tmp_node ? - rb_entry(tmp_node, struct extent_node, rb_node) : NULL; + *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); } return en; } @@ -493,9 +489,8 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, if (!next_en) { struct rb_node *node = rb_next(&en->rb_node); - next_en = node ? - rb_entry(node, struct extent_node, rb_node) - : NULL; + next_en = rb_entry_safe(node, struct extent_node, + rb_node); } if (parts) -- GitLab From 650d3c4e56e1e92ee6e004648c9deb243e5963e0 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 22 Dec 2016 11:46:24 +0800 Subject: [PATCH 0073/1084] f2fs: fix a missing discard prefree segments If userspace issue a fstrim with a range not involve prefree segments, it will reuse these segments without discard. This patch fix it. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0d8802453758..cf98ba7bf645 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -916,9 +916,13 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) dirty_i->nr_dirty[PRE] -= end - start; - if (force || !test_opt(sbi, DISCARD)) + if (!test_opt(sbi, DISCARD)) continue; + if (force && start >= cpc->trim_start && + (end - 1) <= cpc->trim_end) + continue; + if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); @@ -2263,8 +2267,12 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, sit_i->dirty_sentries); out: if (cpc->reason == CP_DISCARD) { + __u64 trim_start = cpc->trim_start; + for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) add_discard_addrs(sbi, cpc); + + cpc->trim_start = trim_start; } mutex_unlock(&sit_i->sentry_lock); -- GitLab From 9d52a504db6db9e4e254576130aa867838daff55 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Dec 2016 11:51:32 -0800 Subject: [PATCH 0074/1084] f2fs: reassign new segment for mode=lfs Otherwise we can remain wrong curseg->next_blkoff, resulting in fsck failure. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cf98ba7bf645..b6bb6490a640 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1428,9 +1428,6 @@ void allocate_new_segments(struct f2fs_sb_info *sbi) unsigned int old_segno; int i; - if (test_opt(sbi, LFS)) - return; - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { curseg = CURSEG_I(sbi, i); old_segno = curseg->segno; -- GitLab From d621e6b3707f4c3535e5e2c0b0c204753a9f00b2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Dec 2016 14:26:39 -0800 Subject: [PATCH 0075/1084] f2fs: fix wrong tracepoints for op and op_flags This patch fixes wrong tracepoints in terms of op and op_flags. 
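A concrete reading of the new macros, derived from the symbolic tables in the hunk
below (illustrative decoding, not captured trace output):

/*
 * op = REQ_OP_WRITE, op_flags = REQ_SYNC | REQ_META | REQ_PRIO
 *
 *	show_bio_op(op)           -> "WRITE"
 *	show_bio_op_flags(flags)  -> "(SMP)"
 *
 * so the tracepoint's "rw = %s%s" field reads WRITE(SMP): the operation and
 * its flags are now decoded separately instead of being folded into a single
 * flag mask.
 */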
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 41 ++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 01b3c9869a0d..4c942599581b 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -55,25 +55,34 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { IPU, "IN-PLACE" }, \ { OPU, "OUT-OF-PLACE" }) -#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | REQ_PREFLUSH | REQ_FUA)) -#define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) - -#define show_bio_type(op_flags) show_bio_op_flags(op_flags), \ - show_bio_extra(op_flags) +#define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_PREFLUSH | REQ_META |\ + REQ_PRIO) +#define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS) + +#define show_bio_type(op,op_flags) show_bio_op(op), \ + show_bio_op_flags(op_flags) + +#define show_bio_op(op) \ + __print_symbolic(op, \ + { REQ_OP_READ, "READ" }, \ + { REQ_OP_WRITE, "WRITE" }, \ + { REQ_OP_FLUSH, "FLUSH" }, \ + { REQ_OP_DISCARD, "DISCARD" }, \ + { REQ_OP_ZONE_REPORT, "ZONE_REPORT" }, \ + { REQ_OP_SECURE_ERASE, "SECURE_ERASE" }, \ + { REQ_OP_ZONE_RESET, "ZONE_RESET" }, \ + { REQ_OP_WRITE_SAME, "WRITE_SAME" }, \ + { REQ_OP_WRITE_ZEROES, "WRITE_ZEROES" }) #define show_bio_op_flags(flags) \ __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \ - { 0, "WRITE" }, \ - { REQ_RAHEAD, "READAHEAD" }, \ - { REQ_SYNC, "REQ_SYNC" }, \ - { REQ_PREFLUSH, "REQ_PREFLUSH" }, \ - { REQ_FUA, "REQ_FUA" }) - -#define show_bio_extra(type) \ - __print_symbolic(F2FS_BIO_EXTRA_MASK(type), \ + { REQ_RAHEAD, "(RA)" }, \ + { REQ_SYNC, "(S)" }, \ + { REQ_SYNC | REQ_PRIO, "(SP)" }, \ { REQ_META, "(M)" }, \ - { REQ_PRIO, "(P)" }, \ { REQ_META | REQ_PRIO, "(MP)" }, \ + { REQ_SYNC | REQ_META | REQ_PRIO, "(SMP)" }, \ + { REQ_PREFLUSH | REQ_META | REQ_PRIO, "(FMP)" }, \ { 0, " \b" }) #define show_data_type(type) \ @@ -753,7 +762,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, (unsigned long)__entry->index, (unsigned long long)__entry->old_blkaddr, (unsigned long long)__entry->new_blkaddr, - show_bio_type(__entry->op_flags), + show_bio_type(__entry->op, __entry->op_flags), show_block_type(__entry->type)) ); @@ -802,7 +811,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, TP_printk("dev = (%d,%d), rw = %s%s, %s, sector = %lld, size = %u", show_dev(__entry), - show_bio_type(__entry->op_flags), + show_bio_type(__entry->op, __entry->op_flags), show_block_type(__entry->type), (unsigned long long)__entry->sector, __entry->size) -- GitLab From 554b5125f5cfca6653461fd52bad24d4ef35ec29 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Dec 2016 12:13:03 -0800 Subject: [PATCH 0076/1084] f2fs: add submit_bio tracepoint This patch adds final submit_bio() tracepoint. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++---- include/trace/events/f2fs.h | 45 ++++++++++++++++++++++++------------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2c5df1dc1479..a06b2d187aec 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -175,6 +175,10 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, current->plug && (type == DATA || type == NODE)) blk_finish_plug(current->plug); } + if (is_read_io(bio_op(bio))) + trace_f2fs_submit_read_bio(sbi->sb, type, bio); + else + trace_f2fs_submit_write_bio(sbi->sb, type, bio); submit_bio(bio); } @@ -185,12 +189,12 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; + bio_set_op_attrs(io->bio, fio->op, fio->op_flags); + if (is_read_io(fio->op)) - trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); + trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio); else - trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); - - bio_set_op_attrs(io->bio, fio->op, fio->op_flags); + trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio); __submit_bio(io->sbi, io->bio, fio->type); io->bio = NULL; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 4c942599581b..04c527410ecc 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -784,12 +784,11 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, TP_CONDITION(page->mapping) ); -DECLARE_EVENT_CLASS(f2fs__submit_bio, +DECLARE_EVENT_CLASS(f2fs__bio, - TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, - struct bio *bio), + TP_PROTO(struct super_block *sb, int type, struct bio *bio), - TP_ARGS(sb, fio, bio), + TP_ARGS(sb, type, bio), TP_STRUCT__entry( __field(dev_t, dev) @@ -802,9 +801,9 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->op = fio->op; - __entry->op_flags = fio->op_flags; - __entry->type = fio->type; + __entry->op = bio_op(bio); + __entry->op_flags = bio->bi_opf; + __entry->type = type; __entry->sector = bio->bi_iter.bi_sector; __entry->size = bio->bi_iter.bi_size; ), @@ -817,22 +816,38 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, __entry->size) ); -DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio, +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_write_bio, + + TP_PROTO(struct super_block *sb, int type, struct bio *bio), + + TP_ARGS(sb, type, bio), + + TP_CONDITION(bio) +); + +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_read_bio, + + TP_PROTO(struct super_block *sb, int type, struct bio *bio), + + TP_ARGS(sb, type, bio), + + TP_CONDITION(bio) +); + +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_read_bio, - TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, - struct bio *bio), + TP_PROTO(struct super_block *sb, int type, struct bio *bio), - TP_ARGS(sb, fio, bio), + TP_ARGS(sb, type, bio), TP_CONDITION(bio) ); -DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_write_bio, - TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, - struct bio *bio), + TP_PROTO(struct super_block *sb, int type, struct bio *bio), - TP_ARGS(sb, fio, bio), + TP_ARGS(sb, type, bio), TP_CONDITION(bio) ); -- GitLab From 0a595ebaaa6b53a2226d3fee2a2fd616ea5ba378 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Dec 2016 10:12:56 -0800 Subject: [PATCH 0077/1084] f2fs: support IO alignment for DATA and NODE writes This patch implements IO alignment by filling dummy blocks in 
DATA and NODE write bios. If we can guarantee, for example, 32KB or 64KB for such the IOs, we can eliminate underlying dummy page problem which FTL conducts in order to close MLC or TLC partial written pages. Note that, - it requires "-o mode=lfs". - IO size should be power of 2, not exceed BIO_MAX_PAGES, 256. - read IO is still 4KB. - do checkpoint at fsync, if dummy NODE page was written. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 55 +++++++++++++++++++++++++++++++++++++++-- fs/f2fs/f2fs.h | 4 ++- fs/f2fs/segment.c | 9 +++++-- fs/f2fs/segment.h | 3 +++ fs/f2fs/super.c | 13 +++++++++- include/linux/f2fs_fs.h | 6 +++++ 6 files changed, 84 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a06b2d187aec..12d235f2c771 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -93,6 +93,17 @@ static void f2fs_write_end_io(struct bio *bio) struct page *page = bvec->bv_page; enum count_type type = WB_DATA_TYPE(page); + if (IS_DUMMY_WRITTEN_PAGE(page)) { + set_page_private(page, (unsigned long)NULL); + ClearPagePrivate(page); + unlock_page(page); + mempool_free(page, sbi->write_io_dummy); + + if (unlikely(bio->bi_error)) + f2fs_stop_checkpoint(sbi, true); + continue; + } + fscrypt_pullback_bio_page(&page, true); if (unlikely(bio->bi_error)) { @@ -171,10 +182,42 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { if (!is_read_io(bio_op(bio))) { + unsigned int start; + if (f2fs_sb_mounted_blkzoned(sbi->sb) && current->plug && (type == DATA || type == NODE)) blk_finish_plug(current->plug); + + if (type != DATA && type != NODE) + goto submit_io; + + start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; + start %= F2FS_IO_SIZE(sbi); + + if (start == 0) + goto submit_io; + + /* fill dummy pages */ + for (; start < F2FS_IO_SIZE(sbi); start++) { + struct page *page = + mempool_alloc(sbi->write_io_dummy, + GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL); + f2fs_bug_on(sbi, !page); + + SetPagePrivate(page); + set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE); + lock_page(page); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) + f2fs_bug_on(sbi, 1); + } + /* + * In the NODE case, we lose next block address chain. So, we + * need to do checkpoint in f2fs_sync_file. + */ + if (type == NODE) + set_sbi_flag(sbi, SBI_NEED_CP); } +submit_io: if (is_read_io(bio_op(bio))) trace_f2fs_submit_read_bio(sbi->sb, type, bio); else @@ -319,13 +362,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) return 0; } -void f2fs_submit_page_mbio(struct f2fs_io_info *fio) +int f2fs_submit_page_mbio(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; bool is_read = is_read_io(fio->op); struct page *bio_page; + int err = 0; io = is_read ? 
&sbi->read_io : &sbi->write_io[btype]; @@ -346,6 +390,12 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { + if ((fio->type == DATA || fio->type == NODE) && + fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { + err = -EAGAIN; + dec_page_count(sbi, WB_DATA_TYPE(bio_page)); + goto out_fail; + } io->bio = __bio_alloc(sbi, fio->new_blkaddr, BIO_MAX_PAGES, is_read); io->fio = *fio; @@ -359,9 +409,10 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio) io->last_block_in_bio = fio->new_blkaddr; f2fs_trace_ios(fio, 0); - +out_fail: up_write(&io->io_rwsem); trace_f2fs_submit_page_mbio(fio->page, fio); + return err; } static void __set_data_blkaddr(struct dnode_of_data *dn) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2da8c3aa0ce5..0d7eaddef4a0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -792,6 +792,8 @@ struct f2fs_sb_info { struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ + int write_io_size_bits; /* Write IO size bits */ + mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ @@ -2174,7 +2176,7 @@ void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *, struct page *, nid_t, enum page_type, int); void f2fs_flush_merged_bios(struct f2fs_sb_info *); int f2fs_submit_page_bio(struct f2fs_io_info *); -void f2fs_submit_page_mbio(struct f2fs_io_info *); +int f2fs_submit_page_mbio(struct f2fs_io_info *); struct block_device *f2fs_target_device(struct f2fs_sb_info *, block_t, struct bio *); int f2fs_target_device_index(struct f2fs_sb_info *, block_t); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b6bb6490a640..2e8d12e9cae9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1604,15 +1604,20 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio->page, fio->type); + int err; if (fio->type == NODE || fio->type == DATA) mutex_lock(&fio->sbi->wio_mutex[fio->type]); - +reallocate: allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type); /* writeout dirty page into bdev */ - f2fs_submit_page_mbio(fio); + err = f2fs_submit_page_mbio(fio); + if (err == -EAGAIN) { + fio->old_blkaddr = fio->new_blkaddr; + goto reallocate; + } if (fio->type == NODE || fio->type == DATA) mutex_unlock(&fio->sbi->wio_mutex[fio->type]); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 9d44ce83acb2..08f1455c812c 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -186,9 +186,12 @@ struct segment_allocation { * the page is atomically written, and it is in inmem_pages list. 
*/ #define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) +#define DUMMY_WRITTEN_PAGE ((unsigned long)-2) #define IS_ATOMIC_WRITTEN_PAGE(page) \ (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) +#define IS_DUMMY_WRITTEN_PAGE(page) \ + (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) struct inmem_pages { struct list_head list; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 46fd30d8af77..00fc36e49368 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1763,6 +1763,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) FDEV(i).total_segments, FDEV(i).start_blk, FDEV(i).end_blk); } + f2fs_msg(sbi->sb, KERN_INFO, + "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); return 0; } @@ -1880,12 +1882,19 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_options; + if (F2FS_IO_SIZE(sbi) > 1) { + sbi->write_io_dummy = + mempool_create_page_pool(F2FS_IO_SIZE(sbi) - 1, 0); + if (!sbi->write_io_dummy) + goto free_options; + } + /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); - goto free_options; + goto free_io_dummy; } err = get_valid_checkpoint(sbi); @@ -2103,6 +2112,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); +free_io_dummy: + mempool_destroy(sbi->write_io_dummy); free_options: destroy_percpu_info(sbi); kfree(options); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index cea41a124a80..f0748524ca8c 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -36,6 +36,12 @@ #define F2FS_NODE_INO(sbi) (sbi->node_ino_num) #define F2FS_META_INO(sbi) (sbi->meta_ino_num) +#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ +#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ +#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ +#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */ +#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) + /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) #define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) -- GitLab From ec91538dccd44329ad83d3aae1aa6a8389b5c75f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Dec 2016 17:09:19 -0800 Subject: [PATCH 0078/1084] f2fs: get io size bit from mount option This patch adds to set io_size_bits from mount option. Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 ++ fs/f2fs/super.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 753dd4f96afe..d99faced79cb 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -157,6 +157,8 @@ data_flush Enable data flushing before checkpoint in order to mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. +io_bits=%u Set the bit size of write IO requests. It should be set + with "mode=lfs". 
================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 00fc36e49368..b7efbd4f6af9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -101,6 +101,7 @@ enum { Opt_noinline_data, Opt_data_flush, Opt_mode, + Opt_io_size_bits, Opt_fault_injection, Opt_lazytime, Opt_nolazytime, @@ -133,6 +134,7 @@ static match_table_t f2fs_tokens = { {Opt_noinline_data, "noinline_data"}, {Opt_data_flush, "data_flush"}, {Opt_mode, "mode=%s"}, + {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, @@ -535,6 +537,17 @@ static int parse_options(struct super_block *sb, char *options) } kfree(name); break; + case Opt_io_size_bits: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg > __ilog2_u32(BIO_MAX_PAGES)) { + f2fs_msg(sb, KERN_WARNING, + "Not support %d, larger than %d", + 1 << arg, BIO_MAX_PAGES); + return -EINVAL; + } + sbi->write_io_size_bits = arg; + break; case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; @@ -558,6 +571,13 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; } } + + if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { + f2fs_msg(sb, KERN_ERR, + "Should set mode=lfs with %uKB-sized IO", + F2FS_IO_SIZE_KB(sbi)); + return -EINVAL; + } return 0; } @@ -918,6 +938,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (test_opt(sbi, LFS)) seq_puts(seq, "lfs"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); + if (F2FS_IO_SIZE_BITS(sbi)) + seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); return 0; } -- GitLab From 26a28a0c1eb756ba18bfb1f93309c4b4406b9cd9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 28 Dec 2016 13:55:09 -0800 Subject: [PATCH 0079/1084] f2fs: show the max number of atomic operations This patch adds to show the max number of atomic operations which are conducting concurrently. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 7 +++++++ fs/f2fs/f2fs.h | 17 +++++++++++++++++ fs/f2fs/file.c | 8 ++++++-- fs/f2fs/segment.c | 1 + 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index fbd5184140d0..29cdf0c1da1d 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -50,6 +50,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); + si->aw_cnt = atomic_read(&sbi->aw_cnt); + si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; @@ -256,6 +258,8 @@ static int stat_show(struct seq_file *s, void *v) si->inline_dir); seq_printf(s, " - Orphan Inode: %u\n", si->orphans); + seq_printf(s, " - Atomic write count: %4d (Max. 
%4d)\n", + si->aw_cnt, si->max_aw_cnt); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); @@ -414,6 +418,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_dir, 0); atomic_set(&sbi->inplace_count, 0); + atomic_set(&sbi->aw_cnt, 0); + atomic_set(&sbi->max_aw_cnt, 0); + mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); mutex_unlock(&f2fs_stat_mutex); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0d7eaddef4a0..bdcfe2a9b532 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -884,6 +884,8 @@ struct f2fs_sb_info { atomic_t inline_xattr; /* # of inline_xattr inodes */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ + atomic_t aw_cnt; /* # of atomic writes */ + atomic_t max_aw_cnt; /* max # of atomic writes */ int bg_gc; /* background gc calls */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif @@ -2236,6 +2238,7 @@ struct f2fs_stat_info { int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; int inline_xattr, inline_inode, inline_dir, orphans; + int aw_cnt, max_aw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; @@ -2307,6 +2310,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) ((sbi)->block_count[(curseg)->alloc_type]++) #define stat_inc_inplace_blocks(sbi) \ (atomic_inc(&(sbi)->inplace_count)) +#define stat_inc_atomic_write(inode) \ + (atomic_inc(&F2FS_I_SB(inode)->aw_cnt)); +#define stat_dec_atomic_write(inode) \ + (atomic_dec(&F2FS_I_SB(inode)->aw_cnt)); +#define stat_update_max_atomic_write(inode) \ + do { \ + int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \ + int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \ + if (cur > max) \ + atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ + } while (0) #define stat_inc_seg_count(sbi, type, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ @@ -2360,6 +2374,9 @@ void f2fs_destroy_root_stats(void); #define stat_dec_inline_inode(inode) #define stat_inc_inline_dir(inode) #define stat_dec_inline_dir(inode) +#define stat_inc_atomic_write(inode) +#define stat_dec_atomic_write(inode) +#define stat_update_max_atomic_write(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) #define stat_inc_inplace_blocks(sbi) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 49f10dce817d..291d2ca53a4c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1542,6 +1542,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (ret) clear_inode_flag(inode, FI_ATOMIC_FILE); out: + stat_inc_atomic_write(inode); + stat_update_max_atomic_write(inode); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1571,9 +1573,11 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) set_inode_flag(inode, FI_ATOMIC_FILE); goto err_out; } + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); + stat_dec_atomic_write(inode); + } else { + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } - - ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); err_out: inode_unlock(inode); mnt_drop_write_file(filp); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2e8d12e9cae9..9cabe935afc7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -243,6 +243,7 @@ void drop_inmem_pages(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); 
clear_inode_flag(inode, FI_ATOMIC_FILE); + stat_dec_atomic_write(inode); mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); -- GitLab From 363fa4e078cbdc97a172c19d19dc04b41b52ebc8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 28 Dec 2016 17:31:15 -0800 Subject: [PATCH 0080/1084] f2fs: don't allow encrypted operations without keys This patch fixes the renaming bug on encrypted filenames, which was pointed by (ext4: don't allow encrypted operations without keys) Cc: Theodore Ts'o Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 290a9d7060ef..0eda02253ffb 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -663,6 +663,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, bool is_old_inline = f2fs_has_inline_dentry(old_dir); int err = -ENOENT; + if ((f2fs_encrypted_inode(old_dir) && + !fscrypt_has_encryption_key(old_dir)) || + (f2fs_encrypted_inode(new_dir) && + !fscrypt_has_encryption_key(new_dir))) + return -ENOKEY; + if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) && !fscrypt_has_permitted_context(new_dir, old_inode)) { err = -EPERM; @@ -843,6 +849,12 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, int old_nlink = 0, new_nlink = 0; int err = -ENOENT; + if ((f2fs_encrypted_inode(old_dir) && + !fscrypt_has_encryption_key(old_dir)) || + (f2fs_encrypted_inode(new_dir) && + !fscrypt_has_encryption_key(new_dir))) + return -ENOKEY; + if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) && (old_dir != new_dir) && (!fscrypt_has_permitted_context(new_dir, old_inode) || -- GitLab From bb95d9ab2a9d4afd03b59a603cccb2c601f68b78 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 3 Jan 2017 17:19:30 -0800 Subject: [PATCH 0081/1084] f2fs: drop exist_data for inline_data when truncated to 0 A test program gets the SEEK_DATA with two values between a new created file and the exist file on f2fs filesystem. 
F2FS filesystem, (the first "test1" is a new file) SEEK_DATA size != 0 (offset = 8192) SEEK_DATA size != 0 (offset = 4096) PNFS filesystem, (the first "test1" is a new file) SEEK_DATA size != 0 (offset = 4096) SEEK_DATA size != 0 (offset = 4096) int main(int argc, char **argv) { char *filename = argv[1]; int offset = 1, i = 0, fd = -1; if (argc < 2) { printf("Usage: %s f2fsfilename\n", argv[0]); return -1; } /* if (!access(filename, F_OK) || errno != ENOENT) { printf("Needs a new file for test, %m\n"); return -1; }*/ fd = open(filename, O_RDWR | O_CREAT, 0777); if (fd < 0) { printf("Create test file %s failed, %m\n", filename); return -1; } for (i = 0; i < 20; i++) { offset = 1 << i; ftruncate(fd, 0); lseek(fd, offset, SEEK_SET); write(fd, "test", 5); /* Get the alloc size by seek data equal zero*/ if (lseek(fd, 0, SEEK_DATA)) { printf("SEEK_DATA size != 0 (offset = %d)\n", offset); break; } } close(fd); return 0; } Reported-and-Tested-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 291d2ca53a4c..6c335180b9d8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -570,6 +570,8 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) if (f2fs_has_inline_data(inode)) { if (truncate_inline_inode(ipage, from)) set_page_dirty(ipage); + if (from == 0) + clear_inode_flag(inode, FI_DATA_EXIST); f2fs_put_page(ipage, 1); truncate_page = true; goto out; -- GitLab From 4e6a8d9b224f886362ea6e8f6046b541437c944f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 29 Dec 2016 14:07:53 -0800 Subject: [PATCH 0082/1084] f2fs: relax async discard commands more This patch relaxes async discard commands to avoid waiting its end_io during checkpoint. Instead of waiting them during checkpoint, it will be done when actually reusing them. Test on initial partition of nvme drive. 
# time fstrim /mnt/test Before : 6.158s After : 4.822s Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ++----- fs/f2fs/f2fs.h | 4 +++- fs/f2fs/segment.c | 24 +++++++++++++++++++----- fs/f2fs/super.c | 3 +++ 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f73ee9534d83..1a9ba69a22ba 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1254,7 +1254,6 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, prefree_segments(sbi)); flush_sit_entries(sbi, cpc); clear_prefree_segments(sbi, cpc); - f2fs_wait_all_discard_bio(sbi); unblock_operations(sbi); goto out; } @@ -1273,12 +1272,10 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* unlock all the fs_lock[] in do_checkpoint() */ err = do_checkpoint(sbi, cpc); - if (err) { + if (err) release_discard_addrs(sbi); - } else { + else clear_prefree_segments(sbi, cpc); - f2fs_wait_all_discard_bio(sbi); - } unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bdcfe2a9b532..e0db895fd84c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -183,6 +183,8 @@ struct discard_entry { struct bio_entry { struct list_head list; + block_t lstart; + block_t len; struct bio *bio; struct completion event; int error; @@ -2111,7 +2113,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *, bool); void invalidate_blocks(struct f2fs_sb_info *, block_t); bool is_checkpointed_data(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *); +void f2fs_wait_discard_bio(struct f2fs_sb_info *, block_t); void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); void release_discard_addrs(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *, bool); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9cabe935afc7..44d69f90be2a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -625,20 +625,23 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) } static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi, - struct bio *bio) + struct bio *bio, block_t lstart, block_t len) { struct list_head *wait_list = &(SM_I(sbi)->wait_list); struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); INIT_LIST_HEAD(&be->list); be->bio = bio; + be->lstart = lstart; + be->len = len; init_completion(&be->event); list_add_tail(&be->list, wait_list); return be; } -void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi) +/* This should be covered by global mutex, &sit_i->sentry_lock */ +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { struct list_head *wait_list = &(SM_I(sbi)->wait_list); struct bio_entry *be, *tmp; @@ -647,7 +650,15 @@ void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi) struct bio *bio = be->bio; int err; - wait_for_completion_io(&be->event); + if (!completion_done(&be->event)) { + if ((be->lstart <= blkaddr && + blkaddr < be->lstart + be->len) || + blkaddr == NULL_ADDR) + wait_for_completion_io(&be->event); + else + continue; + } + err = be->error; if (err == -EOPNOTSUPP) err = 0; @@ -675,6 +686,7 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { struct bio *bio = NULL; + block_t lblkstart = blkstart; int err; trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); 
@@ -689,14 +701,14 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, SECTOR_FROM_BLOCK(blklen), GFP_NOFS, 0, &bio); if (!err && bio) { - struct bio_entry *be = __add_bio_entry(sbi, bio); + struct bio_entry *be = __add_bio_entry(sbi, bio, + lblkstart, blklen); bio->bi_private = be; bio->bi_end_io = f2fs_submit_bio_wait_endio; bio->bi_opf |= REQ_SYNC; submit_bio(bio); } - return err; } @@ -1575,6 +1587,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + f2fs_wait_discard_bio(sbi, *new_blkaddr); + /* * __add_sum_entry should be resided under the curseg_mutex * because, this function updates a summary entry in the diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b7efbd4f6af9..e68cec492f06 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -770,6 +770,9 @@ static void f2fs_put_super(struct super_block *sb) write_checkpoint(sbi, &cpc); } + /* be sure to wait for any on-going discard commands */ + f2fs_wait_discard_bio(sbi, NULL_ADDR); + /* write_checkpoint can update stat informaion */ f2fs_destroy_stats(sbi); -- GitLab From 684309e5043efaaa94d9b4e65f53f7fdde1f6675 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 28 Jan 2017 17:10:39 +0200 Subject: [PATCH 0083/1084] pwm: lpss: Avoid potential overflow of base_unit The resolution of base_unit is derived from base_unit_bits and thus must be equal to (2^base_unit_bits - 1). Otherwise frequency and therefore base_unit might potentially overflow. Prevent the above by substracting 1 in all cases where base_unit_bits or derivative is used. Reviewed-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Thierry Reding --- drivers/pwm/pwm-lpss.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 72c0bce5a75c..8642feeb8abd 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -102,7 +102,7 @@ static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, * The equation is: * base_unit = round(base_unit_range * freq / c) */ - base_unit_range = BIT(lpwm->info->base_unit_bits); + base_unit_range = BIT(lpwm->info->base_unit_bits) - 1; freq *= base_unit_range; base_unit = DIV_ROUND_CLOSEST_ULL(freq, c); @@ -117,8 +117,8 @@ static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, ctrl = pwm_lpss_read(pwm); ctrl &= ~PWM_ON_TIME_DIV_MASK; - ctrl &= ~((base_unit_range - 1) << PWM_BASE_UNIT_SHIFT); - base_unit &= (base_unit_range - 1); + ctrl &= ~(base_unit_range << PWM_BASE_UNIT_SHIFT); + base_unit &= base_unit_range; ctrl |= (u32) base_unit << PWM_BASE_UNIT_SHIFT; ctrl |= on_time_div; pwm_lpss_write(pwm, ctrl); -- GitLab From b5c050c719922901891bdd8580d224e2a0035db5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 28 Jan 2017 17:10:40 +0200 Subject: [PATCH 0084/1084] pwm: lpss: Allow duty cycle to be 0 A duty cycle is represented by values [0..] which reflects [0%..100%]. 0% of the duty cycle means always off (logical "0") on output. Allow this in the driver. 
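For reference, a standalone userspace sketch of the arithmetic these two lpss patches adjust (it is not driver code). The 19.2 MHz clock and 22 base-unit bits are the Broxton values from the board info appearing later in this series, and the freq = NSEC_PER_SEC / period step is assumed from driver code outside the visible hunks.

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* userspace stand-in for DIV_ROUND_CLOSEST_ULL() */
static uint64_t div_round_closest(uint64_t n, uint64_t d)
{
	return (n + d / 2) / d;
}

int main(void)
{
	uint64_t clk_rate = 19200000;		/* Broxton: 19.2 MHz */
	unsigned int base_unit_bits = 22;
	uint64_t base_unit_range = (1ULL << base_unit_bits) - 1;	/* the "- 1" added by the overflow fix */
	uint64_t period_ns = 5000000;		/* 200 Hz */
	uint64_t duty_ns = 0;			/* 0 %, legal after the duty-cycle patch */

	/* base_unit = round(base_unit_range * freq / c), freq in Hz */
	uint64_t freq = NSEC_PER_SEC / period_ns;	/* assumed from code outside the hunk */
	uint64_t base_unit = div_round_closest(base_unit_range * freq, clk_rate);

	/* on-time divider: 255 leaves the output off for the whole period */
	uint64_t on_time_div = 255ULL - (255ULL * duty_ns) / period_ns;

	printf("base_unit = %llu (masked: %llu), on_time_div = %llu\n",
	       (unsigned long long)base_unit,
	       (unsigned long long)(base_unit & base_unit_range),
	       (unsigned long long)on_time_div);
	return 0;
}

With duty_ns = 0 the on-time divider evaluates to 255, the always-off setting the duty-cycle patch above now permits, and the masked base_unit stays within (2^base_unit_bits - 1) as the overflow fix requires.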
Reviewed-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Thierry Reding --- drivers/pwm/pwm-lpss.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 8642feeb8abd..ffa01ab907a6 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -107,8 +107,6 @@ static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, base_unit = DIV_ROUND_CLOSEST_ULL(freq, c); - if (duty_ns <= 0) - duty_ns = 1; on_time_div = 255ULL * duty_ns; do_div(on_time_div, period_ns); on_time_div = 255ULL - on_time_div; -- GitLab From b14e8ceff03404cce4d9b85204246d3ed1259ec7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 28 Jan 2017 17:10:41 +0200 Subject: [PATCH 0085/1084] pwm: lpss: Switch to new atomic API Instead of doing things separately, which is not so reliable on some platforms, switch the driver to use new atomic API, i.e. ->apply() callback. The change has been tested on Intel platforms such as Broxton, BayTrail, and Merrifield. Reviewed-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Thierry Reding --- drivers/pwm/pwm-lpss.c | 64 +++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 35 deletions(-) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index ffa01ab907a6..09869f91d2d0 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -82,15 +82,20 @@ static inline void pwm_lpss_write(const struct pwm_device *pwm, u32 value) static void pwm_lpss_update(struct pwm_device *pwm) { + /* + * Set a limit for busyloop since not all implementations correctly + * clear PWM_SW_UPDATE bit (at least it's not visible on OS side). + */ + unsigned int count = 10; + pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); - /* Give it some time to propagate */ - usleep_range(10, 50); + while (pwm_lpss_read(pwm) & PWM_SW_UPDATE && --count) + usleep_range(10, 20); } -static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, - int duty_ns, int period_ns) +static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm, + int duty_ns, int period_ns) { - struct pwm_lpss_chip *lpwm = to_lpwm(chip); unsigned long long on_time_div; unsigned long c = lpwm->info->clk_rate, base_unit_range; unsigned long long base_unit, freq = NSEC_PER_SEC; @@ -111,8 +116,6 @@ static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, do_div(on_time_div, period_ns); on_time_div = 255ULL - on_time_div; - pm_runtime_get_sync(chip->dev); - ctrl = pwm_lpss_read(pwm); ctrl &= ~PWM_ON_TIME_DIV_MASK; ctrl &= ~(base_unit_range << PWM_BASE_UNIT_SHIFT); @@ -120,42 +123,33 @@ static int pwm_lpss_config(struct pwm_chip *chip, struct pwm_device *pwm, ctrl |= (u32) base_unit << PWM_BASE_UNIT_SHIFT; ctrl |= on_time_div; pwm_lpss_write(pwm, ctrl); - - /* - * If the PWM is already enabled we need to notify the hardware - * about the change by setting PWM_SW_UPDATE. - */ - if (pwm_is_enabled(pwm)) - pwm_lpss_update(pwm); - - pm_runtime_put(chip->dev); - - return 0; } -static int pwm_lpss_enable(struct pwm_chip *chip, struct pwm_device *pwm) +static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) { - pm_runtime_get_sync(chip->dev); + struct pwm_lpss_chip *lpwm = to_lpwm(chip); - /* - * Hardware must first see PWM_SW_UPDATE before the PWM can be - * enabled. 
- */ - pwm_lpss_update(pwm); - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE); - return 0; -} + if (state->enabled) { + if (!pwm_is_enabled(pwm)) { + pm_runtime_get_sync(chip->dev); + pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); + pwm_lpss_update(pwm); + pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE); + } else { + pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); + pwm_lpss_update(pwm); + } + } else if (pwm_is_enabled(pwm)) { + pwm_lpss_write(pwm, pwm_lpss_read(pwm) & ~PWM_ENABLE); + pm_runtime_put(chip->dev); + } -static void pwm_lpss_disable(struct pwm_chip *chip, struct pwm_device *pwm) -{ - pwm_lpss_write(pwm, pwm_lpss_read(pwm) & ~PWM_ENABLE); - pm_runtime_put(chip->dev); + return 0; } static const struct pwm_ops pwm_lpss_ops = { - .config = pwm_lpss_config, - .enable = pwm_lpss_enable, - .disable = pwm_lpss_disable, + .apply = pwm_lpss_apply, .owner = THIS_MODULE, }; -- GitLab From 10d56a4cb1c6c894c60acbaec0f8aa44aba833b0 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Sat, 28 Jan 2017 17:10:42 +0200 Subject: [PATCH 0086/1084] pwm: lpss: Avoid reconfiguring while UPDATE bit is still enabled PWM Configuration register has SW_UPDATE bit that is set when a new configuration is written to the register. The bit is automatically cleared at the start of the next output cycle by the IP block. If one writes a new configuration to the register while it still has the bit enabled, PWM may freeze. That is, while one can still write to the register, it won't have an effect. Thus, we try to sleep long enough that the bit gets cleared and make sure the bit is not enabled while we update the configuration. Reviewed-by: Mika Westerberg Tested-by: Richard Griffiths Signed-off-by: Ilkka Koskinen Signed-off-by: Andy Shevchenko Signed-off-by: Thierry Reding --- drivers/pwm/pwm-lpss.c | 52 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 09869f91d2d0..46670276690d 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -80,17 +81,37 @@ static inline void pwm_lpss_write(const struct pwm_device *pwm, u32 value) writel(value, lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM); } -static void pwm_lpss_update(struct pwm_device *pwm) +static int pwm_lpss_update(struct pwm_device *pwm) { + struct pwm_lpss_chip *lpwm = to_lpwm(pwm->chip); + const void __iomem *addr = lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM; + const unsigned int ms = 500 * USEC_PER_MSEC; + u32 val; + int err; + + pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); + /* - * Set a limit for busyloop since not all implementations correctly - * clear PWM_SW_UPDATE bit (at least it's not visible on OS side). + * PWM Configuration register has SW_UPDATE bit that is set when a new + * configuration is written to the register. The bit is automatically + * cleared at the start of the next output cycle by the IP block. + * + * If one writes a new configuration to the register while it still has + * the bit enabled, PWM may freeze. That is, while one can still write + * to the register, it won't have an effect. Thus, we try to sleep long + * enough that the bit gets cleared and make sure the bit is not + * enabled while we update the configuration. 
*/ - unsigned int count = 10; + err = readl_poll_timeout(addr, val, !(val & PWM_SW_UPDATE), 40, ms); + if (err) + dev_err(pwm->chip->dev, "PWM_SW_UPDATE was not cleared\n"); - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); - while (pwm_lpss_read(pwm) & PWM_SW_UPDATE && --count) - usleep_range(10, 20); + return err; +} + +static inline int pwm_lpss_is_updating(struct pwm_device *pwm) +{ + return (pwm_lpss_read(pwm) & PWM_SW_UPDATE) ? -EBUSY : 0; } static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm, @@ -129,16 +150,29 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { struct pwm_lpss_chip *lpwm = to_lpwm(chip); + int ret; if (state->enabled) { if (!pwm_is_enabled(pwm)) { pm_runtime_get_sync(chip->dev); + ret = pwm_lpss_is_updating(pwm); + if (ret) { + pm_runtime_put(chip->dev); + return ret; + } pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - pwm_lpss_update(pwm); + ret = pwm_lpss_update(pwm); + if (ret) { + pm_runtime_put(chip->dev); + return ret; + } pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE); } else { + ret = pwm_lpss_is_updating(pwm); + if (ret) + return ret; pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - pwm_lpss_update(pwm); + return pwm_lpss_update(pwm); } } else if (pwm_is_enabled(pwm)) { pwm_lpss_write(pwm, pwm_lpss_read(pwm) & ~PWM_ENABLE); -- GitLab From 9900073cf5587662df9b7ef59f649ff100229d85 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 28 Jan 2017 17:10:43 +0200 Subject: [PATCH 0087/1084] pwm: lpss: Do not export board infos for different PWM types The PWM LPSS probe drivers just pass a pointer to the exported board info structures to pwm_lpss_probe() based on device PCI or ACPI ID. In order to remove the knowledge of specific devices from library part of the driver and reduce noise in exported namespace just duplicate the board info structures and stop exporting them. 
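To illustrate the resulting layout, a small sketch of the pattern in plain C: each probe driver keeps its own static const board info and resolves it through an ID table, here reduced to a hand-rolled lookup instead of the real pci_device_id/driver_data machinery. The clock rates, channel counts and device IDs are the ones visible in the hunks below.

#include <stdio.h>

struct pwm_lpss_boardinfo {
	unsigned long clk_rate;
	unsigned int npwm;
	unsigned long base_unit_bits;
};

/* per-driver copies, as the patch introduces, instead of exported symbols */
static const struct pwm_lpss_boardinfo bsw_info = {
	.clk_rate = 19200000,
	.npwm = 1,
	.base_unit_bits = 16,
};

static const struct pwm_lpss_boardinfo bxt_info = {
	.clk_rate = 19200000,
	.npwm = 4,
	.base_unit_bits = 22,
};

/* stand-in for the pci_device_id table: device id -> board info pointer */
struct id_entry {
	unsigned int device;
	const struct pwm_lpss_boardinfo *info;
};

static const struct id_entry ids[] = {
	{ 0x2288, &bsw_info },	/* Braswell */
	{ 0x1ac8, &bxt_info },	/* Broxton */
	{ 0, NULL },
};

static const struct pwm_lpss_boardinfo *lookup(unsigned int device)
{
	const struct id_entry *e;

	for (e = ids; e->device; e++)
		if (e->device == device)
			return e->info;
	return NULL;
}

int main(void)
{
	const struct pwm_lpss_boardinfo *info = lookup(0x1ac8);

	if (info)
		printf("npwm = %u, clk_rate = %lu Hz\n", info->npwm, info->clk_rate);
	return 0;
}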
Signed-off-by: Andy Shevchenko Signed-off-by: Thierry Reding --- drivers/pwm/pwm-lpss-pci.c | 21 +++++++++++++++++++++ drivers/pwm/pwm-lpss-platform.c | 21 +++++++++++++++++++++ drivers/pwm/pwm-lpss.c | 24 ------------------------ drivers/pwm/pwm-lpss.h | 4 ---- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c index 3622f093490e..2e7a9a48060d 100644 --- a/drivers/pwm/pwm-lpss-pci.c +++ b/drivers/pwm/pwm-lpss-pci.c @@ -17,6 +17,27 @@ #include "pwm-lpss.h" +/* BayTrail */ +static const struct pwm_lpss_boardinfo pwm_lpss_byt_info = { + .clk_rate = 25000000, + .npwm = 1, + .base_unit_bits = 16, +}; + +/* Braswell */ +static const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = { + .clk_rate = 19200000, + .npwm = 1, + .base_unit_bits = 16, +}; + +/* Broxton */ +static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = { + .clk_rate = 19200000, + .npwm = 4, + .base_unit_bits = 22, +}; + static int pwm_lpss_probe_pci(struct pci_dev *pdev, const struct pci_device_id *id) { diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c index 54433fc6d1a4..b22b6fdadb9a 100644 --- a/drivers/pwm/pwm-lpss-platform.c +++ b/drivers/pwm/pwm-lpss-platform.c @@ -18,6 +18,27 @@ #include "pwm-lpss.h" +/* BayTrail */ +static const struct pwm_lpss_boardinfo pwm_lpss_byt_info = { + .clk_rate = 25000000, + .npwm = 1, + .base_unit_bits = 16, +}; + +/* Braswell */ +static const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = { + .clk_rate = 19200000, + .npwm = 1, + .base_unit_bits = 16, +}; + +/* Broxton */ +static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = { + .clk_rate = 19200000, + .npwm = 4, + .base_unit_bits = 22, +}; + static int pwm_lpss_probe_platform(struct platform_device *pdev) { const struct pwm_lpss_boardinfo *info; diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 46670276690d..689d2c1cbead 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -38,30 +38,6 @@ struct pwm_lpss_chip { const struct pwm_lpss_boardinfo *info; }; -/* BayTrail */ -const struct pwm_lpss_boardinfo pwm_lpss_byt_info = { - .clk_rate = 25000000, - .npwm = 1, - .base_unit_bits = 16, -}; -EXPORT_SYMBOL_GPL(pwm_lpss_byt_info); - -/* Braswell */ -const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = { - .clk_rate = 19200000, - .npwm = 1, - .base_unit_bits = 16, -}; -EXPORT_SYMBOL_GPL(pwm_lpss_bsw_info); - -/* Broxton */ -const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = { - .clk_rate = 19200000, - .npwm = 4, - .base_unit_bits = 22, -}; -EXPORT_SYMBOL_GPL(pwm_lpss_bxt_info); - static inline struct pwm_lpss_chip *to_lpwm(struct pwm_chip *chip) { return container_of(chip, struct pwm_lpss_chip, chip); diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h index 04766e0d41aa..c94cd7c2695d 100644 --- a/drivers/pwm/pwm-lpss.h +++ b/drivers/pwm/pwm-lpss.h @@ -24,10 +24,6 @@ struct pwm_lpss_boardinfo { unsigned long base_unit_bits; }; -extern const struct pwm_lpss_boardinfo pwm_lpss_byt_info; -extern const struct pwm_lpss_boardinfo pwm_lpss_bsw_info; -extern const struct pwm_lpss_boardinfo pwm_lpss_bxt_info; - struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, const struct pwm_lpss_boardinfo *info); int pwm_lpss_remove(struct pwm_lpss_chip *lpwm); -- GitLab From ae2520540cb0090eb8cdba559bcb6a82266f0308 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sat, 28 Jan 2017 17:10:44 +0200 Subject: [PATCH 0088/1084] pwm: lpss: Add Intel Gemini Lake PCI ID Intel Gemini Lake PWM is pretty much 
same as used in Intel Broxton. Add this new PCI ID to the list of supported devices. Signed-off-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Thierry Reding --- drivers/pwm/pwm-lpss-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c index 2e7a9a48060d..053088b9b66e 100644 --- a/drivers/pwm/pwm-lpss-pci.c +++ b/drivers/pwm/pwm-lpss-pci.c @@ -101,6 +101,7 @@ static const struct pci_device_id pwm_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x1ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info}, { PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info}, + { PCI_VDEVICE(INTEL, 0x31c8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x5ac8), (unsigned long)&pwm_lpss_bxt_info}, { }, }; -- GitLab From 9fb27fac39f47632aa32ffd787b4de1dbd0b8655 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Sun, 29 Jan 2017 22:54:06 +0100 Subject: [PATCH 0089/1084] pwm: imx: Remove ipg clock and enable per clock when required The use of the ipg clock was introduced with commit 7b27c160c681 ("pwm: i.MX: fix clock lookup"). In the commit message it was claimed that the ipg clock is enabled for register accesses. This is true for the ->config() callback, but not for the ->set_enable() callback. Given that the ipg clock is not consistently enabled for all register accesses we can assume that either it is not required at all or that the current code does not work. Remove the ipg clock code for now so that it's no longer in the way of refactoring the driver. On the other hand, the i.MX 7 IP requires the peripheral clock to be enabled before accessing its registers. Since ->config() can be called when the PWM is disabled (in which case, the peripheral clock is also disabled), we need to surround the imx->config() with clk_prepare_enable(per_clk)/clk_disable_unprepare(per_clk) calls. Note that the driver was working fine for the i.MX 7 IP so far because the ipg and peripheral clock use the same hardware clock gate, which guaranteed peripheral clock activation even when ->config() was called when the PWM was disabled. 
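As a rough illustration of the reasoning above, a toy sketch with stubbed clock calls (nothing here is kernel API; the stubs only model an enable count) showing why ->config() has to bracket its register access with the peripheral clock:

#include <stdio.h>

/* toy clock with an enable count, standing in for the per clock; not kernel API */
static int per_clk_enabled;

static int clk_prepare_enable_stub(void)
{
	per_clk_enabled++;
	return 0;
}

static void clk_disable_unprepare_stub(void)
{
	per_clk_enabled--;
}

/* register access is only valid while the clock is running */
static void write_period_registers(void)
{
	if (per_clk_enabled <= 0)
		fprintf(stderr, "BUG: register write with per clock off\n");
	else
		printf("registers written with per clock on\n");
}

/*
 * Mirrors the shape of the reworked imx_pwm_config(): enable the clock
 * around the access, because the PWM (and thus its clock) may be off here.
 */
static int config(void)
{
	int ret = clk_prepare_enable_stub();

	if (ret)
		return ret;

	write_period_registers();

	clk_disable_unprepare_stub();
	return 0;
}

int main(void)
{
	return config();
}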
Signed-off-by: Sascha Hauer Signed-off-by: Boris Brezillon Cc: Philipp Zabel Reviewed-by: Stefan Agner Tested-by: Stefan Agner Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index 177fb81c916d..bb64122a74be 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -49,7 +49,6 @@ struct imx_chip { struct clk *clk_per; - struct clk *clk_ipg; void __iomem *mmio_base; @@ -206,13 +205,13 @@ static int imx_pwm_config(struct pwm_chip *chip, struct imx_chip *imx = to_imx_chip(chip); int ret; - ret = clk_prepare_enable(imx->clk_ipg); + ret = clk_prepare_enable(imx->clk_per); if (ret) return ret; ret = imx->config(chip, pwm, duty_ns, period_ns); - clk_disable_unprepare(imx->clk_ipg); + clk_disable_unprepare(imx->clk_per); return ret; } @@ -293,13 +292,6 @@ static int imx_pwm_probe(struct platform_device *pdev) return PTR_ERR(imx->clk_per); } - imx->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); - if (IS_ERR(imx->clk_ipg)) { - dev_err(&pdev->dev, "getting ipg clock failed with %ld\n", - PTR_ERR(imx->clk_ipg)); - return PTR_ERR(imx->clk_ipg); - } - imx->chip.ops = &imx_pwm_ops; imx->chip.dev = &pdev->dev; imx->chip.base = -1; -- GitLab From 0038922954236a6af424e2604ece88404dfbc41c Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Sun, 29 Jan 2017 22:54:07 +0100 Subject: [PATCH 0090/1084] pwm: imx: Add separate set of PWM ops for v1 and v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch provides separate set of PWM operations utilized by i.MX's v1 and v2 of the PWM hardware. Signed-off-by: Lothar Waßmann Signed-off-by: Bhuvanchandra DV Signed-off-by: Lukasz Majewski Acked-by: Shawn Guo Reviewed-by: Sascha Hauer Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index bb64122a74be..c405cb777808 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -239,7 +239,14 @@ static void imx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) clk_disable_unprepare(imx->clk_per); } -static const struct pwm_ops imx_pwm_ops = { +static const struct pwm_ops imx_pwm_ops_v1 = { + .enable = imx_pwm_enable, + .disable = imx_pwm_disable, + .config = imx_pwm_config, + .owner = THIS_MODULE, +}; + +static const struct pwm_ops imx_pwm_ops_v2 = { .enable = imx_pwm_enable, .disable = imx_pwm_disable, .config = imx_pwm_config, @@ -250,16 +257,19 @@ struct imx_pwm_data { int (*config)(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns); void (*set_enable)(struct pwm_chip *chip, bool enable); + const struct pwm_ops *ops; }; static struct imx_pwm_data imx_pwm_data_v1 = { .config = imx_pwm_config_v1, .set_enable = imx_pwm_set_enable_v1, + .ops = &imx_pwm_ops_v1, }; static struct imx_pwm_data imx_pwm_data_v2 = { .config = imx_pwm_config_v2, .set_enable = imx_pwm_set_enable_v2, + .ops = &imx_pwm_ops_v2, }; static const struct of_device_id imx_pwm_dt_ids[] = { @@ -281,6 +291,8 @@ static int imx_pwm_probe(struct platform_device *pdev) if (!of_id) return -ENODEV; + data = of_id->data; + imx = devm_kzalloc(&pdev->dev, sizeof(*imx), GFP_KERNEL); if (imx == NULL) return -ENOMEM; @@ -292,7 +304,7 @@ static int imx_pwm_probe(struct platform_device *pdev) return PTR_ERR(imx->clk_per); } - imx->chip.ops = &imx_pwm_ops; + 
imx->chip.ops = data->ops; imx->chip.dev = &pdev->dev; imx->chip.base = -1; imx->chip.npwm = 1; @@ -303,7 +315,6 @@ static int imx_pwm_probe(struct platform_device *pdev) if (IS_ERR(imx->mmio_base)) return PTR_ERR(imx->mmio_base); - data = of_id->data; imx->config = data->config; imx->set_enable = data->set_enable; -- GitLab From b3c088fe0297d7580bef5d5830fa5fc69ae8443c Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Sun, 29 Jan 2017 22:54:08 +0100 Subject: [PATCH 0091/1084] pwm: imx: Rewrite v1 code to facilitate switch to atomic PWM The code has been rewritten to remove "generic" calls to imx_pwm_{enable|disable|config}. Such approach would facilitate switch to atomic PWM (a.k.a ->apply()) implementation. Suggested-by: Stefan Agner Suggested-by: Boris Brezillon Reviewed-by: Stefan Agner Signed-off-by: Lukasz Majewski Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index c405cb777808..5c712104066a 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -90,19 +90,33 @@ static int imx_pwm_config_v1(struct pwm_chip *chip, return 0; } -static void imx_pwm_set_enable_v1(struct pwm_chip *chip, bool enable) +static int imx_pwm_enable_v1(struct pwm_chip *chip, struct pwm_device *pwm) { struct imx_chip *imx = to_imx_chip(chip); u32 val; + int ret; + + ret = clk_prepare_enable(imx->clk_per); + if (ret < 0) + return ret; val = readl(imx->mmio_base + MX1_PWMC); + val |= MX1_PWMC_EN; + writel(val, imx->mmio_base + MX1_PWMC); - if (enable) - val |= MX1_PWMC_EN; - else - val &= ~MX1_PWMC_EN; + return 0; +} + +static void imx_pwm_disable_v1(struct pwm_chip *chip, struct pwm_device *pwm) +{ + struct imx_chip *imx = to_imx_chip(chip); + u32 val; + val = readl(imx->mmio_base + MX1_PWMC); + val &= ~MX1_PWMC_EN; writel(val, imx->mmio_base + MX1_PWMC); + + clk_disable_unprepare(imx->clk_per); } static int imx_pwm_config_v2(struct pwm_chip *chip, @@ -240,9 +254,9 @@ static void imx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) } static const struct pwm_ops imx_pwm_ops_v1 = { - .enable = imx_pwm_enable, - .disable = imx_pwm_disable, - .config = imx_pwm_config, + .enable = imx_pwm_enable_v1, + .disable = imx_pwm_disable_v1, + .config = imx_pwm_config_v1, .owner = THIS_MODULE, }; @@ -261,8 +275,6 @@ struct imx_pwm_data { }; static struct imx_pwm_data imx_pwm_data_v1 = { - .config = imx_pwm_config_v1, - .set_enable = imx_pwm_set_enable_v1, .ops = &imx_pwm_ops_v1, }; -- GitLab From 970247a486751c79903c7d853198d0106805c641 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Sun, 29 Jan 2017 22:54:09 +0100 Subject: [PATCH 0092/1084] pwm: imx: Move PWMv2 software reset code to a separate function The software reset code has been extracted from imx_pwm_config_v2 function and moved to new one - imx_pwm_sw_reset(). This change reduces the overall size of imx_pwm_config_v2() and prepares it for atomic PWM operation. 
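A userspace sketch of the bounded-poll pattern the extracted helper uses; the register read is simulated so the program runs standalone, and the retry budget of 5 is an assumption since the value of MX3_PWM_SWR_LOOP is not visible in the hunk below.

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

#define MX3_PWMCR_SWR		(1 << 3)
#define MX3_PWM_SWR_LOOP	5	/* retry budget; the real value is not shown in the hunk */

static uint32_t fake_pwmcr = MX3_PWMCR_SWR;	/* pretend SWR was just written */
static int reads;

/* simulate the IP block clearing SWR a couple of reads after the reset */
static uint32_t read_pwmcr(void)
{
	if (++reads >= 2)
		fake_pwmcr &= ~MX3_PWMCR_SWR;
	return fake_pwmcr;
}

int main(void)
{
	int wait_count = 0;
	uint32_t cr;

	do {
		usleep(200);		/* the driver uses usleep_range(200, 1000) */
		cr = read_pwmcr();
	} while ((cr & MX3_PWMCR_SWR) && (wait_count++ < MX3_PWM_SWR_LOOP));

	if (cr & MX3_PWMCR_SWR)
		fprintf(stderr, "software reset timeout\n");
	else
		printf("reset completed after %d read(s)\n", reads);
	return 0;
}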
Suggested-by: Stefan Agner Suggested-by: Boris Brezillon Signed-off-by: Lukasz Majewski Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index 5c712104066a..c944f15f574c 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -119,6 +119,25 @@ static void imx_pwm_disable_v1(struct pwm_chip *chip, struct pwm_device *pwm) clk_disable_unprepare(imx->clk_per); } +static void imx_pwm_sw_reset(struct pwm_chip *chip) +{ + struct imx_chip *imx = to_imx_chip(chip); + struct device *dev = chip->dev; + int wait_count = 0; + u32 cr; + + writel(MX3_PWMCR_SWR, imx->mmio_base + MX3_PWMCR); + do { + usleep_range(200, 1000); + cr = readl(imx->mmio_base + MX3_PWMCR); + } while ((cr & MX3_PWMCR_SWR) && + (wait_count++ < MX3_PWM_SWR_LOOP)); + + if (cr & MX3_PWMCR_SWR) + dev_warn(dev, "software reset timeout\n"); +} + + static int imx_pwm_config_v2(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { @@ -128,7 +147,7 @@ static int imx_pwm_config_v2(struct pwm_chip *chip, unsigned long period_cycles, duty_cycles, prescale; unsigned int period_ms; bool enable = pwm_is_enabled(pwm); - int wait_count = 0, fifoav; + int fifoav; u32 cr, sr; /* @@ -151,15 +170,7 @@ static int imx_pwm_config_v2(struct pwm_chip *chip, dev_warn(dev, "there is no free FIFO slot\n"); } } else { - writel(MX3_PWMCR_SWR, imx->mmio_base + MX3_PWMCR); - do { - usleep_range(200, 1000); - cr = readl(imx->mmio_base + MX3_PWMCR); - } while ((cr & MX3_PWMCR_SWR) && - (wait_count++ < MX3_PWM_SWR_LOOP)); - - if (cr & MX3_PWMCR_SWR) - dev_warn(dev, "software reset timeout\n"); + imx_pwm_sw_reset(chip); } c = clk_get_rate(imx->clk_per); -- GitLab From 73b1ff1f3e5ccc22b31fa44c98460ef05393181c Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Sun, 29 Jan 2017 22:54:10 +0100 Subject: [PATCH 0093/1084] pwm: imx: Move PWMv2 wait for fifo slot code to a separate function The code, which waits for fifo slot, has been extracted from imx_pwm_config_v2 function and moved to new one - imx_pwm_wait_fifo_slot(). This change reduces the overall size of imx_pwm_config_v2() and prepares it for atomic PWM operation. 
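A simulated, standalone sketch of that wait logic; the FIFOAV field encoding used below is assumed, since the MX3_PWMSR_FIFOAV_* values are not visible in the hunk.

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

#define NSEC_PER_MSEC		1000000ULL
#define MX3_PWMSR_FIFOAV_MASK	0x7	/* assumed field layout, for the simulation only */
#define MX3_PWMSR_FIFOAV_4WORDS	4	/* assumed: all four FIFO slots in use */

static uint32_t fake_pwmsr = MX3_PWMSR_FIFOAV_4WORDS;

/* simulate one FIFO slot draining between the two status reads */
static uint32_t read_pwmsr(void)
{
	uint32_t sr = fake_pwmsr;

	fake_pwmsr = 3;
	return sr;
}

int main(void)
{
	uint64_t period_ns = 1000000;	/* 1 ms PWM period */
	uint32_t sr = read_pwmsr();
	unsigned int fifoav = sr & MX3_PWMSR_FIFOAV_MASK;

	if (fifoav == MX3_PWMSR_FIFOAV_4WORDS) {
		/* FIFO full: sleep one rounded-up period so a slot can drain */
		unsigned int period_ms = (period_ns + NSEC_PER_MSEC - 1) / NSEC_PER_MSEC;

		usleep(period_ms * 1000);

		sr = read_pwmsr();
		if (fifoav == (sr & MX3_PWMSR_FIFOAV_MASK))
			fprintf(stderr, "there is no free FIFO slot\n");
	}

	printf("FIFOAV now %u\n", sr & MX3_PWMSR_FIFOAV_MASK);
	return 0;
}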
Suggested-by: Stefan Agner Suggested-by: Boris Brezillon Signed-off-by: Lukasz Majewski Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index c944f15f574c..d557279446ed 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -137,18 +137,36 @@ static void imx_pwm_sw_reset(struct pwm_chip *chip) dev_warn(dev, "software reset timeout\n"); } +static void imx_pwm_wait_fifo_slot(struct pwm_chip *chip, + struct pwm_device *pwm) +{ + struct imx_chip *imx = to_imx_chip(chip); + struct device *dev = chip->dev; + unsigned int period_ms; + int fifoav; + u32 sr; + + sr = readl(imx->mmio_base + MX3_PWMSR); + fifoav = sr & MX3_PWMSR_FIFOAV_MASK; + if (fifoav == MX3_PWMSR_FIFOAV_4WORDS) { + period_ms = DIV_ROUND_UP(pwm_get_period(pwm), + NSEC_PER_MSEC); + msleep(period_ms); + + sr = readl(imx->mmio_base + MX3_PWMSR); + if (fifoav == (sr & MX3_PWMSR_FIFOAV_MASK)) + dev_warn(dev, "there is no free FIFO slot\n"); + } +} static int imx_pwm_config_v2(struct pwm_chip *chip, struct pwm_device *pwm, int duty_ns, int period_ns) { struct imx_chip *imx = to_imx_chip(chip); - struct device *dev = chip->dev; unsigned long long c; unsigned long period_cycles, duty_cycles, prescale; - unsigned int period_ms; bool enable = pwm_is_enabled(pwm); - int fifoav; - u32 cr, sr; + u32 cr; /* * i.MX PWMv2 has a 4-word sample FIFO. @@ -157,21 +175,10 @@ static int imx_pwm_config_v2(struct pwm_chip *chip, * wait for a full PWM cycle to get a relinquished FIFO slot * when the controller is enabled and the FIFO is fully loaded. */ - if (enable) { - sr = readl(imx->mmio_base + MX3_PWMSR); - fifoav = sr & MX3_PWMSR_FIFOAV_MASK; - if (fifoav == MX3_PWMSR_FIFOAV_4WORDS) { - period_ms = DIV_ROUND_UP(pwm_get_period(pwm), - NSEC_PER_MSEC); - msleep(period_ms); - - sr = readl(imx->mmio_base + MX3_PWMSR); - if (fifoav == (sr & MX3_PWMSR_FIFOAV_MASK)) - dev_warn(dev, "there is no free FIFO slot\n"); - } - } else { + if (enable) + imx_pwm_wait_fifo_slot(chip, pwm); + else imx_pwm_sw_reset(chip); - } c = clk_get_rate(imx->clk_per); c = c * period_ns; -- GitLab From 0ca1a11a1d816c8fb0bb29b985666cef912958c1 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Sun, 29 Jan 2017 22:54:11 +0100 Subject: [PATCH 0094/1084] pwm: imx: Provide atomic PWM support for i.MX PWMv2 This commit provides apply() callback implementation for i.MX's PWMv2. 
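A standalone sketch of the period, prescaler and duty arithmetic the new apply() callback performs, lifted from the hunk below into plain userspace C; the 66 MHz clock rate is only an example value, not something the patch specifies.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t clk_rate = 66000000;	/* example value only; not specified by the patch */
	uint64_t period_ns = 1000000;	/* 1 kHz */
	uint64_t duty_ns = 250000;	/* 25 % */
	uint64_t c, period_cycles, duty_cycles, prescale;

	c = clk_rate * period_ns;
	c /= 1000000000ULL;		/* do_div(c, 1000000000) */
	period_cycles = c;

	prescale = period_cycles / 0x10000 + 1;	/* keep the divided period within 16 bits */
	period_cycles /= prescale;

	c = period_cycles * duty_ns;
	c /= period_ns;			/* do_div(c, state->period) */
	duty_cycles = c;

	/* PWMPR holds the period minus 2; the hardware adds the 2 back */
	period_cycles = period_cycles > 2 ? period_cycles - 2 : 0;

	printf("prescale = %llu, PWMPR = %llu, PWMSAR = %llu\n",
	       (unsigned long long)prescale,
	       (unsigned long long)period_cycles,
	       (unsigned long long)duty_cycles);
	return 0;
}

The prescale = period_cycles / 0x10000 + 1 term keeps the divided period within the 16-bit counter, and PWMPR is written as the period minus 2 because, per the comment carried over into the hunk, the hardware uses the PWMPR value plus 2 as the real period.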
Suggested-by: Stefan Agner Suggested-by: Boris Brezillon Signed-off-by: Lukasz Majewski Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx.c | 67 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index d557279446ed..34f3031d5c40 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -248,6 +248,72 @@ static int imx_pwm_config(struct pwm_chip *chip, return ret; } +static int imx_pwm_apply_v2(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + unsigned long period_cycles, duty_cycles, prescale; + struct imx_chip *imx = to_imx_chip(chip); + struct pwm_state cstate; + unsigned long long c; + int ret; + + pwm_get_state(pwm, &cstate); + + if (state->enabled) { + c = clk_get_rate(imx->clk_per); + c *= state->period; + + do_div(c, 1000000000); + period_cycles = c; + + prescale = period_cycles / 0x10000 + 1; + + period_cycles /= prescale; + c = (unsigned long long)period_cycles * state->duty_cycle; + do_div(c, state->period); + duty_cycles = c; + + /* + * according to imx pwm RM, the real period value should be + * PERIOD value in PWMPR plus 2. + */ + if (period_cycles > 2) + period_cycles -= 2; + else + period_cycles = 0; + + /* + * Wait for a free FIFO slot if the PWM is already enabled, and + * flush the FIFO if the PWM was disabled and is about to be + * enabled. + */ + if (cstate.enabled) { + imx_pwm_wait_fifo_slot(chip, pwm); + } else { + ret = clk_prepare_enable(imx->clk_per); + if (ret) + return ret; + + imx_pwm_sw_reset(chip); + } + + writel(duty_cycles, imx->mmio_base + MX3_PWMSAR); + writel(period_cycles, imx->mmio_base + MX3_PWMPR); + + writel(MX3_PWMCR_PRESCALER(prescale) | + MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN | + MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH | + MX3_PWMCR_EN, + imx->mmio_base + MX3_PWMCR); + } else if (cstate.enabled) { + writel(0, imx->mmio_base + MX3_PWMCR); + + clk_disable_unprepare(imx->clk_per); + } + + return 0; +} + static int imx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) { struct imx_chip *imx = to_imx_chip(chip); @@ -279,6 +345,7 @@ static const struct pwm_ops imx_pwm_ops_v1 = { }; static const struct pwm_ops imx_pwm_ops_v2 = { + .apply = imx_pwm_apply_v2, .enable = imx_pwm_enable, .disable = imx_pwm_disable, .config = imx_pwm_config, -- GitLab From 871985b154ada5a0edaf1aa22d6a1de115b0213d Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Sun, 29 Jan 2017 22:54:12 +0100 Subject: [PATCH 0095/1084] pwm: imx: Remove redundant i.MX PWMv2 code The code providing functionality surpassed by the atomic PWM is not needed anymore and hence can be removed. 
Suggested-by: Stefan Agner Suggested-by: Boris Brezillon Signed-off-by: Lukasz Majewski Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx.c | 127 ------------------------------------------ 1 file changed, 127 deletions(-) diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index 34f3031d5c40..0a81c028cb11 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -53,10 +53,6 @@ struct imx_chip { void __iomem *mmio_base; struct pwm_chip chip; - - int (*config)(struct pwm_chip *chip, - struct pwm_device *pwm, int duty_ns, int period_ns); - void (*set_enable)(struct pwm_chip *chip, bool enable); }; #define to_imx_chip(chip) container_of(chip, struct imx_chip, chip) @@ -159,95 +155,6 @@ static void imx_pwm_wait_fifo_slot(struct pwm_chip *chip, } } -static int imx_pwm_config_v2(struct pwm_chip *chip, - struct pwm_device *pwm, int duty_ns, int period_ns) -{ - struct imx_chip *imx = to_imx_chip(chip); - unsigned long long c; - unsigned long period_cycles, duty_cycles, prescale; - bool enable = pwm_is_enabled(pwm); - u32 cr; - - /* - * i.MX PWMv2 has a 4-word sample FIFO. - * In order to avoid FIFO overflow issue, we do software reset - * to clear all sample FIFO if the controller is disabled or - * wait for a full PWM cycle to get a relinquished FIFO slot - * when the controller is enabled and the FIFO is fully loaded. - */ - if (enable) - imx_pwm_wait_fifo_slot(chip, pwm); - else - imx_pwm_sw_reset(chip); - - c = clk_get_rate(imx->clk_per); - c = c * period_ns; - do_div(c, 1000000000); - period_cycles = c; - - prescale = period_cycles / 0x10000 + 1; - - period_cycles /= prescale; - c = (unsigned long long)period_cycles * duty_ns; - do_div(c, period_ns); - duty_cycles = c; - - /* - * according to imx pwm RM, the real period value should be - * PERIOD value in PWMPR plus 2. 
- */ - if (period_cycles > 2) - period_cycles -= 2; - else - period_cycles = 0; - - writel(duty_cycles, imx->mmio_base + MX3_PWMSAR); - writel(period_cycles, imx->mmio_base + MX3_PWMPR); - - cr = MX3_PWMCR_PRESCALER(prescale) | - MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN | - MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH; - - if (enable) - cr |= MX3_PWMCR_EN; - - writel(cr, imx->mmio_base + MX3_PWMCR); - - return 0; -} - -static void imx_pwm_set_enable_v2(struct pwm_chip *chip, bool enable) -{ - struct imx_chip *imx = to_imx_chip(chip); - u32 val; - - val = readl(imx->mmio_base + MX3_PWMCR); - - if (enable) - val |= MX3_PWMCR_EN; - else - val &= ~MX3_PWMCR_EN; - - writel(val, imx->mmio_base + MX3_PWMCR); -} - -static int imx_pwm_config(struct pwm_chip *chip, - struct pwm_device *pwm, int duty_ns, int period_ns) -{ - struct imx_chip *imx = to_imx_chip(chip); - int ret; - - ret = clk_prepare_enable(imx->clk_per); - if (ret) - return ret; - - ret = imx->config(chip, pwm, duty_ns, period_ns); - - clk_disable_unprepare(imx->clk_per); - - return ret; -} - static int imx_pwm_apply_v2(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { @@ -314,29 +221,6 @@ static int imx_pwm_apply_v2(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } -static int imx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct imx_chip *imx = to_imx_chip(chip); - int ret; - - ret = clk_prepare_enable(imx->clk_per); - if (ret) - return ret; - - imx->set_enable(chip, true); - - return 0; -} - -static void imx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) -{ - struct imx_chip *imx = to_imx_chip(chip); - - imx->set_enable(chip, false); - - clk_disable_unprepare(imx->clk_per); -} - static const struct pwm_ops imx_pwm_ops_v1 = { .enable = imx_pwm_enable_v1, .disable = imx_pwm_disable_v1, @@ -346,16 +230,10 @@ static const struct pwm_ops imx_pwm_ops_v1 = { static const struct pwm_ops imx_pwm_ops_v2 = { .apply = imx_pwm_apply_v2, - .enable = imx_pwm_enable, - .disable = imx_pwm_disable, - .config = imx_pwm_config, .owner = THIS_MODULE, }; struct imx_pwm_data { - int (*config)(struct pwm_chip *chip, - struct pwm_device *pwm, int duty_ns, int period_ns); - void (*set_enable)(struct pwm_chip *chip, bool enable); const struct pwm_ops *ops; }; @@ -364,8 +242,6 @@ static struct imx_pwm_data imx_pwm_data_v1 = { }; static struct imx_pwm_data imx_pwm_data_v2 = { - .config = imx_pwm_config_v2, - .set_enable = imx_pwm_set_enable_v2, .ops = &imx_pwm_ops_v2, }; @@ -412,9 +288,6 @@ static int imx_pwm_probe(struct platform_device *pdev) if (IS_ERR(imx->mmio_base)) return PTR_ERR(imx->mmio_base); - imx->config = data->config; - imx->set_enable = data->set_enable; - ret = pwmchip_add(&imx->chip); if (ret < 0) return ret; -- GitLab From c322f457755aea8eea91c79c473e17ffa7f12cfc Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Sun, 29 Jan 2017 22:54:14 +0100 Subject: [PATCH 0096/1084] pwm: imx: doc: Update imx-pwm.txt documentation entry The imx-pwm.txt documentation update as a preparation for polarity support. 
Signed-off-by: Bhuvanchandra DV Signed-off-by: Lukasz Majewski Acked-by: Rob Herring Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- Documentation/devicetree/bindings/pwm/imx-pwm.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pwm/imx-pwm.txt b/Documentation/devicetree/bindings/pwm/imx-pwm.txt index e00c2e9f484d..c61bdf8cd41b 100644 --- a/Documentation/devicetree/bindings/pwm/imx-pwm.txt +++ b/Documentation/devicetree/bindings/pwm/imx-pwm.txt @@ -6,8 +6,8 @@ Required properties: - "fsl,imx1-pwm" for PWM compatible with the one integrated on i.MX1 - "fsl,imx27-pwm" for PWM compatible with the one integrated on i.MX27 - reg: physical base address and length of the controller's registers -- #pwm-cells: should be 2. See pwm.txt in this directory for a description of - the cells format. +- #pwm-cells: 2 for i.MX1 and 3 for i.MX27 and newer SoCs. See pwm.txt + in this directory for a description of the cells format. - clocks : Clock specifiers for both ipg and per clocks. - clock-names : Clock names should include both "ipg" and "per" See the clock consumer binding, @@ -17,7 +17,7 @@ See the clock consumer binding, Example: pwm1: pwm@53fb4000 { - #pwm-cells = <2>; + #pwm-cells = <3>; compatible = "fsl,imx53-pwm", "fsl,imx27-pwm"; reg = <0x53fb4000 0x4000>; clocks = <&clks IMX5_CLK_PWM1_IPG_GATE>, -- GitLab From 326ed314fefebb259563926c8c6110a009562e07 Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Sun, 29 Jan 2017 22:54:15 +0100 Subject: [PATCH 0097/1084] pwm: imx: Add polarity inversion support to i.MX's PWMv2 With this patch the polarity settings for i.MX's PWMv2 is now supported on top of atomic PWM setting Signed-off-by: Bhuvanchandra DV Signed-off-by: Lukasz Majewski Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/pwm-imx.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index 0a81c028cb11..957099e82ed3 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -38,6 +38,7 @@ #define MX3_PWMCR_DOZEEN (1 << 24) #define MX3_PWMCR_WAITEN (1 << 23) #define MX3_PWMCR_DBGEN (1 << 22) +#define MX3_PWMCR_POUTC (1 << 18) #define MX3_PWMCR_CLKSRC_IPG_HIGH (2 << 16) #define MX3_PWMCR_CLKSRC_IPG (1 << 16) #define MX3_PWMCR_SWR (1 << 3) @@ -163,6 +164,7 @@ static int imx_pwm_apply_v2(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state cstate; unsigned long long c; int ret; + u32 cr; pwm_get_state(pwm, &cstate); @@ -207,11 +209,15 @@ static int imx_pwm_apply_v2(struct pwm_chip *chip, struct pwm_device *pwm, writel(duty_cycles, imx->mmio_base + MX3_PWMSAR); writel(period_cycles, imx->mmio_base + MX3_PWMPR); - writel(MX3_PWMCR_PRESCALER(prescale) | - MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN | - MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH | - MX3_PWMCR_EN, - imx->mmio_base + MX3_PWMCR); + cr = MX3_PWMCR_PRESCALER(prescale) | + MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN | + MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH | + MX3_PWMCR_EN; + + if (state->polarity == PWM_POLARITY_INVERSED) + cr |= MX3_PWMCR_POUTC; + + writel(cr, imx->mmio_base + MX3_PWMCR); } else if (cstate.enabled) { writel(0, imx->mmio_base + MX3_PWMCR); @@ -234,6 +240,7 @@ static const struct pwm_ops imx_pwm_ops_v2 = { }; struct imx_pwm_data { + bool polarity_supported; const struct pwm_ops *ops; }; @@ -242,6 +249,7 @@ static struct imx_pwm_data imx_pwm_data_v1 = { }; static struct imx_pwm_data imx_pwm_data_v2 = { + .polarity_supported = 
true, .ops = &imx_pwm_ops_v2, }; @@ -283,6 +291,12 @@ static int imx_pwm_probe(struct platform_device *pdev) imx->chip.npwm = 1; imx->chip.can_sleep = true; + if (data->polarity_supported) { + dev_dbg(&pdev->dev, "PWM supports output inversion\n"); + imx->chip.of_xlate = of_pwm_xlate_with_flags; + imx->chip.of_pwm_n_cells = 3; + } + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); imx->mmio_base = devm_ioremap_resource(&pdev->dev, r); if (IS_ERR(imx->mmio_base)) -- GitLab From f2dafc095018fcc6a71793cfefdd490c9f1c63be Mon Sep 17 00:00:00 2001 From: Lothar Wassmann Date: Sun, 29 Jan 2017 22:54:05 +0100 Subject: [PATCH 0098/1084] pwm: Print error messages with pr_err() instead of pr_debug() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the messages that are printed in case of fatal errors actually visible to the user without having to recompile the driver with debugging enabled. Signed-off-by: Lothar Waßmann Signed-off-by: Bhuvanchandra DV Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 78e114a11c4f..012e27c7f27e 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -663,13 +663,13 @@ struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id) err = of_parse_phandle_with_args(np, "pwms", "#pwm-cells", index, &args); if (err) { - pr_debug("%s(): can't parse \"pwms\" property\n", __func__); + pr_err("%s(): can't parse \"pwms\" property\n", __func__); return ERR_PTR(err); } pc = of_node_to_pwmchip(args.np); if (IS_ERR(pc)) { - pr_debug("%s(): PWM chip not found\n", __func__); + pr_err("%s(): PWM chip not found\n", __func__); pwm = ERR_CAST(pc); goto put; } -- GitLab From 42883cbc086b3f7aca9f1754f2d570af922825fc Mon Sep 17 00:00:00 2001 From: Lothar Wassmann Date: Sun, 29 Jan 2017 22:54:13 +0100 Subject: [PATCH 0099/1084] pwm: Make the PWM_POLARITY flag in DTB optional Change the PWM chip driver registration so that a chip driver that supports polarity inversion can still be used with DTBs that don't provide the polarity flag as part of the specifier. This is done to provide polarity inversion support for the pwm-imx driver without having to modify all existing DTS files. 
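A standalone sketch of the cell-count handling this change introduces in of_pwm_xlate_with_flags(), reduced to plain C so it can be compiled and run on its own; the numeric value of the PWM_POLARITY_INVERTED flag is assumed here.

#include <stdio.h>

enum pwm_polarity { PWM_POLARITY_NORMAL, PWM_POLARITY_INVERSED };

#define PWM_POLARITY_INVERTED	(1 << 0)	/* DT flag value assumed for this sketch */

struct pwm_args {
	unsigned int period;
	enum pwm_polarity polarity;
};

static int xlate_with_flags(const unsigned int *cells, int count, struct pwm_args *out)
{
	/* hwpwm index and period are mandatory */
	if (count < 2)
		return -1;

	out->period = cells[1];
	out->polarity = PWM_POLARITY_NORMAL;	/* default when the flags cell is absent */

	if (count > 2 && (cells[2] & PWM_POLARITY_INVERTED))
		out->polarity = PWM_POLARITY_INVERSED;

	return 0;
}

int main(void)
{
	unsigned int two_cells[] = { 0, 5000000 };
	unsigned int three_cells[] = { 0, 5000000, PWM_POLARITY_INVERTED };
	struct pwm_args a;

	xlate_with_flags(two_cells, 2, &a);
	printf("2 cells: period = %u, inversed = %d\n", a.period, a.polarity == PWM_POLARITY_INVERSED);

	xlate_with_flags(three_cells, 3, &a);
	printf("3 cells: period = %u, inversed = %d\n", a.period, a.polarity == PWM_POLARITY_INVERSED);
	return 0;
}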
Signed-off-by: Lothar Wassmann Signed-off-by: Bhuvanchandra DV Suggested-by: Sascha Hauer Signed-off-by: Lukasz Majewski Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 012e27c7f27e..e3d6c5437070 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -137,9 +137,14 @@ of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args) { struct pwm_device *pwm; + /* check, whether the driver supports a third cell for flags */ if (pc->of_pwm_n_cells < 3) return ERR_PTR(-EINVAL); + /* flags in the third cell are optional */ + if (args->args_count < 2) + return ERR_PTR(-EINVAL); + if (args->args[0] >= pc->npwm) return ERR_PTR(-EINVAL); @@ -148,11 +153,10 @@ of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args) return pwm; pwm->args.period = args->args[1]; + pwm->args.polarity = PWM_POLARITY_NORMAL; - if (args->args[2] & PWM_POLARITY_INVERTED) + if (args->args_count > 2 && args->args[2] & PWM_POLARITY_INVERTED) pwm->args.polarity = PWM_POLARITY_INVERSED; - else - pwm->args.polarity = PWM_POLARITY_NORMAL; return pwm; } @@ -163,9 +167,14 @@ of_pwm_simple_xlate(struct pwm_chip *pc, const struct of_phandle_args *args) { struct pwm_device *pwm; + /* sanity check driver support */ if (pc->of_pwm_n_cells < 2) return ERR_PTR(-EINVAL); + /* all cells are required */ + if (args->args_count != pc->of_pwm_n_cells) + return ERR_PTR(-EINVAL); + if (args->args[0] >= pc->npwm) return ERR_PTR(-EINVAL); @@ -674,13 +683,6 @@ struct pwm_device *of_pwm_get(struct device_node *np, const char *con_id) goto put; } - if (args.args_count != pc->of_pwm_n_cells) { - pr_debug("%s: wrong #pwm-cells for %s\n", np->full_name, - args.np->full_name); - pwm = ERR_PTR(-EINVAL); - goto put; - } - pwm = pc->of_xlate(pc, &args); if (IS_ERR(pwm)) goto put; -- GitLab From 69efb3439ccf2ce72e01edde05d2c63d624e251e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 22 Jan 2017 17:14:07 +0100 Subject: [PATCH 0100/1084] pwm: Don't hold pwm_lookup_lock longer than necessary There is no need to hold pwm_lookup_lock after we're done with looping over pwm_lookup_list, so release it earlier. Signed-off-by: Hans de Goede Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index e3d6c5437070..799c4fb4cc2f 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -759,9 +759,9 @@ void pwm_remove_table(struct pwm_lookup *table, size_t num) */ struct pwm_device *pwm_get(struct device *dev, const char *con_id) { - struct pwm_device *pwm = ERR_PTR(-EPROBE_DEFER); const char *dev_id = dev ? 
dev_name(dev) : NULL; - struct pwm_chip *chip = NULL; + struct pwm_device *pwm; + struct pwm_chip *chip; unsigned int best = 0; struct pwm_lookup *p, *chosen = NULL; unsigned int match; @@ -819,24 +819,22 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) } } - if (!chosen) { - pwm = ERR_PTR(-ENODEV); - goto out; - } + mutex_unlock(&pwm_lookup_lock); + + if (!chosen) + return ERR_PTR(-ENODEV); chip = pwmchip_find_by_name(chosen->provider); if (!chip) - goto out; + return ERR_PTR(-EPROBE_DEFER); pwm = pwm_request_from_chip(chip, chosen->index, con_id ?: dev_id); if (IS_ERR(pwm)) - goto out; + return pwm; pwm->args.period = chosen->period; pwm->args.polarity = chosen->polarity; -out: - mutex_unlock(&pwm_lookup_lock); return pwm; } EXPORT_SYMBOL_GPL(pwm_get); -- GitLab From b526a314263ea217b8fa9758dca5dc245fd49997 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 22 Jan 2017 17:14:08 +0100 Subject: [PATCH 0101/1084] pwm: Try to load modules during pwm_get() Add a module name string to the pwm_lookup struct and if specified try to load the module using request_module() if pwmchip_find_by_name() is unable to find the PWM chip. This is a last resort to work around drivers that can't - and can't be made to - deal with deferred probe. Signed-off-by: Hans de Goede [thierry.reding@gmail.com: rename new macro, reword commit message] [thierry.reding@gmail.com: add comment explaining use-case] Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 14 ++++++++++++++ include/linux/pwm.h | 23 +++++++++++++++-------- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 799c4fb4cc2f..a0860b30bd93 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -765,6 +765,7 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) unsigned int best = 0; struct pwm_lookup *p, *chosen = NULL; unsigned int match; + int err; /* look up via DT first */ if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node) @@ -825,6 +826,19 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id) return ERR_PTR(-ENODEV); chip = pwmchip_find_by_name(chosen->provider); + + /* + * If the lookup entry specifies a module, load the module and retry + * the PWM chip lookup. This can be used to work around driver load + * ordering issues if driver's can't be made to properly support the + * deferred probe mechanism. 
+ */ + if (!chip && chosen->module) { + err = request_module(chosen->module); + if (err == 0) + chip = pwmchip_find_by_name(chosen->provider); + } + if (!chip) return ERR_PTR(-EPROBE_DEFER); diff --git a/include/linux/pwm.h b/include/linux/pwm.h index eae215ef1b2c..08fad7c6a471 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -603,18 +603,25 @@ struct pwm_lookup { const char *con_id; unsigned int period; enum pwm_polarity polarity; + const char *module; /* optional, may be NULL */ }; -#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \ - { \ - .provider = _provider, \ - .index = _index, \ - .dev_id = _dev_id, \ - .con_id = _con_id, \ - .period = _period, \ - .polarity = _polarity \ +#define PWM_LOOKUP_WITH_MODULE(_provider, _index, _dev_id, _con_id, \ + _period, _polarity, _module) \ + { \ + .provider = _provider, \ + .index = _index, \ + .dev_id = _dev_id, \ + .con_id = _con_id, \ + .period = _period, \ + .polarity = _polarity, \ + .module = _module, \ } +#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \ + PWM_LOOKUP_WITH_MODULE(_provider, _index, _dev_id, _con_id, _period, \ + _polarity, NULL) + #if IS_ENABLED(CONFIG_PWM) void pwm_add_table(struct pwm_lookup *table, size_t num); void pwm_remove_table(struct pwm_lookup *table, size_t num); -- GitLab From ddeaa6379d50a530f9f57b3d12f7940e079b052c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 15 Oct 2016 13:59:03 -0700 Subject: [PATCH 0102/1084] sunrpc & nfs: Add and use dprintk_cont macros Allow line continuations to work properly with KERN_CONT. Signed-off-by: Joe Perches [Anna: Add fallback dprintk_cont() for when CONFIG_SUNRPC_DEBUG=n] Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 6 ++-- include/linux/sunrpc/debug.h | 58 +++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b00d53d13d47..ad4219a41f25 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1787,7 +1787,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) if (status < 0) { nfs_context_set_write_error(req->wb_context, status); nfs_inode_remove_request(req); - dprintk(", error = %d\n", status); + dprintk_cont(", error = %d\n", status); goto next; } @@ -1796,11 +1796,11 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { /* We have a match */ nfs_inode_remove_request(req); - dprintk(" OK\n"); + dprintk_cont(" OK\n"); goto next; } /* We have a mismatch. Write the page again */ - dprintk(" mismatch\n"); + dprintk_cont(" mismatch\n"); nfs_mark_request_dirty(req); set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); next: diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 59a7889e15db..8da0f37f3bdc 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -20,33 +20,55 @@ extern unsigned int nfsd_debug; extern unsigned int nlm_debug; #endif -#define dprintk(args...) dfprintk(FACILITY, ## args) -#define dprintk_rcu(args...) dfprintk_rcu(FACILITY, ## args) +#define dprintk(fmt, ...) \ + dfprintk(FACILITY, fmt, ##__VA_ARGS__) +#define dprintk_cont(fmt, ...) \ + dfprintk_cont(FACILITY, fmt, ##__VA_ARGS__) +#define dprintk_rcu(fmt, ...) \ + dfprintk_rcu(FACILITY, fmt, ##__VA_ARGS__) +#define dprintk_rcu_cont(fmt, ...) 
\ + dfprintk_rcu_cont(FACILITY, fmt, ##__VA_ARGS__) #undef ifdebug #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) -# define dfprintk(fac, args...) \ - do { \ - ifdebug(fac) \ - printk(KERN_DEFAULT args); \ - } while (0) - -# define dfprintk_rcu(fac, args...) \ - do { \ - ifdebug(fac) { \ - rcu_read_lock(); \ - printk(KERN_DEFAULT args); \ - rcu_read_unlock(); \ - } \ - } while (0) +# define dfprintk(fac, fmt, ...) \ +do { \ + ifdebug(fac) \ + printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ +} while (0) + +# define dfprintk_cont(fac, fmt, ...) \ +do { \ + ifdebug(fac) \ + printk(KERN_CONT fmt, ##__VA_ARGS__); \ +} while (0) + +# define dfprintk_rcu(fac, fmt, ...) \ +do { \ + ifdebug(fac) { \ + rcu_read_lock(); \ + printk(KERN_DEFAULT fmt, ##__VA_ARGS__); \ + rcu_read_unlock(); \ + } \ +} while (0) + +# define dfprintk_rcu_cont(fac, fmt, ...) \ +do { \ + ifdebug(fac) { \ + rcu_read_lock(); \ + printk(KERN_CONT fmt, ##__VA_ARGS__); \ + rcu_read_unlock(); \ + } \ +} while (0) # define RPC_IFDEBUG(x) x #else # define ifdebug(fac) if (0) -# define dfprintk(fac, args...) do {} while (0) -# define dfprintk_rcu(fac, args...) do {} while (0) +# define dfprintk(fac, fmt, ...) do {} while (0) +# define dfprintk_cont(fac, fmt, ...) do {} while (0) +# define dfprintk_rcu(fac, fmt, ...) do {} while (0) # define RPC_IFDEBUG(x) #endif -- GitLab From 4c3ffd058c28bb1aac30a68d1f02de4a2077823e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Jan 2017 14:19:58 +1100 Subject: [PATCH 0103/1084] SUNRPC: two small improvements to rpcauth shrinker. 1/ If we find an entry that is too young to be pruned, return SHRINK_STOP to ensure we don't get called again. This is more correct, and avoids wasting a little CPU time. Prior to 3.12, it can prevent drop_slab() from spinning indefinitely. 2/ Return a precise number from rpcauth_cache_shrink_count(), rather than rounding down to a multiple of 100 (of whatever sysctl_vfs_cache_pressure is). This ensures that when we "echo 3 > /proc/sys/vm/drop_caches", this cache is still purged, even if it has fewer than 100 entires. Neither of these are really important, they just make behaviour more predicatable, which can be helpful when debugging related issues. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 2bff63a73cf8..d8f17ea7932e 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -464,8 +464,10 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) * Note that the cred_unused list must be time-ordered. */ if (time_in_range(cred->cr_expire, expired, jiffies) && - test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) + test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { + freed = SHRINK_STOP; break; + } list_del_init(&cred->cr_lru); number_cred_unused--; @@ -520,7 +522,7 @@ static unsigned long rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; + return number_cred_unused * sysctl_vfs_cache_pressure / 100; } static void -- GitLab From 6f6e3c09c0dde7fbaf36f02ffa97bbb13fecd637 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 13 Jan 2017 11:04:27 +1100 Subject: [PATCH 0104/1084] NFS: tidy up nfs_show_mountd_netid This function is a bit clumsy, incorrectly producing ",mountproto=" if mountd_protocol is 0 and !showdefaults, and duplicating the code for reporting "auto". 
Tidy it up so that it only makes a single seq_printf() call, and more obviously does the right thing. Fixes: ee671b016fbf ("NFS: convert proto= option to use netids rather than a protoname") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6bca17883b93..54e0f9f2dd94 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -531,39 +531,32 @@ static void nfs_show_mountd_netid(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { struct sockaddr *sap = (struct sockaddr *) &nfss->mountd_address; + char *proto = NULL; - seq_printf(m, ",mountproto="); switch (sap->sa_family) { case AF_INET: switch (nfss->mountd_protocol) { case IPPROTO_UDP: - seq_printf(m, RPCBIND_NETID_UDP); + proto = RPCBIND_NETID_UDP; break; case IPPROTO_TCP: - seq_printf(m, RPCBIND_NETID_TCP); + proto = RPCBIND_NETID_TCP; break; - default: - if (showdefaults) - seq_printf(m, "auto"); } break; case AF_INET6: switch (nfss->mountd_protocol) { case IPPROTO_UDP: - seq_printf(m, RPCBIND_NETID_UDP6); + proto = RPCBIND_NETID_UDP6; break; case IPPROTO_TCP: - seq_printf(m, RPCBIND_NETID_TCP6); + proto = RPCBIND_NETID_TCP6; break; - default: - if (showdefaults) - seq_printf(m, "auto"); } break; - default: - if (showdefaults) - seq_printf(m, "auto"); } + if (proto || showdefaults) + seq_printf(m, ",mountproto=%s", proto ?: "auto"); } static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, -- GitLab From efc6f4aa742d7e8a67e44d11ab18b1c865880b4c Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 9 Jan 2017 15:14:33 -0500 Subject: [PATCH 0105/1084] NFS: Move nfs4_get_session() into nfs4_session.h This puts session related functions together in the same space. I only keep one version of this function, since this variable will always be NULL when using NFS v4.0. 
Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 1 + fs/nfs/nfs4_fs.h | 10 ---------- fs/nfs/nfs4session.h | 5 +++++ 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index d12ff9385f49..b752d85ad8c6 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -12,6 +12,7 @@ #include "nfs42.h" #include "iostat.h" #include "pnfs.h" +#include "nfs4session.h" #include "internal.h" #define NFSDBG_FACILITY NFSDBG_PROC diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 665165833660..04ffea093d56 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -273,11 +273,6 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, fmode_t fmode); #if defined(CONFIG_NFS_V4_1) -static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) -{ - return server->nfs_client->cl_session; -} - extern int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); @@ -357,11 +352,6 @@ nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, hdr->args.stable = NFS_FILE_SYNC; } #else /* CONFIG_NFS_v4_1 */ -static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) -{ - return NULL; -} - static inline bool is_ds_only_client(struct nfs_client *clp) { diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index dae385500005..22cb55015b2b 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -103,6 +103,11 @@ static inline bool nfs4_test_locked_slot(const struct nfs4_slot_table *tbl, return !!test_bit(slotid, tbl->used_slots); } +static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) +{ + return server->nfs_client->cl_session; +} + #if defined(CONFIG_NFS_V4_1) extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl, u32 target_highest_slotid); -- GitLab From 172d9de15a0da9b6d52d2bae21a362689d82d35a Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 14 May 2015 10:56:10 -0400 Subject: [PATCH 0106/1084] NFS: Change nfs4_get_session() to take an nfs_client structure pNFS only has access to the nfs_client structure, and not the nfs_server, so we need to make this change so the function can be used by pNFS as well. 
Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 5 ++--- fs/nfs/nfs4proc.c | 10 +++++----- fs/nfs/nfs4session.h | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index b752d85ad8c6..b03010eebafd 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -332,9 +332,8 @@ nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) } nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid); spin_unlock(&inode->i_lock); - nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, - &data->res.seq_res, task); - + nfs41_setup_sequence(nfs4_get_session(server->nfs_client), + &data->args.seq_args, &data->res.seq_res, task); } static void diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0a0eaecf9676..46cb55a81dbf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -952,7 +952,7 @@ static int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_res *res, struct rpc_task *task) { - struct nfs4_session *session = nfs4_get_session(server); + struct nfs4_session *session = nfs4_get_session(server->nfs_client); int ret = 0; if (!session) @@ -972,7 +972,7 @@ static int nfs4_setup_sequence(const struct nfs_server *server, static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) { struct nfs4_call_sync_data *data = calldata; - struct nfs4_session *session = nfs4_get_session(data->seq_server); + struct nfs4_session *session = nfs4_get_session(data->seq_server->nfs_client); dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); @@ -8397,7 +8397,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; struct nfs_server *server = NFS_SERVER(lgp->args.inode); - struct nfs4_session *session = nfs4_get_session(server); + struct nfs4_session *session = nfs4_get_session(server->nfs_client); dprintk("--> %s\n", __func__); nfs41_setup_sequence(session, &lgp->args.seq_args, @@ -8794,7 +8794,7 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutcommit_data *data = calldata; struct nfs_server *server = NFS_SERVER(data->args.inode); - struct nfs4_session *session = nfs4_get_session(server); + struct nfs4_session *session = nfs4_get_session(server->nfs_client); nfs41_setup_sequence(session, &data->args.seq_args, @@ -9120,7 +9120,7 @@ struct nfs_free_stateid_data { static void nfs41_free_stateid_prepare(struct rpc_task *task, void *calldata) { struct nfs_free_stateid_data *data = calldata; - nfs41_setup_sequence(nfs4_get_session(data->server), + nfs41_setup_sequence(nfs4_get_session(data->server->nfs_client), &data->args.seq_args, &data->res.seq_res, task); diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index 22cb55015b2b..f6378d95b1b5 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -103,9 +103,9 @@ static inline bool nfs4_test_locked_slot(const struct nfs4_slot_table *tbl, return !!test_bit(slotid, tbl->used_slots); } -static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) +static inline struct nfs4_session *nfs4_get_session(const struct nfs_client *clp) { - return server->nfs_client->cl_session; + return clp->cl_session; } #if defined(CONFIG_NFS_V4_1) -- GitLab From 42e1cca7e91e1a7502694cfd18857ed243f54174 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 9 Jan 2017 15:48:22 -0500 Subject: [PATCH 0107/1084] NFS: Change nfs4_setup_sequence() to take an nfs_client structure I want to have all callers use this 
function, rather than calling the NFS v4.0 and v4.1 versions directly. This includes pNFS, which only has access to the nfs_client structure in some places. Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 46cb55a81dbf..f62d58cb3014 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -947,16 +947,16 @@ int nfs41_setup_sequence(struct nfs4_session *session, } EXPORT_SYMBOL_GPL(nfs41_setup_sequence); -static int nfs4_setup_sequence(const struct nfs_server *server, +static int nfs4_setup_sequence(const struct nfs_client *client, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task) { - struct nfs4_session *session = nfs4_get_session(server->nfs_client); + struct nfs4_session *session = nfs4_get_session(client); int ret = 0; if (!session) - return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, + return nfs40_setup_sequence(client->cl_slot_tbl, args, res, task); dprintk("--> %s clp %p session %p sr_slot %u\n", @@ -993,13 +993,12 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { #else /* !CONFIG_NFS_V4_1 */ -static int nfs4_setup_sequence(const struct nfs_server *server, +static int nfs4_setup_sequence(const struct nfs_client *client, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task) { - return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, - args, res, task); + return nfs40_setup_sequence(client->cl_slot_tbl, args, res, task); } static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) @@ -1025,7 +1024,7 @@ EXPORT_SYMBOL_GPL(nfs4_sequence_done); static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata) { struct nfs4_call_sync_data *data = calldata; - nfs4_setup_sequence(data->seq_server, + nfs4_setup_sequence(data->seq_server->nfs_client, data->seq_args, data->seq_res, task); } @@ -2172,7 +2171,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; - if (nfs4_setup_sequence(data->o_arg.server, + if (nfs4_setup_sequence(data->o_arg.server->nfs_client, &data->o_arg.seq_args, &data->o_res.seq_res, task) != 0) @@ -3234,7 +3233,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) else if (calldata->arg.bitmask == NULL) calldata->res.fattr = NULL; calldata->timestamp = jiffies; - if (nfs4_setup_sequence(NFS_SERVER(inode), + if (nfs4_setup_sequence(NFS_SERVER(inode)->nfs_client, &calldata->arg.seq_args, &calldata->res.seq_res, task) != 0) @@ -4114,7 +4113,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) { - nfs4_setup_sequence(NFS_SB(data->dentry->d_sb), + nfs4_setup_sequence(NFS_SB(data->dentry->d_sb)->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -4148,7 +4147,7 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) { - nfs4_setup_sequence(NFS_SERVER(data->old_dir), + nfs4_setup_sequence(NFS_SERVER(data->old_dir)->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -4723,7 +4722,7 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_header 
*hdr) { - if (nfs4_setup_sequence(NFS_SERVER(hdr->inode), + if (nfs4_setup_sequence(NFS_SERVER(hdr->inode)->nfs_client, &hdr->args.seq_args, &hdr->res.seq_res, task)) @@ -4822,7 +4821,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->inode), + nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -5747,7 +5746,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) return; - nfs4_setup_sequence(d_data->res.server, + nfs4_setup_sequence(d_data->res.server->nfs_client, &d_data->args.seq_args, &d_data->res.seq_res, task); @@ -5989,7 +5988,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) goto out_no_action; } calldata->timestamp = jiffies; - if (nfs4_setup_sequence(calldata->server, + if (nfs4_setup_sequence(calldata->server->nfs_client, &calldata->arg.seq_args, &calldata->res.seq_res, task) != 0) @@ -6174,7 +6173,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) goto out_release_open_seqid; } data->timestamp = jiffies; - if (nfs4_setup_sequence(data->server, + if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args, &data->res.seq_res, task) == 0) -- GitLab From 6de7e12f53a154d35d9aceae718b764ada23e430 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 9 Jan 2017 16:51:52 -0500 Subject: [PATCH 0108/1084] NFS: Use nfs4_setup_sequence() everywhere This does the right thing depending on if we have a session, rather than needing to handle this manually in multiple places. Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.c | 6 ++-- fs/nfs/flexfilelayout/flexfilelayout.c | 40 ++++++++------------------ fs/nfs/nfs42proc.c | 4 +-- fs/nfs/nfs4_fs.h | 5 +--- fs/nfs/nfs4proc.c | 30 +++++++++---------- 5 files changed, 33 insertions(+), 52 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index a3fc48ba4931..7aff350f15b1 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -305,7 +305,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) } hdr->pgio_done_cb = filelayout_read_done_cb; - if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + if (nfs4_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) @@ -403,7 +403,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) rpc_exit(task, 0); return; } - if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + if (nfs4_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) @@ -438,7 +438,7 @@ static void filelayout_commit_prepare(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - nfs41_setup_sequence(wdata->ds_clp->cl_session, + nfs4_setup_sequence(wdata->ds_clp, &wdata->args.seq_args, &wdata->res.seq_res, task); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 0ca4af8cca5d..cc9064974104 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1384,30 +1384,14 @@ static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data) rpc_call_start(task); } -static int ff_layout_setup_sequence(struct nfs_client *ds_clp, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - if 
(ds_clp->cl_session) - return nfs41_setup_sequence(ds_clp->cl_session, - args, - res, - task); - return nfs40_setup_sequence(ds_clp->cl_slot_tbl, - args, - res, - task); -} - static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_setup_sequence(hdr->ds_clp, - &hdr->args.seq_args, - &hdr->res.seq_res, - task)) + if (nfs4_setup_sequence(hdr->ds_clp, + &hdr->args.seq_args, + &hdr->res.seq_res, + task)) return; if (ff_layout_read_prepare_common(task, hdr)) @@ -1578,10 +1562,10 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_setup_sequence(hdr->ds_clp, - &hdr->args.seq_args, - &hdr->res.seq_res, - task)) + if (nfs4_setup_sequence(hdr->ds_clp, + &hdr->args.seq_args, + &hdr->res.seq_res, + task)) return; if (ff_layout_write_prepare_common(task, hdr)) @@ -1667,10 +1651,10 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - if (ff_layout_setup_sequence(wdata->ds_clp, - &wdata->args.seq_args, - &wdata->res.seq_res, - task)) + if (nfs4_setup_sequence(wdata->ds_clp, + &wdata->args.seq_args, + &wdata->res.seq_res, + task)) return; ff_layout_commit_prepare_common(task, data); } diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index b03010eebafd..98cf58341066 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -332,8 +332,8 @@ nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) } nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid); spin_unlock(&inode->i_lock); - nfs41_setup_sequence(nfs4_get_session(server->nfs_client), - &data->args.seq_args, &data->res.seq_res, task); + nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, + &data->res.seq_res, task); } static void diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 04ffea093d56..af285cc27ccf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -273,9 +273,6 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, fmode_t fmode); #if defined(CONFIG_NFS_V4_1) -extern int nfs41_setup_sequence(struct nfs4_session *session, - struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - struct rpc_task *task); extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); extern int nfs4_proc_create_session(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_destroy_session(struct nfs4_session *, struct rpc_cred *); @@ -456,7 +453,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); extern void nfs_release_seqid(struct nfs_seqid *seqid); extern void nfs_free_seqid(struct nfs_seqid *seqid); -extern int nfs40_setup_sequence(struct nfs4_slot_table *tbl, +extern int nfs4_setup_sequence(const struct nfs_client *client, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f62d58cb3014..4ced1c964b1e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -622,10 +622,10 @@ static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) args->sa_privileged = 1; } -int nfs40_setup_sequence(struct nfs4_slot_table *tbl, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) +static int nfs40_setup_sequence(struct nfs4_slot_table *tbl, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) { struct 
nfs4_slot *slot; @@ -662,7 +662,6 @@ int nfs40_setup_sequence(struct nfs4_slot_table *tbl, spin_unlock(&tbl->slot_tbl_lock); return -EAGAIN; } -EXPORT_SYMBOL_GPL(nfs40_setup_sequence); static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) { @@ -882,7 +881,7 @@ int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) } EXPORT_SYMBOL_GPL(nfs4_sequence_done); -int nfs41_setup_sequence(struct nfs4_session *session, +static int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task) @@ -945,12 +944,11 @@ int nfs41_setup_sequence(struct nfs4_session *session, spin_unlock(&tbl->slot_tbl_lock); return -EAGAIN; } -EXPORT_SYMBOL_GPL(nfs41_setup_sequence); -static int nfs4_setup_sequence(const struct nfs_client *client, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) +int nfs4_setup_sequence(const struct nfs_client *client, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) { struct nfs4_session *session = nfs4_get_session(client); int ret = 0; @@ -968,6 +966,7 @@ static int nfs4_setup_sequence(const struct nfs_client *client, dprintk("<-- %s status=%d\n", __func__, ret); return ret; } +EXPORT_SYMBOL_GPL(nfs4_setup_sequence); static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) { @@ -993,13 +992,14 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { #else /* !CONFIG_NFS_V4_1 */ -static int nfs4_setup_sequence(const struct nfs_client *client, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) +int nfs4_setup_sequence(const struct nfs_client *client, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) { return nfs40_setup_sequence(client->cl_slot_tbl, args, res, task); } +EXPORT_SYMBOL_GPL(nfs4_setup_sequence); static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { -- GitLab From 7981c8a659143173882dda43b3056a777faeeb7b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 10 Jan 2017 11:39:53 -0500 Subject: [PATCH 0109/1084] NFS: Create a single nfs4_setup_sequence() function The inline ifdef lets us put everything in a single place, rather than having two (very similar) versions of this function. Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 76 ++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 47 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4ced1c964b1e..e6f563c1689e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -945,37 +945,14 @@ static int nfs41_setup_sequence(struct nfs4_session *session, return -EAGAIN; } -int nfs4_setup_sequence(const struct nfs_client *client, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - struct nfs4_session *session = nfs4_get_session(client); - int ret = 0; - - if (!session) - return nfs40_setup_sequence(client->cl_slot_tbl, - args, res, task); - - dprintk("--> %s clp %p session %p sr_slot %u\n", - __func__, session->clp, session, res->sr_slot ? 
- res->sr_slot->slot_nr : NFS4_NO_SLOT); - - ret = nfs41_setup_sequence(session, args, res, task); - - dprintk("<-- %s status=%d\n", __func__, ret); - return ret; -} -EXPORT_SYMBOL_GPL(nfs4_setup_sequence); - static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) { struct nfs4_call_sync_data *data = calldata; - struct nfs4_session *session = nfs4_get_session(data->seq_server->nfs_client); dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); - nfs41_setup_sequence(session, data->seq_args, data->seq_res, task); + nfs4_setup_sequence(data->seq_server->nfs_client, + data->seq_args, data->seq_res, task); } static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) @@ -992,15 +969,6 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { #else /* !CONFIG_NFS_V4_1 */ -int nfs4_setup_sequence(const struct nfs_client *client, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - return nfs40_setup_sequence(client->cl_slot_tbl, args, res, task); -} -EXPORT_SYMBOL_GPL(nfs4_setup_sequence); - static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { return nfs40_sequence_done(task, res); @@ -1021,6 +989,22 @@ EXPORT_SYMBOL_GPL(nfs4_sequence_done); #endif /* !CONFIG_NFS_V4_1 */ +int nfs4_setup_sequence(const struct nfs_client *client, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) +{ +#if defined(CONFIG_NFS_V4_1) + struct nfs4_session *session = nfs4_get_session(client); + + if (session) + return nfs41_setup_sequence(session, args, res, task); +#endif /* CONFIG_NFS_V4_1 */ + return nfs40_setup_sequence(client->cl_slot_tbl, args, res, task); + +} +EXPORT_SYMBOL_GPL(nfs4_setup_sequence); + static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata) { struct nfs4_call_sync_data *data = calldata; @@ -2047,8 +2031,8 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; - nfs40_setup_sequence(data->o_arg.server->nfs_client->cl_slot_tbl, - &data->c_arg.seq_args, &data->c_res.seq_res, task); + nfs4_setup_sequence(data->o_arg.server->nfs_client, + &data->c_arg.seq_args, &data->c_res.seq_res, task); } static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) @@ -6639,8 +6623,8 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata { struct nfs_release_lockowner_data *data = calldata; struct nfs_server *server = data->server; - nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, - &data->args.seq_args, &data->res.seq_res, task); + nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, + &data->res.seq_res, task); data->args.lock_owner.clientid = server->nfs_client->cl_clientid; data->timestamp = jiffies; } @@ -7830,7 +7814,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, dprintk("--> %s\n", __func__); /* just setup sequence, do not trigger session recovery since we're invoked within one */ - nfs41_setup_sequence(data->clp->cl_session, + nfs4_setup_sequence(data->clp, &data->args->la_seq_args, &data->res->lr_seq_res, task); @@ -8201,7 +8185,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) args = task->tk_msg.rpc_argp; res = task->tk_msg.rpc_resp; - nfs41_setup_sequence(clp->cl_session, args, res, task); + nfs4_setup_sequence(clp, args, res, task); } static const struct rpc_call_ops nfs41_sequence_ops = { @@ -8289,7 +8273,7 @@ static void 
nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) { struct nfs4_reclaim_complete_data *calldata = data; - nfs41_setup_sequence(calldata->clp->cl_session, + nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, &calldata->res.seq_res, task); @@ -8396,10 +8380,9 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; struct nfs_server *server = NFS_SERVER(lgp->args.inode); - struct nfs4_session *session = nfs4_get_session(server->nfs_client); dprintk("--> %s\n", __func__); - nfs41_setup_sequence(session, &lgp->args.seq_args, + nfs4_setup_sequence(server->nfs_client, &lgp->args.seq_args, &lgp->res.seq_res, task); dprintk("<-- %s\n", __func__); } @@ -8643,7 +8626,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) struct nfs4_layoutreturn *lrp = calldata; dprintk("--> %s\n", __func__); - nfs41_setup_sequence(lrp->clp->cl_session, + nfs4_setup_sequence(lrp->clp, &lrp->args.seq_args, &lrp->res.seq_res, task); @@ -8793,9 +8776,8 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutcommit_data *data = calldata; struct nfs_server *server = NFS_SERVER(data->args.inode); - struct nfs4_session *session = nfs4_get_session(server->nfs_client); - nfs41_setup_sequence(session, + nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -9119,7 +9101,7 @@ struct nfs_free_stateid_data { static void nfs41_free_stateid_prepare(struct rpc_task *task, void *calldata) { struct nfs_free_stateid_data *data = calldata; - nfs41_setup_sequence(nfs4_get_session(data->server->nfs_client), + nfs4_setup_sequence(data->server->nfs_client, &data->args.seq_args, &data->res.seq_res, task); -- GitLab From 9dd9107f330c5d371c62b460975a32d8bd5712b4 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 10 Jan 2017 12:01:46 -0500 Subject: [PATCH 0110/1084] NFS: Move slot-already-allocated check into nfs_setup_sequence() This puts the check in a single place, rather than needing to implement it twice for v4.0 and v4.1. Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e6f563c1689e..dcb2f73c7d4c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -629,10 +629,6 @@ static int nfs40_setup_sequence(struct nfs4_slot_table *tbl, { struct nfs4_slot *slot; - /* slot already allocated? */ - if (res->sr_slot != NULL) - goto out_start; - spin_lock(&tbl->slot_tbl_lock); if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) goto out_sleep; @@ -649,7 +645,6 @@ static int nfs40_setup_sequence(struct nfs4_slot_table *tbl, args->sa_slot = slot; res->sr_slot = slot; -out_start: rpc_call_start(task); return 0; @@ -890,10 +885,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_slot_table *tbl; dprintk("--> %s\n", __func__); - /* slot already allocated? */ - if (res->sr_slot != NULL) - goto out_success; - tbl = &session->fc_slot_table; task->tk_timeout = 0; @@ -931,7 +922,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session, */ res->sr_status = 1; trace_nfs4_setup_sequence(session, args); -out_success: rpc_call_start(task); return 0; out_sleep: @@ -996,12 +986,21 @@ int nfs4_setup_sequence(const struct nfs_client *client, { #if defined(CONFIG_NFS_V4_1) struct nfs4_session *session = nfs4_get_session(client); +#endif /* CONFIG_NFS_V4_1 */ + /* slot already allocated? 
*/ + if (res->sr_slot != NULL) + goto out_start; + +#if defined(CONFIG_NFS_V4_1) if (session) return nfs41_setup_sequence(session, args, res, task); #endif /* CONFIG_NFS_V4_1 */ return nfs40_setup_sequence(client->cl_slot_tbl, args, res, task); +out_start: + rpc_call_start(task); + return 0; } EXPORT_SYMBOL_GPL(nfs4_setup_sequence); -- GitLab From 6994cdd798568a0ddb8e0a85e2af24dbe655c341 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 10 Jan 2017 16:13:27 -0500 Subject: [PATCH 0111/1084] NFS: Lock the slot table from a single place during setup sequence Rather than implementing this twice for NFS v4.0 and v4.1 Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dcb2f73c7d4c..681cb5e77da2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -629,7 +629,6 @@ static int nfs40_setup_sequence(struct nfs4_slot_table *tbl, { struct nfs4_slot *slot; - spin_lock(&tbl->slot_tbl_lock); if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) goto out_sleep; @@ -639,13 +638,10 @@ static int nfs40_setup_sequence(struct nfs4_slot_table *tbl, task->tk_timeout = HZ >> 2; goto out_sleep; } - spin_unlock(&tbl->slot_tbl_lock); slot->privileged = args->sa_privileged ? 1 : 0; args->sa_slot = slot; res->sr_slot = slot; - - rpc_call_start(task); return 0; out_sleep: @@ -654,7 +650,6 @@ static int nfs40_setup_sequence(struct nfs4_slot_table *tbl, NULL, RPC_PRIORITY_PRIVILEGED); else rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); return -EAGAIN; } @@ -889,7 +884,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session, task->tk_timeout = 0; - spin_lock(&tbl->slot_tbl_lock); if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) && !args->sa_privileged) { /* The state manager will wait until the slot table is empty */ @@ -905,7 +899,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session, dprintk("<-- %s: no free slots\n", __func__); goto out_sleep; } - spin_unlock(&tbl->slot_tbl_lock); slot->privileged = args->sa_privileged ? 1 : 0; args->sa_slot = slot; @@ -922,7 +915,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session, */ res->sr_status = 1; trace_nfs4_setup_sequence(session, args); - rpc_call_start(task); return 0; out_sleep: /* Privileged tasks are queued with top priority */ @@ -931,7 +923,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session, NULL, RPC_PRIORITY_PRIVILEGED); else rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); return -EAGAIN; } @@ -984,19 +975,27 @@ int nfs4_setup_sequence(const struct nfs_client *client, struct nfs4_sequence_res *res, struct rpc_task *task) { -#if defined(CONFIG_NFS_V4_1) struct nfs4_session *session = nfs4_get_session(client); -#endif /* CONFIG_NFS_V4_1 */ + struct nfs4_slot_table *tbl = session ? &session->fc_slot_table : + client->cl_slot_tbl; + int ret; /* slot already allocated? 
*/ if (res->sr_slot != NULL) goto out_start; + spin_lock(&tbl->slot_tbl_lock); + #if defined(CONFIG_NFS_V4_1) if (session) - return nfs41_setup_sequence(session, args, res, task); + ret = nfs41_setup_sequence(session, args, res, task); + else #endif /* CONFIG_NFS_V4_1 */ - return nfs40_setup_sequence(client->cl_slot_tbl, args, res, task); + ret = nfs40_setup_sequence(client->cl_slot_tbl, args, res, task); + + spin_unlock(&tbl->slot_tbl_lock); + if (ret < 0) + return ret; out_start: rpc_call_start(task); -- GitLab From 0dcee8bb749e5de43b180ccfcb0c5005aa529c92 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 10 Jan 2017 16:29:54 -0500 Subject: [PATCH 0112/1084] NFS: Handle setup sequence task rescheduling in a single place Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 681cb5e77da2..9759d6f9e1a5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -630,27 +630,19 @@ static int nfs40_setup_sequence(struct nfs4_slot_table *tbl, struct nfs4_slot *slot; if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) - goto out_sleep; + return -EAGAIN; slot = nfs4_alloc_slot(tbl); if (IS_ERR(slot)) { if (slot == ERR_PTR(-ENOMEM)) task->tk_timeout = HZ >> 2; - goto out_sleep; + return -EAGAIN; } slot->privileged = args->sa_privileged ? 1 : 0; args->sa_slot = slot; res->sr_slot = slot; return 0; - -out_sleep: - if (args->sa_privileged) - rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, - NULL, RPC_PRIORITY_PRIVILEGED); - else - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - return -EAGAIN; } static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) @@ -888,7 +880,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session, !args->sa_privileged) { /* The state manager will wait until the slot table is empty */ dprintk("%s session is draining\n", __func__); - goto out_sleep; + return -EAGAIN; } slot = nfs4_alloc_slot(tbl); @@ -897,7 +889,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session, if (slot == ERR_PTR(-ENOMEM)) task->tk_timeout = HZ >> 2; dprintk("<-- %s: no free slots\n", __func__); - goto out_sleep; + return -EAGAIN; } slot->privileged = args->sa_privileged ? 
1 : 0; @@ -916,14 +908,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session, res->sr_status = 1; trace_nfs4_setup_sequence(session, args); return 0; -out_sleep: - /* Privileged tasks are queued with top priority */ - if (args->sa_privileged) - rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, - NULL, RPC_PRIORITY_PRIVILEGED); - else - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - return -EAGAIN; } static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) @@ -993,13 +977,22 @@ int nfs4_setup_sequence(const struct nfs_client *client, #endif /* CONFIG_NFS_V4_1 */ ret = nfs40_setup_sequence(client->cl_slot_tbl, args, res, task); + if (ret == -EAGAIN) + goto out_sleep; spin_unlock(&tbl->slot_tbl_lock); - if (ret < 0) - return ret; out_start: rpc_call_start(task); return 0; + +out_sleep: + if (args->sa_privileged) + rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, + NULL, RPC_PRIORITY_PRIVILEGED); + else + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + return -EAGAIN; } EXPORT_SYMBOL_GPL(nfs4_setup_sequence); -- GitLab From 76ee03540f316948c3bc89fc76ded86c88e887a5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 10 Jan 2017 16:49:31 -0500 Subject: [PATCH 0113/1084] NFS: Check if the slot table is draining from nfs4_setup_sequence() Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9759d6f9e1a5..a74e1a4e3264 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -629,9 +629,6 @@ static int nfs40_setup_sequence(struct nfs4_slot_table *tbl, { struct nfs4_slot *slot; - if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) - return -EAGAIN; - slot = nfs4_alloc_slot(tbl); if (IS_ERR(slot)) { if (slot == ERR_PTR(-ENOMEM)) @@ -874,15 +871,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session, dprintk("--> %s\n", __func__); tbl = &session->fc_slot_table; - task->tk_timeout = 0; - - if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) && - !args->sa_privileged) { - /* The state manager will wait until the slot table is empty */ - dprintk("%s session is draining\n", __func__); - return -EAGAIN; - } - slot = nfs4_alloc_slot(tbl); if (IS_ERR(slot)) { /* If out of memory, try again in 1/4 second */ @@ -960,15 +948,22 @@ int nfs4_setup_sequence(const struct nfs_client *client, struct rpc_task *task) { struct nfs4_session *session = nfs4_get_session(client); - struct nfs4_slot_table *tbl = session ? &session->fc_slot_table : - client->cl_slot_tbl; + struct nfs4_slot_table *tbl = client->cl_slot_tbl; int ret; /* slot already allocated? */ if (res->sr_slot != NULL) goto out_start; + if (session) { + tbl = &session->fc_slot_table; + task->tk_timeout = 0; + } + spin_lock(&tbl->slot_tbl_lock); + /* The state manager will wait until the slot table is empty */ + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) + goto out_sleep; #if defined(CONFIG_NFS_V4_1) if (session) -- GitLab From 3d35808b1de48e0c478668b3177fcd3360aae543 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 10:54:04 -0500 Subject: [PATCH 0114/1084] NFS: Merge the remaining setup_sequence functions This creates a single place for all the work to happen, using the presence of a session to determine if extra values need to be set. 
Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 87 +++++++++++------------------------------------ 1 file changed, 20 insertions(+), 67 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a74e1a4e3264..f6a362ffcbd4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -622,26 +622,6 @@ static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) args->sa_privileged = 1; } -static int nfs40_setup_sequence(struct nfs4_slot_table *tbl, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - struct nfs4_slot *slot; - - slot = nfs4_alloc_slot(tbl); - if (IS_ERR(slot)) { - if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; - return -EAGAIN; - } - - slot->privileged = args->sa_privileged ? 1 : 0; - args->sa_slot = slot; - res->sr_slot = slot; - return 0; -} - static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) { struct nfs4_slot *slot = res->sr_slot; @@ -860,44 +840,6 @@ int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) } EXPORT_SYMBOL_GPL(nfs4_sequence_done); -static int nfs41_setup_sequence(struct nfs4_session *session, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) -{ - struct nfs4_slot *slot; - struct nfs4_slot_table *tbl; - - dprintk("--> %s\n", __func__); - tbl = &session->fc_slot_table; - - slot = nfs4_alloc_slot(tbl); - if (IS_ERR(slot)) { - /* If out of memory, try again in 1/4 second */ - if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; - dprintk("<-- %s: no free slots\n", __func__); - return -EAGAIN; - } - - slot->privileged = args->sa_privileged ? 1 : 0; - args->sa_slot = slot; - - dprintk("<-- %s slotid=%u seqid=%u\n", __func__, - slot->slot_nr, slot->seq_nr); - - res->sr_slot = slot; - res->sr_timestamp = jiffies; - res->sr_status_flags = 0; - /* - * sr_status is only set in decode_sequence, and so will remain - * set to 1 if an rpc level failure occurs. - */ - res->sr_status = 1; - trace_nfs4_setup_sequence(session, args); - return 0; -} - static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) { struct nfs4_call_sync_data *data = calldata; @@ -949,7 +891,7 @@ int nfs4_setup_sequence(const struct nfs_client *client, { struct nfs4_session *session = nfs4_get_session(client); struct nfs4_slot_table *tbl = client->cl_slot_tbl; - int ret; + struct nfs4_slot *slot; /* slot already allocated? */ if (res->sr_slot != NULL) @@ -965,17 +907,28 @@ int nfs4_setup_sequence(const struct nfs_client *client, if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) goto out_sleep; -#if defined(CONFIG_NFS_V4_1) - if (session) - ret = nfs41_setup_sequence(session, args, res, task); - else -#endif /* CONFIG_NFS_V4_1 */ - ret = nfs40_setup_sequence(client->cl_slot_tbl, args, res, task); - - if (ret == -EAGAIN) + slot = nfs4_alloc_slot(tbl); + if (IS_ERR(slot)) { + /* Try again in 1/4 second */ + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; goto out_sleep; + } spin_unlock(&tbl->slot_tbl_lock); + slot->privileged = args->sa_privileged ? 
1 : 0; + args->sa_slot = slot; + + res->sr_slot = slot; + if (session) { + res->sr_timestamp = jiffies; + res->sr_status_flags = 0; + res->sr_status = 1; +#ifdef CONFIG_NFS_V4_1 + trace_nfs4_setup_sequence(session, args); +#endif /* CONFIG_NFS_V4_1 */ + } + out_start: rpc_call_start(task); return 0; -- GitLab From ad05cc0f04341216923895c05e2c364ef34f1bb4 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 13:37:06 -0500 Subject: [PATCH 0115/1084] NFS: Make trace_nfs4_setup_sequence() available to NFS v4.0 This tracepoint displays information about the slot that was chosen for the RPC, in addition to session information. This could be useful information for debugging, and we can set the session id hash to 0 to indicate that there is no session. Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 +-- fs/nfs/nfs4session.h | 2 ++ fs/nfs/nfs4trace.h | 64 ++++++++++++++++++++++---------------------- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f6a362ffcbd4..ae03f976fbd3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -924,11 +924,9 @@ int nfs4_setup_sequence(const struct nfs_client *client, res->sr_timestamp = jiffies; res->sr_status_flags = 0; res->sr_status = 1; -#ifdef CONFIG_NFS_V4_1 - trace_nfs4_setup_sequence(session, args); -#endif /* CONFIG_NFS_V4_1 */ } + trace_nfs4_setup_sequence(session, args); out_start: rpc_call_start(task); return 0; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index f6378d95b1b5..dfae4880eacb 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -175,6 +175,8 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp) return 0; } +#define nfs_session_id_hash(session) (0) + #endif /* defined(CONFIG_NFS_V4_1) */ #endif /* IS_ENABLED(CONFIG_NFS_V4) */ #endif /* __LINUX_FS_NFS_NFS4SESSION_H */ diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index cfb8f7ce5cf6..845d0eadefc9 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -241,38 +241,6 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session); DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence); DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete); -TRACE_EVENT(nfs4_setup_sequence, - TP_PROTO( - const struct nfs4_session *session, - const struct nfs4_sequence_args *args - ), - TP_ARGS(session, args), - - TP_STRUCT__entry( - __field(unsigned int, session) - __field(unsigned int, slot_nr) - __field(unsigned int, seq_nr) - __field(unsigned int, highest_used_slotid) - ), - - TP_fast_assign( - const struct nfs4_slot *sa_slot = args->sa_slot; - __entry->session = nfs_session_id_hash(&session->sess_id); - __entry->slot_nr = sa_slot->slot_nr; - __entry->seq_nr = sa_slot->seq_nr; - __entry->highest_used_slotid = - sa_slot->table->highest_used_slotid; - ), - TP_printk( - "session=0x%08x slot_nr=%u seq_nr=%u " - "highest_used_slotid=%u", - __entry->session, - __entry->slot_nr, - __entry->seq_nr, - __entry->highest_used_slotid - ) -); - #define show_nfs4_sequence_status_flags(status) \ __print_flags((unsigned long)status, "|", \ { SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \ @@ -382,6 +350,38 @@ TRACE_EVENT(nfs4_cb_sequence, ); #endif /* CONFIG_NFS_V4_1 */ +TRACE_EVENT(nfs4_setup_sequence, + TP_PROTO( + const struct nfs4_session *session, + const struct nfs4_sequence_args *args + ), + TP_ARGS(session, args), + + TP_STRUCT__entry( + __field(unsigned int, session) + __field(unsigned int, slot_nr) + __field(unsigned int, seq_nr) + __field(unsigned int, highest_used_slotid) + ), + + TP_fast_assign( 
+ const struct nfs4_slot *sa_slot = args->sa_slot; + __entry->session = session ? nfs_session_id_hash(&session->sess_id) : 0; + __entry->slot_nr = sa_slot->slot_nr; + __entry->seq_nr = sa_slot->seq_nr; + __entry->highest_used_slotid = + sa_slot->table->highest_used_slotid; + ), + TP_printk( + "session=0x%08x slot_nr=%u seq_nr=%u " + "highest_used_slotid=%u", + __entry->session, + __entry->slot_nr, + __entry->seq_nr, + __entry->highest_used_slotid + ) +); + DECLARE_EVENT_CLASS(nfs4_open_event, TP_PROTO( const struct nfs_open_context *ctx, -- GitLab From d9b67e1e499b056a83d2db6046d74652cf836998 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 15:04:25 -0500 Subject: [PATCH 0116/1084] NFS: Fix inconsistent indentation in nfs4proc.c Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 56 +++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ae03f976fbd3..328eda9e6db1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2209,15 +2209,15 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) data->is_recover = 1; } task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); - status = nfs4_wait_for_completion_rpc_task(task); - if (status != 0) { - data->cancelled = 1; - smp_wmb(); - } else - status = data->rpc_status; - rpc_put_task(task); + if (IS_ERR(task)) + return PTR_ERR(task); + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) { + data->cancelled = 1; + smp_wmb(); + } else + status = data->rpc_status; + rpc_put_task(task); return status; } @@ -2226,7 +2226,7 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) { struct inode *dir = d_inode(data->dir); struct nfs_openres *o_res = &data->o_res; - int status; + int status; status = nfs4_run_open_task(data, 1); if (status != 0 || !data->rpc_done) @@ -2860,12 +2860,12 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_open_context *ctx) { struct nfs_server *server = NFS_SERVER(inode); - struct rpc_message msg = { + struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], .rpc_argp = arg, .rpc_resp = res, .rpc_cred = cred, - }; + }; struct rpc_cred *delegation_cred = NULL; unsigned long timestamp = jiffies; fmode_t fmode; @@ -2913,18 +2913,18 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_state *state = ctx ? ctx->state : NULL; - struct nfs_setattrargs arg = { - .fh = NFS_FH(inode), - .iap = sattr, + struct nfs_setattrargs arg = { + .fh = NFS_FH(inode), + .iap = sattr, .server = server, .bitmask = server->attr_bitmask, .label = ilabel, - }; - struct nfs_setattrres res = { + }; + struct nfs_setattrres res = { .fattr = fattr, .label = olabel, .server = server, - }; + }; struct nfs4_exception exception = { .state = state, .inode = inode, @@ -3038,7 +3038,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) } } - /* hmm. we are done with the inode, and in the process of freeing + /* hmm. we are done with the inode, and in the process of freeing * the state_owner. 
we keep this around to process errors */ switch (task->tk_status) { @@ -4895,8 +4895,8 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen, if (newpage == NULL) goto unwind; memcpy(page_address(newpage), buf, len); - buf += len; - buflen -= len; + buf += len; + buflen -= len; *pages++ = newpage; rc++; } while (buflen != 0); @@ -5219,8 +5219,8 @@ static int _nfs4_do_set_security_label(struct inode *inode, struct nfs_server *server = NFS_SERVER(inode); const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; struct nfs_setattrargs arg = { - .fh = NFS_FH(inode), - .iap = &sattr, + .fh = NFS_FH(inode), + .iap = &sattr, .server = server, .bitmask = bitmask, .label = ilabel, @@ -5231,9 +5231,9 @@ static int _nfs4_do_set_security_label(struct inode *inode, .server = server, }; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], - .rpc_argp = &arg, - .rpc_resp = &res, + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], + .rpc_argp = &arg, + .rpc_resp = &res, }; int status; @@ -5779,8 +5779,8 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], - .rpc_argp = &arg, - .rpc_resp = &res, + .rpc_argp = &arg, + .rpc_resp = &res, .rpc_cred = state->owner->so_cred, }; struct nfs4_lock_state *lsp; -- GitLab From eeea5361634eeef9a09b7b5ae449b41623c23886 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 16:01:21 -0500 Subject: [PATCH 0117/1084] NFS: Clean up _nfs4_is_integrity_protected() We can cut out the if statement and return the results of the comparison directly. Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 328eda9e6db1..3f5d4fd4e95a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -577,12 +577,7 @@ nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, static bool _nfs4_is_integrity_protected(struct nfs_client *clp) { rpc_authflavor_t flavor = clp->cl_rpcclient->cl_auth->au_flavor; - - if (flavor == RPC_AUTH_GSS_KRB5I || - flavor == RPC_AUTH_GSS_KRB5P) - return true; - - return false; + return (flavor == RPC_AUTH_GSS_KRB5I) || (flavor == RPC_AUTH_GSS_KRB5P); } static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp) -- GitLab From 820bf85ce249ec5b4da9f6aaefa3f5491b23b587 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 15:01:43 -0500 Subject: [PATCH 0118/1084] NFS: Remove nfs4_wait_for_completion_rpc_task() Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3f5d4fd4e95a..b577e172c95b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1245,14 +1245,6 @@ static void nfs4_opendata_put(struct nfs4_opendata *p) kref_put(&p->kref, nfs4_opendata_free); } -static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) -{ - int ret; - - ret = rpc_wait_for_completion_task(task); - return ret; -} - static bool nfs4_mode_match_open_stateid(struct nfs4_state *state, fmode_t fmode) { @@ -2039,7 +2031,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status != 0) { data->cancelled = 1; smp_wmb(); @@ -2206,7 
+2198,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status != 0) { data->cancelled = 1; smp_wmb(); @@ -5732,7 +5724,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co return PTR_ERR(task); if (!issync) goto out; - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status != 0) goto out; status = data->rpc_status; @@ -6002,7 +5994,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = PTR_ERR(task); if (IS_ERR(task)) goto out; - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); rpc_put_task(task); out: request->fl_flags = fl_flags; @@ -6229,7 +6221,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - ret = nfs4_wait_for_completion_rpc_task(task); + ret = rpc_wait_for_completion_task(task); if (ret == 0) { ret = data->rpc_status; if (ret) @@ -8297,7 +8289,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, status = PTR_ERR(task); goto out; } - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status == 0) status = task->tk_status; rpc_put_task(task); @@ -8529,7 +8521,7 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags) task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status == 0) { status = nfs4_layoutget_handle_exception(task, lgp, &exception); *timeout = exception.timeout; -- GitLab From 37a8484aef09638b9eead7070cb0ee27adb74ceb Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 16:08:35 -0500 Subject: [PATCH 0119/1084] NFS: Return errors directly in _nfs4_opendata_reclaim_to_nfs4_state() There is no need for a goto just to return an error code without any cleanup. Returning the error directly helps to clean up the code. 
Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b577e172c95b..3e892b05e62b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1639,17 +1639,15 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) int ret; if (!data->rpc_done) { - if (data->rpc_status) { - ret = data->rpc_status; - goto err; - } + if (data->rpc_status) + return ERR_PTR(data->rpc_status); /* cached opens have already been processed */ goto update; } ret = nfs_refresh_inode(inode, &data->f_attr); if (ret) - goto err; + return ERR_PTR(ret); if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); @@ -1659,9 +1657,6 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) atomic_inc(&state->count); return state; -err: - return ERR_PTR(ret); - } static struct nfs4_state * -- GitLab From d7e9825848cede4cf7589b63029b76428ae76958 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 16:13:29 -0500 Subject: [PATCH 0120/1084] NFS: Remove an extra if in _nfs4_recover_proc_open() It's simpler just to return the status unconditionally Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3e892b05e62b..031b867aaefa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2216,11 +2216,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); - if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { + if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) status = _nfs4_proc_open_confirm(data); - if (status != 0) - return status; - } return status; } -- GitLab From 334f87dd11b7a0ed8f3d3b223fb5cede201e210a Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 16:17:17 -0500 Subject: [PATCH 0121/1084] NFS: Remove nfs4_recover_expired_lease() This function doesn't add much, since all it does is access the server's nfs_client variable. Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 031b867aaefa..baf968990475 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2311,11 +2311,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return 0; } -static int nfs4_recover_expired_lease(struct nfs_server *server) -{ - return nfs4_client_recover_expired_lease(server->nfs_client); -} - /* * OPEN_EXPIRED: * reclaim state on the server after a network partition. @@ -2693,7 +2688,7 @@ static int _nfs4_do_open(struct inode *dir, dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); goto out_err; } - status = nfs4_recover_expired_lease(server); + status = nfs4_client_recover_expired_lease(server->nfs_client); if (status != 0) goto err_put_state_owner; if (d_really_is_positive(dentry)) -- GitLab From 9df1336ca42c16622f1cae5dff246a0c9c644514 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 16:30:08 -0500 Subject: [PATCH 0122/1084] NFS: Remove unnecessary goto in nfs4_lookup_root_sec() Once again, it's easier and cleaner just to return the error directly. 
Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index baf968990475..6c40944cb83d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3416,16 +3416,11 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl .pseudoflavor = flavor, }; struct rpc_auth *auth; - int ret; auth = rpcauth_create(&auth_args, server->client); - if (IS_ERR(auth)) { - ret = -EACCES; - goto out; - } - ret = nfs4_lookup_root(server, fhandle, info); -out: - return ret; + if (IS_ERR(auth)) + return -EACCES; + return nfs4_lookup_root(server, fhandle, info); } /* -- GitLab From 81b68de493a3c60517ef0435d2635b531ed590b9 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 16:41:34 -0500 Subject: [PATCH 0123/1084] NFS: No need to set and return status in nfs41_lock_expired() Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6c40944cb83d..81390e97c8d5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6282,8 +6282,7 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) || test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) return 0; - status = nfs4_lock_expired(state, request); - return status; + return nfs4_lock_expired(state, request); } #endif -- GitLab From 49ad0145c3a81612482ecc064ce6ab908ae3bb1a Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 16:51:59 -0500 Subject: [PATCH 0124/1084] NFS: Clean up nfs41_same_server_scope() The function is cleaner this way, since we can use the result of memcmp() directly Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 81390e97c8d5..bf014a2deefd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7120,11 +7120,9 @@ static bool nfs41_same_server_scope(struct nfs41_server_scope *a, struct nfs41_server_scope *b) { - if (a->server_scope_sz == b->server_scope_sz && - memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) - return true; - - return false; + if (a->server_scope_sz != b->server_scope_sz) + return false; + return memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0; } static void -- GitLab From 045c551947a8c6e787437d696262b562520ff2f0 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 11 Jan 2017 16:59:48 -0500 Subject: [PATCH 0125/1084] NFS: Return the comparison result directly in nfs41_match_stateid() Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bf014a2deefd..aeaa6678b071 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9116,10 +9116,8 @@ static bool nfs41_match_stateid(const nfs4_stateid *s1, if (s1->seqid == s2->seqid) return true; - if (s1->seqid == 0 || s2->seqid == 0) - return true; - return false; + return s1->seqid == 0 || s2->seqid == 0; } #endif /* CONFIG_NFS_V4_1 */ -- GitLab From 2e54b9b1b0ace6997aa4f9811af0811b2d8160f2 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 12 Jan 2017 15:38:06 +0000 Subject: [PATCH 0126/1084] pNFS/flexfiles: Make local symbol layoutreturn_ops static Fixes the following sparse warning: fs/nfs/flexfilelayout/flexfilelayout.c:2114:34: warning: symbol 'layoutreturn_ops' was not 
declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index cc9064974104..6104696325be 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2076,7 +2076,7 @@ ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args) kfree(ff_args); } -const struct nfs4_xdr_opaque_ops layoutreturn_ops = { +static const struct nfs4_xdr_opaque_ops layoutreturn_ops = { .encode = ff_layout_encode_layoutreturn, .free = ff_layout_free_layoutreturn, }; -- GitLab From 68e33bd6bbb79819e5cb7bce26559191b144c465 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 12 Jan 2017 15:39:18 +0000 Subject: [PATCH 0127/1084] NFSv4: Fix warning for using 0 as NULL Fixes the following sparse warning: fs/nfs/nfs4state.c:862:60: warning: Using plain integer as NULL pointer Signed-off-by: Wei Yongjun Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index daeb94e3acd4..8156bad6b441 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -868,7 +868,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ for(;;) { spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, owner, 0); + lsp = __nfs4_find_lock_state(state, owner, NULL); if (lsp != NULL) break; if (new != NULL) { -- GitLab From bd80ef5ed46233d90f216805ff40aba326ff1bdd Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 22 Jan 2017 19:17:14 +0100 Subject: [PATCH 0128/1084] ARM64: dts: meson: meson-gx: add the SAR ADC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the SAR ADC to meson-gxbb.dtsi and meson-gxl.dtsi. GXBB provides a 10-bit ADC while GXL and GXM provide a 12-bit ADC. Some boards use resistor ladder buttons connected through one of the ADC channels. On newer devices (GXL and GXM) some boards use pull-ups/downs to change the resistance (and thus the ADC value) on one of the ADC channels to indicate the board revision. 
Signed-off-by: Martin Blumenstingl Tested-by: Neil Armstrong Reviewed-by: Andreas Färber Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 8 ++++++++ arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi | 10 ++++++++++ arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 10 ++++++++++ arch/arm64/boot/dts/amlogic/meson-gxm.dtsi | 4 ++++ 4 files changed, 32 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 9110dc1a2481..f100d0b013b4 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -237,6 +237,14 @@ pwm_cd: pwm@8650 { status = "disabled"; }; + saradc: adc@8680 { + compatible = "amlogic,meson-saradc"; + reg = <0x0 0x8680 0x0 0x34>; + #io-channel-cells = <1>; + interrupts = ; + status = "disabled"; + }; + pwm_ef: pwm@86c0 { compatible = "amlogic,meson-gx-pwm", "amlogic,meson-gxbb-pwm"; reg = <0x0 0x086c0 0x0 0x10>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index 39a774ad83ce..04b3324bc132 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -490,6 +490,16 @@ &i2c_C { clocks = <&clkc CLKID_I2C>; }; +&saradc { + compatible = "amlogic,meson-gxbb-saradc", "amlogic,meson-saradc"; + clocks = <&xtal>, + <&clkc CLKID_SAR_ADC>, + <&clkc CLKID_SANA>, + <&clkc CLKID_SAR_ADC_CLK>, + <&clkc CLKID_SAR_ADC_SEL>; + clock-names = "clkin", "core", "sana", "adc_clk", "adc_sel"; +}; + &sd_emmc_a { clocks = <&clkc CLKID_SD_EMMC_A>, <&xtal>, diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 5f1100af72b1..fe11b5fc61f7 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -347,6 +347,16 @@ &i2c_C { clocks = <&clkc CLKID_I2C>; }; +&saradc { + compatible = "amlogic,meson-gxl-saradc", "amlogic,meson-saradc"; + clocks = <&xtal>, + <&clkc CLKID_SAR_ADC>, + <&clkc CLKID_SANA>, + <&clkc CLKID_SAR_ADC_CLK>, + <&clkc CLKID_SAR_ADC_SEL>; + clock-names = "clkin", "core", "sana", "adc_clk", "adc_sel"; +}; + &sd_emmc_a { clocks = <&clkc CLKID_SD_EMMC_A>, <&xtal>, diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi index 4c55665a253f..ddea7305c644 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxm.dtsi @@ -117,6 +117,10 @@ cpu7: cpu@103 { }; }; +&saradc { + compatible = "amlogic,meson-gxm-saradc", "amlogic,meson-saradc"; +}; + &scpi_dvfs { clock-indices = <0 1>; clock-output-names = "vbig", "vlittle"; -- GitLab From 6b6a1867667a90f399762dc1ee6add5b4aa646c9 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 16 Jan 2017 11:22:45 +0100 Subject: [PATCH 0129/1084] ARM64: dts: meson-gxbb-p200: add ADC laddered keys Add the 5 buttons connected to a resistor laddered matrix and sampled by the SAR ADC channel 0. Only the p200 board has these buttons, the P201 doesn't. 
Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman --- .../boot/dts/amlogic/meson-gxbb-p200.dts | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts index 03e3d76626dd..fc0e86cb4cde 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-p200.dts @@ -45,10 +45,55 @@ /dts-v1/; #include "meson-gxbb-p20x.dtsi" +#include / { compatible = "amlogic,p200", "amlogic,meson-gxbb"; model = "Amlogic Meson GXBB P200 Development Board"; + + avdd18_usb_adc: regulator-avdd18_usb_adc { + compatible = "regulator-fixed"; + regulator-name = "AVDD18_USB_ADC"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + + adc_keys { + compatible = "adc-keys"; + io-channels = <&saradc 0>; + io-channel-names = "buttons"; + keyup-threshold-microvolt = <1800000>; + + button-home { + label = "Home"; + linux,code = ; + press-threshold-microvolt = <900000>; /* 50% */ + }; + + button-esc { + label = "Esc"; + linux,code = ; + press-threshold-microvolt = <684000>; /* 38% */ + }; + + button-up { + label = "Volume Up"; + linux,code = ; + press-threshold-microvolt = <468000>; /* 26% */ + }; + + button-down { + label = "Volume Down"; + linux,code = ; + press-threshold-microvolt = <252000>; /* 14% */ + }; + + button-menu { + label = "Menu"; + linux,code = ; + press-threshold-microvolt = <0>; /* 0% */ + }; + }; }; &i2c_B { @@ -56,3 +101,8 @@ &i2c_B { pinctrl-0 = <&i2c_b_pins>; pinctrl-names = "default"; }; + +&saradc { + status = "okay"; + vref-supply = <&avdd18_usb_adc>; +}; -- GitLab From 2b477c00f3bd87c3286f5940cb4174d8b01ee0d5 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Thu, 22 Dec 2016 12:38:06 -0500 Subject: [PATCH 0130/1084] svcrpc: free contexts immediately on PROC_DESTROY We currently handle a client PROC_DESTROY request by turning it CACHE_NEGATIVE, setting the expired time to now, and then waiting for cache_clean to clean it up later. Since we forgot to set the cache's nextcheck value, that could take up to 30 minutes. Also, though there's probably no real bug in this case, setting CACHE_NEGATIVE directly like this probably isn't a great idea in general. So let's just remove the entry from the cache directly, and move this bit of cache manipulation to a helper function. Signed-off-by: Neil Brown Reported-by: Andy Adamson Signed-off-by: Andy Adamson Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/cache.h | 1 + net/sunrpc/auth_gss/svcauth_gss.c | 4 ++-- net/sunrpc/cache.c | 12 ++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 62a60eeacb0a..9dcf2c8fe1c5 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -227,6 +227,7 @@ extern void sunrpc_destroy_cache_detail(struct cache_detail *cd); extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, umode_t, struct cache_detail *); extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); +extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); /* Must store cache_detail in seq_file->private if using next three functions */ extern void *cache_seq_start(struct seq_file *file, loff_t *pos); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 153082598522..a54a7a3d28f5 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1489,8 +1489,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_PROC_DESTROY: if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; - rsci->h.expiry_time = seconds_since_boot(); - set_bit(CACHE_NEGATIVE, &rsci->h.flags); + /* Delete the entry from the cache_list and call cache_put */ + sunrpc_cache_unhash(sn->rsc_cache, &rsci->h); if (resv->iov_len + 4 > PAGE_SIZE) goto drop; svc_putnl(resv, RPC_SUCCESS); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8147e8d56eb2..502b9fe9817b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1855,3 +1855,15 @@ void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) } EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); +void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) +{ + write_lock(&cd->hash_lock); + if (!hlist_unhashed(&h->cache_list)){ + hlist_del_init(&h->cache_list); + cd->entries--; + write_unlock(&cd->hash_lock); + cache_put(h, cd); + } else + write_unlock(&cd->hash_lock); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_unhash); -- GitLab From e35659f1b03c03946cae8abb6b0a9e170b574f1c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 21 Dec 2016 14:32:19 +1100 Subject: [PATCH 0131/1084] NFSD: correctly range-check v4.x minor version when setting versions. Writing to /proc/fs/nfsd/versions allows individual major versions and NFSv4 minor versions to be enabled or disabled. However NFSv4.0 cannot currently be disabled, though there is no good reason. Also the minor number is parsed as a 'long' but used as an 'int' so '4294967297' will be incorrectly treated as '1'. This patch removes the test on 'minor == 0' and switches to kstrtouint() to get correct range checking. When reading from /proc/fs/nfsd/versions, 4.0 is currently not reported. To allow the disabling of v4.0 to be visible, while maintaining backward compatibility, change code to report "-4.0" if appropriate, but not "+4.0". Signed-off-by: NeilBrown Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsctl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f3b2f34b10a3..d54fb0e3f30e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -569,8 +569,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) if (*minorp == '.') { if (num != 4) return -EINVAL; - minor = simple_strtoul(minorp+1, NULL, 0); - if (minor == 0) + if (kstrtouint(minorp+1, 0, &minor) < 0) return -EINVAL; if (nfsd_minorversion(minor, sign == '-' ? NFSD_CLEAR : NFSD_SET) < 0) @@ -613,8 +612,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) tlen += len; } if (nfsd_vers(4, NFSD_AVAIL)) - for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; + for (minor = 0; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++) { + if (minor == 0 && nfsd_minorversion(minor, NFSD_TEST)) + /* for backward compatibility, don't report + * +4.0 + */ + continue; len = snprintf(buf, remaining, " %c4.%u", (nfsd_vers(4, NFSD_TEST) && nfsd_minorversion(minor, NFSD_TEST)) ? -- GitLab From 54bbb7d206db78a3dfd87bc8d9735cbe3ac3f938 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 31 Dec 2016 20:59:53 +0800 Subject: [PATCH 0132/1084] NFSD: pass an integer for stable type to nfsd_vfs_write After fae5096ad217 "nfsd: assume writeable exportabled filesystems have f_sync" we no longer modify this argument. This is just cleanup, no change in functionality. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4xdr.c | 2 +- fs/nfsd/nfsproc.c | 3 +-- fs/nfsd/vfs.c | 11 +++++------ fs/nfsd/vfs.h | 4 ++-- 6 files changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index d818e4ffd79f..69cd0f1e26ff 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -197,7 +197,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, argp->offset, rqstp->rq_vec, argp->vlen, &cnt, - &resp->committed); + resp->committed); resp->count = cnt; RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 74a6e573e061..2d4eb47177d1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -983,7 +983,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp, write->wr_offset, rqstp->rq_vec, nvecs, &cnt, - &write->wr_how_written); + write->wr_how_written); fput(filp); write->wr_bytes_written = cnt; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8fae53ce21d1..93eba39d57c4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1250,7 +1250,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) READ_BUF(16); p = xdr_decode_hyper(p, &write->wr_offset); write->wr_stable_how = be32_to_cpup(p++); - if (write->wr_stable_how > 2) + if (write->wr_stable_how > NFS_FILE_SYNC) goto xdr_error; write->wr_buflen = be32_to_cpup(p++); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 010aff5c5a79..f157c7eff82f 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -204,7 +204,6 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, struct nfsd_attrstat *resp) { __be32 nfserr; - int stable = 1; unsigned long cnt = argp->len; dprintk("nfsd: WRITE %s %d bytes at %d\n", @@ -215,7 +214,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, argp->offset, rqstp->rq_vec, argp->vlen, &cnt, - &stable); + NFS_DATA_SYNC); return 
nfsd_return_attrs(nfserr, resp); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ca13236dbb1f..1bedb2bc907e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -917,14 +917,13 @@ static int wait_for_concurrent_writes(struct file *file) __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, - unsigned long *cnt, int *stablep) + unsigned long *cnt, int stable) { struct svc_export *exp; struct inode *inode; mm_segment_t oldfs; __be32 err = 0; int host_err; - int stable = *stablep; int use_wgather; loff_t pos = offset; unsigned int pflags = current->flags; @@ -945,7 +944,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); if (!EX_ISSYNC(exp)) - stable = 0; + stable = NFS_UNSTABLE; if (stable && !use_wgather) flags |= RWF_SYNC; @@ -1014,7 +1013,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, - int *stablep) + int stable) { __be32 err = 0; @@ -1027,7 +1026,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, goto out; trace_write_opened(rqstp, fhp, offset, vlen); err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, - stablep); + stable); trace_write_io_done(rqstp, fhp, offset, vlen); } else { err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); @@ -1037,7 +1036,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, trace_write_opened(rqstp, fhp, offset, vlen); if (cnt) err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, - cnt, stablep); + cnt, stable); trace_write_io_done(rqstp, fhp, offset, vlen); fput(file); } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 0bf9e7bf5800..8458d8c08b18 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -84,11 +84,11 @@ __be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, - loff_t, struct kvec *,int, unsigned long *, int *); + loff_t, struct kvec *,int, unsigned long *, int); __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, - int *stablep); + int stable); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, -- GitLab From 52e380e049d28732ec280c1e8840c9eddc716adb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 31 Dec 2016 21:00:13 +0800 Subject: [PATCH 0133/1084] NFSD: cleanup dead codes and values in nfsd_write This is just cleanup, no change in functionality. Signed-off-by: Kinglong Mee Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs3proc.c | 8 +++----- fs/nfsd/nfsproc.c | 7 ++----- fs/nfsd/vfs.c | 35 +++++++++++------------------------ fs/nfsd/vfs.h | 4 ++-- 4 files changed, 18 insertions(+), 36 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 69cd0f1e26ff..045c9081eabe 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -193,11 +193,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nfserr = nfsd_write(rqstp, &resp->fh, NULL, - argp->offset, - rqstp->rq_vec, argp->vlen, - &cnt, - resp->committed); + nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, + rqstp->rq_vec, argp->vlen, + &cnt, resp->committed); resp->count = cnt; RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index f157c7eff82f..fa82b7707e85 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -210,11 +210,8 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, - argp->offset, - rqstp->rq_vec, argp->vlen, - &cnt, - NFS_DATA_SYNC); + nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, + rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC); return nfsd_return_attrs(nfserr, resp); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 1bedb2bc907e..2dd633fdef35 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1011,35 +1011,22 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, * N.B. After this call fhp needs an fh_put */ __be32 -nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, - loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, - int stable) +nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, + struct kvec *vec, int vlen, unsigned long *cnt, int stable) { - __be32 err = 0; + struct file *file = NULL; + __be32 err = 0; trace_write_start(rqstp, fhp, offset, vlen); - if (file) { - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - trace_write_opened(rqstp, fhp, offset, vlen); - err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, - stable); - trace_write_io_done(rqstp, fhp, offset, vlen); - } else { - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); - if (err) - goto out; + err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); + if (err) + goto out; - trace_write_opened(rqstp, fhp, offset, vlen); - if (cnt) - err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, - cnt, stable); - trace_write_io_done(rqstp, fhp, offset, vlen); - fput(file); - } + trace_write_opened(rqstp, fhp, offset, vlen); + err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable); + trace_write_io_done(rqstp, fhp, offset, vlen); + fput(file); out: trace_write_done(rqstp, fhp, offset, vlen); return err; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 8458d8c08b18..db98c48c735a 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -83,8 +83,8 @@ __be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); -__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, - loff_t, struct kvec *,int, unsigned long *, int); +__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, + struct kvec *, int, unsigned long *, int); __be32 nfsd_vfs_write(struct svc_rqst 
*rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, -- GitLab From 865d50b23e4d29c4958e6082c2ed8631472c613f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 31 Dec 2016 21:00:21 +0800 Subject: [PATCH 0134/1084] NFSD: Remove unused value inode in nfsd_vfs_write This is just cleanup, no change in functionality. Signed-off-by: Kinglong Mee Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2dd633fdef35..5859f5826045 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -920,7 +920,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, unsigned long *cnt, int stable) { struct svc_export *exp; - struct inode *inode; mm_segment_t oldfs; __be32 err = 0; int host_err; @@ -938,9 +937,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, */ current->flags |= PF_LESS_THROTTLE; - inode = file_inode(file); - exp = fhp->fh_export; - + exp = fhp->fh_export; use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); if (!EX_ISSYNC(exp)) -- GitLab From 7b19824de685eae9802f679cab7457aad486d185 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 5 Jan 2017 16:34:49 -0500 Subject: [PATCH 0135/1084] nfsd: initialize sin6_scope_id in nfsd_inet6addr_event() I noticed this was missing when I was testing with link local addresses. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e6bfd96734c0..2e378d0479ad 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -354,6 +354,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ifa->addr; + if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) + sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); } -- GitLab From c01410f7f6fd476072c9a71241938c24bed8b6f9 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 5 Jan 2017 16:34:50 -0500 Subject: [PATCH 0136/1084] lockd: initialize sin6_scope_id in lockd_inet6addr_event() I noticed this was missing when I was testing with link local addresses. Signed-off-by: Scott Mayhew Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 1c13dd80744f..7e4ea3b9f472 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -322,6 +322,8 @@ static int lockd_inet6addr_event(struct notifier_block *this, dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; sin6.sin6_addr = ifa->addr; + if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) + sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nlmsvc_rqst->rq_server, (struct sockaddr *)&sin6); } -- GitLab From 5cf23dbb1d3122bbc378dbbd96880a086c1b63df Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 11 Jan 2017 20:40:36 -0500 Subject: [PATCH 0137/1084] nfsd: constify nfsd_suppatttrs To keep me from accidentally writing to this again.... Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 +- fs/nfsd/nfsd.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 93eba39d57c4..12cc6b29e78c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -58,7 +58,7 @@ #define NFSDDBG_FACILITY NFSDDBG_XDR -u32 nfsd_suppattrs[3][3] = { +const u32 nfsd_suppattrs[3][3] = { {NFSD4_SUPPORTED_ATTRS_WORD0, NFSD4_SUPPORTED_ATTRS_WORD1, NFSD4_SUPPORTED_ATTRS_WORD2}, diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index d74c8c44dc35..d96606801d47 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -362,16 +362,16 @@ void nfsd_lockd_shutdown(void); FATTR4_WORD2_MODE_UMASK | \ NFSD4_2_SECURITY_ATTRS) -extern u32 nfsd_suppattrs[3][3]; +extern const u32 nfsd_suppattrs[3][3]; -static inline bool bmval_is_subset(u32 *bm1, u32 *bm2) +static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2) { return !((bm1[0] & ~bm2[0]) || (bm1[1] & ~bm2[1]) || (bm1[2] & ~bm2[2])); } -static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval) +static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) { return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]); } -- GitLab From 32ddd944a056c786f6acdd95ed29e994adc613a2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 3 Jan 2017 12:30:11 -0500 Subject: [PATCH 0138/1084] nfsd: opt in to labeled nfs per export Currently turning on NFSv4.2 results in 4.2 clients suddenly seeing the individual file labels as they're set on the server. This is not what they've previously seen, and not appropriate in many cases. (In particular, if clients have heterogeneous security policies then one client's labels may not even make sense to another.) Labeled NFS should be opted in only in those cases when the administrator knows it makes sense. It's helpful to be able to turn 4.2 on by default, and otherwise the protocol upgrade seems free of regressions. So, default labeled NFS to off and provide an export flag to reenable it. Users wanting labeled NFS support on an export will henceforth need to: - make sure 4.2 support is enabled on client and server (as before), and - upgrade the server nfs-utils to a version supporting the new "security_label" export flag. - set that "security_label" flag on the export. This commit may be seen as a regression to anyone currently depending on security labels. We believe those cases are currently rare. Reported-by: tibbs@math.uh.edu Signed-off-by: J. 
Bruce Fields --- fs/nfsd/export.c | 1 + fs/nfsd/nfs4proc.c | 4 ++++ fs/nfsd/nfs4xdr.c | 5 ++++- include/uapi/linux/nfsd/export.h | 5 +++-- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 43e109cc0ccc..e71f11b1a180 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1102,6 +1102,7 @@ static struct flags { { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, { NFSEXP_V4ROOT, {"v4root", ""}}, { NFSEXP_PNFS, {"pnfs", ""}}, + { NFSEXP_SECURITY_LABEL, {"security_label", ""}}, { 0, {"", ""}} }; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2d4eb47177d1..171f2d7ecfdd 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -95,11 +95,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, u32 *bmval, u32 *writable) { struct dentry *dentry = cstate->current_fh.fh_dentry; + struct svc_export *exp = cstate->current_fh.fh_export; if (!nfsd_attrs_supported(cstate->minorversion, bmval)) return nfserr_attrnotsupp; if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry))) return nfserr_attrnotsupp; + if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) && + !(exp->ex_flags & NFSEXP_SECURITY_LABEL)) + return nfserr_attrnotsupp; if (writable && !bmval_is_subset(bmval, writable)) return nfserr_inval; if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) && diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 12cc6b29e78c..3cc190755958 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2417,8 +2417,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, #ifdef CONFIG_NFSD_V4_SECURITY_LABEL if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) || bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { - err = security_inode_getsecctx(d_inode(dentry), + if (exp->ex_flags & NFSEXP_SECURITY_LABEL) + err = security_inode_getsecctx(d_inode(dentry), &context, &contextlen); + else + err = -EOPNOTSUPP; contextsupport = (err == 0); if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { if (err == -EOPNOTSUPP) diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h index 0df7bd5d2fb1..c3be256107c6 100644 --- a/include/uapi/linux/nfsd/export.h +++ b/include/uapi/linux/nfsd/export.h @@ -32,7 +32,8 @@ #define NFSEXP_ASYNC 0x0010 #define NFSEXP_GATHERED_WRITES 0x0020 #define NFSEXP_NOREADDIRPLUS 0x0040 -/* 80 100 currently unused */ +#define NFSEXP_SECURITY_LABEL 0x0080 +/* 0x100 currently unused */ #define NFSEXP_NOHIDE 0x0200 #define NFSEXP_NOSUBTREECHECK 0x0400 #define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */ @@ -53,7 +54,7 @@ #define NFSEXP_PNFS 0x20000 /* All flags that we claim to support. (Note we don't support NOACL.) */ -#define NFSEXP_ALLFLAGS 0x3FE7F +#define NFSEXP_ALLFLAGS 0x3FEFF /* The flags that may vary depending on security flavor: */ #define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ -- GitLab From d1160ebff1fea7ac7f6e6533b43144d377b56e6f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 30 Jan 2017 11:57:36 +0100 Subject: [PATCH 0139/1084] arm64: dts: exynos: Add initial configuration for DISP clocks for TM2/TM2e Add an initial clock configuration for the display subsystem for Exynos5433-based TM2/TM2e boards in device tree in order to avoid a dependency on the configuration left by the bootloader. This initial configuration is also needed to ensure that the display subsystem is operational if the display power domain gets turned off before the clock controller is probed and the initial clock configuration left by the bootloader is saved. 
TM2 and TM2e use a different rate for the DISP PLL clock, but for better maintainability all 'assigned-clocks-*' properties for the DISP CMU are defined in each board dts instead of redefining just the rates property. Signed-off-by: Marek Szyprowski Reviewed-by: Chanwoo Choi Signed-off-by: Krzysztof Kozlowski --- .../dts/exynos/exynos5433-tm2-common.dtsi | 12 ------- arch/arm64/boot/dts/exynos/exynos5433-tm2.dts | 34 +++++++++++++++++++ .../arm64/boot/dts/exynos/exynos5433-tm2e.dts | 34 +++++++++++++++++++ 3 files changed, 68 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi index 53fd0683d400..098ad557fee3 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi @@ -217,18 +217,6 @@ &cmu_aud { assigned-clock-parents = <&cmu_top CLK_FOUT_AUD_PLL>; }; -&cmu_disp { - assigned-clocks = <&cmu_mif CLK_MOUT_SCLK_DECON_TV_ECLK_A>, - <&cmu_mif CLK_DIV_SCLK_DECON_TV_ECLK>, - <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>, - <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK>; - assigned-clock-parents = <&cmu_mif CLK_MOUT_BUS_PLL_DIV2>, - <0>, - <&cmu_mif CLK_SCLK_DECON_TV_ECLK_DISP>, - <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>; - assigned-clock-rates = <0>, <400000000>; -}; - &cmu_fsys { assigned-clocks = <&cmu_top CLK_MOUT_SCLK_USBDRD30>, <&cmu_top CLK_MOUT_SCLK_USBHOST30>, diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2.dts b/arch/arm64/boot/dts/exynos/exynos5433-tm2.dts index ddba2f889326..dea0a6f5bc18 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433-tm2.dts +++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2.dts @@ -18,6 +18,40 @@ / { compatible = "samsung,tm2", "samsung,exynos5433"; }; +&cmu_disp { + /* + * TM2 and TM2e differ only by DISP_PLL rate, but define all assigned + * clocks properties for DISP CMU for each board to keep them together + * for easier review and maintenance. 
+ */ + assigned-clocks = <&cmu_disp CLK_FOUT_DISP_PLL>, + <&cmu_mif CLK_DIV_SCLK_DECON_TV_ECLK>, + <&cmu_disp CLK_MOUT_ACLK_DISP_333_USER>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0_USER>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK_USER>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK>, + <&cmu_disp CLK_MOUT_PHYCLK_MIPIDPHY0_RXCLKESC0_USER>, + <&cmu_disp CLK_MOUT_PHYCLK_MIPIDPHY0_BITCLKDIV8_USER>, + <&cmu_disp CLK_MOUT_DISP_PLL>, + <&cmu_mif CLK_MOUT_SCLK_DECON_TV_ECLK_A>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK>; + assigned-clock-parents = <0>, <0>, + <&cmu_mif CLK_ACLK_DISP_333>, + <&cmu_mif CLK_SCLK_DSIM0_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0_USER>, + <&cmu_mif CLK_SCLK_DECON_ECLK_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK_USER>, + <&cmu_disp CLK_PHYCLK_MIPIDPHY0_RXCLKESC0_PHY>, + <&cmu_disp CLK_PHYCLK_MIPIDPHY0_BITCLKDIV8_PHY>, + <&cmu_disp CLK_FOUT_DISP_PLL>, + <&cmu_mif CLK_MOUT_BUS_PLL_DIV2>, + <&cmu_mif CLK_SCLK_DECON_TV_ECLK_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>; + assigned-clock-rates = <250000000>, <400000000>; +}; + &hsi2c_9 { status = "okay"; diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2e.dts b/arch/arm64/boot/dts/exynos/exynos5433-tm2e.dts index 2fbf3a860316..7891a31adc17 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433-tm2e.dts +++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2e.dts @@ -18,6 +18,40 @@ / { compatible = "samsung,tm2e", "samsung,exynos5433"; }; +&cmu_disp { + /* + * TM2 and TM2e differ only by DISP_PLL rate, but define all assigned + * clocks properties for DISP CMU for each board to keep them together + * for easier review and maintenance. + */ + assigned-clocks = <&cmu_disp CLK_FOUT_DISP_PLL>, + <&cmu_mif CLK_DIV_SCLK_DECON_TV_ECLK>, + <&cmu_disp CLK_MOUT_ACLK_DISP_333_USER>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0_USER>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK_USER>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK>, + <&cmu_disp CLK_MOUT_PHYCLK_MIPIDPHY0_RXCLKESC0_USER>, + <&cmu_disp CLK_MOUT_PHYCLK_MIPIDPHY0_BITCLKDIV8_USER>, + <&cmu_disp CLK_MOUT_DISP_PLL>, + <&cmu_mif CLK_MOUT_SCLK_DECON_TV_ECLK_A>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK>; + assigned-clock-parents = <0>, <0>, + <&cmu_mif CLK_ACLK_DISP_333>, + <&cmu_mif CLK_SCLK_DSIM0_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DSIM0_USER>, + <&cmu_mif CLK_SCLK_DECON_ECLK_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DECON_ECLK_USER>, + <&cmu_disp CLK_PHYCLK_MIPIDPHY0_RXCLKESC0_PHY>, + <&cmu_disp CLK_PHYCLK_MIPIDPHY0_BITCLKDIV8_PHY>, + <&cmu_disp CLK_FOUT_DISP_PLL>, + <&cmu_mif CLK_MOUT_BUS_PLL_DIV2>, + <&cmu_mif CLK_SCLK_DECON_TV_ECLK_DISP>, + <&cmu_disp CLK_MOUT_SCLK_DECON_TV_ECLK_USER>; + assigned-clock-rates = <278000000>, <400000000>; +}; + &ldo31_reg { regulator-name = "TSP_VDD_1.8V_AP"; regulator-min-microvolt = <1800000>; -- GitLab From 860ac88abecab5d3d0e07306365f55cbc690c220 Mon Sep 17 00:00:00 2001 From: Pankaj Dubey Date: Fri, 20 Jan 2017 09:49:56 +0530 Subject: [PATCH 0140/1084] pinctrl: dt-bindings: samsung: Add Exynos7 specific pinctrl macro definitions Exynos7 SoC pinctrl configurations are similar to existing Exynos4/5 except for FSYS1 pinctrl drive strengths. So adding Exynos7 specific FSYS1 blocks pinctrl driver strength related macros which will be used in Exynos7 DTSi files. 
Signed-off-by: Pankaj Dubey Reviewed-by: Alim Akhtar Acked-by: Sylwester Nawrocki Acked-by: Linus Walleij Signed-off-by: Krzysztof Kozlowski --- include/dt-bindings/pinctrl/samsung.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/dt-bindings/pinctrl/samsung.h b/include/dt-bindings/pinctrl/samsung.h index e0ebb20ffdd3..b7aa3646208b 100644 --- a/include/dt-bindings/pinctrl/samsung.h +++ b/include/dt-bindings/pinctrl/samsung.h @@ -68,4 +68,12 @@ #define EXYNOS_PIN_FUNC_6 6 #define EXYNOS_PIN_FUNC_F 0xf +/* Drive strengths for Exynos7 FSYS1 block */ +#define EXYNOS7_FSYS1_PIN_DRV_LV1 0 +#define EXYNOS7_FSYS1_PIN_DRV_LV2 4 +#define EXYNOS7_FSYS1_PIN_DRV_LV3 2 +#define EXYNOS7_FSYS1_PIN_DRV_LV4 6 +#define EXYNOS7_FSYS1_PIN_DRV_LV5 1 +#define EXYNOS7_FSYS1_PIN_DRV_LV6 5 + #endif /* __DT_BINDINGS_PINCTRL_SAMSUNG_H__ */ -- GitLab From 51a2de551755c28c98191f80537acd3c7e1adda4 Mon Sep 17 00:00:00 2001 From: Pankaj Dubey Date: Fri, 20 Jan 2017 09:49:57 +0530 Subject: [PATCH 0141/1084] arm64: dts: exynos: Use macros for pinctrl configuration on Exynos7 Usage of DTS macros instead of hard-coded numbers makes code easier to read. One does not have to remember which value means pull-up/down or specific driver strength. Signed-off-by: Pankaj Dubey Reviewed-by: Alim Akhtar Signed-off-by: Krzysztof Kozlowski --- .../boot/dts/exynos/exynos7-espresso.dts | 4 +- .../boot/dts/exynos/exynos7-pinctrl.dtsi | 302 +++++++++--------- 2 files changed, 154 insertions(+), 152 deletions(-) diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts index c528dd52ba2d..25d9b4aa0ff6 100644 --- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts +++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts @@ -328,8 +328,8 @@ buck10_reg: BUCK10 { &pinctrl_alive { pmic_irq: pmic-irq { samsung,pins = "gpa0-2"; - samsung,pin-pud = <3>; - samsung,pin-drv = <3>; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; diff --git a/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi b/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi index 7ebb93927f13..8f58850cd28c 100644 --- a/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7-pinctrl.dtsi @@ -12,6 +12,8 @@ * published by the Free Software Foundation. 
*/ +#include + &pinctrl_alive { gpa0: gpa0 { gpio-controller; @@ -187,163 +189,163 @@ gpg3: gpg3 { hs_i2c10_bus: hs-i2c10-bus { samsung,pins = "gpb0-1", "gpb0-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c11_bus: hs-i2c11-bus { samsung,pins = "gpb0-3", "gpb0-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c2_bus: hs-i2c2-bus { samsung,pins = "gpd0-3", "gpd0-2"; - samsung,pin-function = <3>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart0_data: uart0-data { samsung,pins = "gpd0-0", "gpd0-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart0_fctl: uart0-fctl { samsung,pins = "gpd0-2", "gpd0-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart2_data: uart2-data { samsung,pins = "gpd1-4", "gpd1-5"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c3_bus: hs-i2c3-bus { samsung,pins = "gpd1-3", "gpd1-2"; - samsung,pin-function = <3>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart1_data: uart1-data { samsung,pins = "gpd1-0", "gpd1-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart1_fctl: uart1-fctl { samsung,pins = "gpd1-2", "gpd1-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c0_bus: hs-i2c0-bus { samsung,pins = "gpd2-1", "gpd2-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c1_bus: hs-i2c1-bus { samsung,pins = "gpd2-3", "gpd2-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c9_bus: hs-i2c9-bus { samsung,pins = "gpd2-7", "gpd2-6"; - samsung,pin-function = <3>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; pwm0_out: pwm0-out { samsung,pins = "gpd2-4"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; pwm1_out: pwm1-out { samsung,pins = "gpd2-5"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; pwm2_out: pwm2-out { samsung,pins = "gpd2-6"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; pwm3_out: pwm3-out { samsung,pins = "gpd2-7"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c8_bus: hs-i2c8-bus { samsung,pins = "gpd5-3", "gpd5-2"; - 
samsung,pin-function = <3>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; uart3_data: uart3-data { samsung,pins = "gpd5-0", "gpd5-1"; - samsung,pin-function = <3>; - samsung,pin-pud = <0>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; spi2_bus: spi2-bus { samsung,pins = "gpd5-0", "gpd5-1", "gpd5-2", "gpd5-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; spi1_bus: spi1-bus { samsung,pins = "gpd6-2", "gpd6-3", "gpd6-4", "gpd6-5"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; spi0_bus: spi0-bus { samsung,pins = "gpd8-0", "gpd8-1", "gpd6-0", "gpd6-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c4_bus: hs-i2c4-bus { samsung,pins = "gpg3-1", "gpg3-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; hs_i2c5_bus: hs-i2c5-bus { samsung,pins = "gpg3-3", "gpg3-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -358,9 +360,9 @@ gpj0: gpj0 { hs_i2c6_bus: hs-i2c6-bus { samsung,pins = "gpj0-1", "gpj0-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -375,9 +377,9 @@ gpj1: gpj1 { hs_i2c7_bus: hs-i2c7-bus { samsung,pins = "gpj1-1", "gpj1-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -392,9 +394,9 @@ gpg4: gpg4 { spi3_bus: spi3-bus { samsung,pins = "gpg4-0", "gpg4-1", "gpg4-2", "gpg4-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -409,9 +411,9 @@ gpv7: gpv7 { spi4_bus: spi4-bus { samsung,pins = "gpv7-0", "gpv7-1", "gpv7-2", "gpv7-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -426,37 +428,37 @@ gpr4: gpr4 { sd2_clk: sd2-clk { samsung,pins = "gpr4-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <3>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd2_cmd: sd2-cmd { samsung,pins = "gpr4-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <3>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd2_cd: sd2-cd { samsung,pins = "gpr4-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <3>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd2_bus1: sd2-bus-width1 { samsung,pins = "gpr4-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <3>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd2_bus4: sd2-bus-width4 { samsung,pins = "gpr4-4", "gpr4-5", "gpr4-6"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <3>; + samsung,pin-function = ; + 
samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -495,107 +497,107 @@ gpr3: gpr3 { sd0_clk: sd0-clk { samsung,pins = "gpr0-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_cmd: sd0-cmd { samsung,pins = "gpr0-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_ds: sd0-ds { samsung,pins = "gpr0-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <1>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_qrdy: sd0-qrdy { samsung,pins = "gpr0-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <1>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_bus1: sd0-bus-width1 { samsung,pins = "gpr1-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_bus4: sd0-bus-width4 { samsung,pins = "gpr1-1", "gpr1-2", "gpr1-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd0_bus8: sd0-bus-width8 { samsung,pins = "gpr1-4", "gpr1-5", "gpr1-6", "gpr1-7"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <4>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_clk: sd1-clk { samsung,pins = "gpr2-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_cmd: sd1-cmd { samsung,pins = "gpr2-1"; - samsung,pin-function = <2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_ds: sd1-ds { samsung,pins = "gpr2-2"; - samsung,pin-function = <2>; - samsung,pin-pud = <1>; - samsung,pin-drv = <6>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_qrdy: sd1-qrdy { samsung,pins = "gpr2-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <1>; - samsung,pin-drv = <6>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_int: sd1-int { samsung,pins = "gpr2-4"; - samsung,pin-function = <2>; - samsung,pin-pud = <1>; - samsung,pin-drv = <6>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_bus1: sd1-bus-width1 { samsung,pins = "gpr3-0"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_bus4: sd1-bus-width4 { samsung,pins = "gpr3-1", "gpr3-2", "gpr3-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; sd1_bus8: sd1-bus-width8 { samsung,pins = "gpr3-4", "gpr3-5", "gpr3-6", "gpr3-7"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; @@ -682,22 +684,22 @@ gpv6: gpv6 { spi5_bus: spi5-bus { samsung,pins = "gpf2-0", "gpf2-1", "gpf2-2", "gpf2-3"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; ufs_refclk_out: ufs-refclk-out { samsung,pins = "gpg2-4"; - samsung,pin-function = 
<2>; - samsung,pin-pud = <0>; - samsung,pin-drv = <2>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; ufs_rst_n: ufs-rst-n { samsung,pins = "gph1-5"; - samsung,pin-function = <2>; - samsung,pin-pud = <3>; - samsung,pin-drv = <0>; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; }; }; -- GitLab From ad6afec832a77845a9a0f9353fe049225a976b16 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Wed, 18 Jan 2017 12:02:52 +0530 Subject: [PATCH 0142/1084] arm64: dts: exynos: Add USB 3.0 controller node for Exynos7 Add USB 3.0 DRD controller device node, with its clock and phy information to enable the same on Exynos7. Signed-off-by: Vivek Gautam Signed-off-by: Pankaj Dubey Reviewed-by: Javier Martinez Canillas Reviewed-by: Alim Akhtar Signed-off-by: Krzysztof Kozlowski --- arch/arm64/boot/dts/exynos/exynos7.dtsi | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi index 80aa60e38237..9a3fbed1765a 100644 --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi @@ -603,6 +603,40 @@ atlas_thermal: cluster0-thermal { #include "exynos7-trip-points.dtsi" }; }; + + usbdrd_phy: phy@15500000 { + compatible = "samsung,exynos7-usbdrd-phy"; + reg = <0x15500000 0x100>; + clocks = <&clock_fsys0 ACLK_USBDRD300>, + <&clock_fsys0 OSCCLK_PHY_CLKOUT_USB30_PHY>, + <&clock_fsys0 PHYCLK_USBDRD300_UDRD30_PIPE_PCLK_USER>, + <&clock_fsys0 PHYCLK_USBDRD300_UDRD30_PHYCLK_USER>, + <&clock_fsys0 SCLK_USBDRD300_REFCLK>; + clock-names = "phy", "ref", "phy_pipe", + "phy_utmi", "itp"; + samsung,pmu-syscon = <&pmu_system_controller>; + #phy-cells = <1>; + }; + + usbdrd3 { + compatible = "samsung,exynos7-dwusb3"; + clocks = <&clock_fsys0 ACLK_USBDRD300>, + <&clock_fsys0 SCLK_USBDRD300_SUSPENDCLK>, + <&clock_fsys0 ACLK_AXIUS_USBDRD30X_FSYS0X>; + clock-names = "usbdrd30", "usbdrd30_susp_clk", + "usbdrd30_axius_clk"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + dwc3@15400000 { + compatible = "snps,dwc3"; + reg = <0x15400000 0x10000>; + interrupts = ; + phys = <&usbdrd_phy 0>, <&usbdrd_phy 1>; + phy-names = "usb2-phy", "usb3-phy"; + }; + }; }; }; -- GitLab From 6629490aa027a8af3160e63cffcddc97c1602c96 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Wed, 18 Jan 2017 12:02:53 +0530 Subject: [PATCH 0143/1084] arm64: dts: exynos: Add regulators for Vbus and Vbus-Boost Adding fixed voltage regulators for Vbus and Vbus-boost required by USB 3.0 DRD controller on Exynos7-espresso board. 
Signed-off-by: Vivek Gautam Signed-off-by: Pankaj Dubey Reviewed-by: Javier Martinez Canillas Reviewed-by: Alim Akhtar Signed-off-by: Krzysztof Kozlowski --- .../boot/dts/exynos/exynos7-espresso.dts | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts index 25d9b4aa0ff6..e5892bb0ae6e 100644 --- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts +++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts @@ -13,6 +13,7 @@ #include "exynos7.dtsi" #include #include +#include / { model = "Samsung Exynos7 Espresso board based on EXYNOS7"; @@ -32,6 +33,29 @@ memory@40000000 { device_type = "memory"; reg = <0x0 0x40000000 0x0 0xC0000000>; }; + + usb30_vbus_reg: regulator-usb30 { + compatible = "regulator-fixed"; + regulator-name = "VBUS_5V"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gph1 1 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&usb30_vbus_en>; + enable-active-high; + }; + + usb3drd_boost_5v: regulator-usb3drd-boost { + compatible = "regulator-fixed"; + regulator-name = "VUSB_VBUS_5V"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpf4 1 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&usb3drd_boost_en>; + enable-active-high; + }; + }; &fin_pll { @@ -365,3 +389,24 @@ &mmc_2 { vqmmc-supply = <&ldo2_reg>; disable-wp; }; + +&pinctrl_bus1 { + usb30_vbus_en: usb30-vbus-en { + samsung,pins = "gph1-1"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; + + usb3drd_boost_en: usb3drd-boost-en { + samsung,pins = "gpf4-1"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; +}; + +&usbdrd_phy { + vbus-supply = <&usb30_vbus_reg>; + vbus-boost-supply = <&usb3drd_boost_5v>; +}; -- GitLab From 17fa2dcbd6325ab877651bef04fca9dd828a2758 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 3 Feb 2017 18:01:23 +0100 Subject: [PATCH 0144/1084] iio: adc: handle unknown of_device_id data If we get an unknown 'childmode' value, a number of variables are not initialized properly: drivers/iio/adc/rcar-gyroadc.c: In function 'rcar_gyroadc_probe': drivers/iio/adc/rcar-gyroadc.c:390:5: error: 'num_channels' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/iio/adc/rcar-gyroadc.c:426:22: error: 'sample_width' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/iio/adc/rcar-gyroadc.c:428:23: error: 'channels' may be used uninitialized in this function [-Werror=maybe-uninitialized] The driver is currently correct, but handling this properly is more robust for possible modifications. There is also a false-positive warning about adcmode being possibly uninitialized, but that cannot happen as we also check the 'first' flag: 
Fixes: 059c53b32329 ("iio: adc: Add Renesas GyroADC driver") Signed-off-by: Arnd Bergmann Acked-by: Marek Vasut Signed-off-by: Jonathan Cameron --- drivers/iio/adc/rcar-gyroadc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/rcar-gyroadc.c b/drivers/iio/adc/rcar-gyroadc.c index 0c44f72c32a8..018ed360e717 100644 --- a/drivers/iio/adc/rcar-gyroadc.c +++ b/drivers/iio/adc/rcar-gyroadc.c @@ -336,7 +336,7 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) struct device_node *child; struct regulator *vref; unsigned int reg; - unsigned int adcmode, childmode; + unsigned int adcmode = -1, childmode; unsigned int sample_width; unsigned int num_channels; int ret, first = 1; @@ -366,6 +366,8 @@ static int rcar_gyroadc_parse_subdevs(struct iio_dev *indio_dev) channels = rcar_gyroadc_iio_channels_3; num_channels = ARRAY_SIZE(rcar_gyroadc_iio_channels_3); break; + default: + return -EINVAL; } /* -- GitLab From b880092109323d1cc608c420fa712d3e19508e13 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 30 Jan 2017 17:17:00 +1100 Subject: [PATCH 0145/1084] NFSDv4: use export cache flushtime for changeid on V4ROOT objects. If you change the set of filesystems that are exported, then the contents of various directories in the NFSv4 pseudo-root is likely to change. However the change-id of those directories is currently tied to the underlying directory, so the client may not see the changes in a timely fashion. This patch changes the change-id number to be derived from the "flush_time" of the export cache. Whenever any changes are made to the set of exported filesystems, this flush_time is updated. The result is that clients see changes to the set of exported filesystems much more quickly, often immediately. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3cc190755958..7e4df80456ff 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1966,9 +1966,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode) +static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, + struct svc_export *exp) { - if (IS_I_VERSION(inode)) { + if (exp->ex_flags & NFSEXP_V4ROOT) { + *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); + *p++ = 0; + } else if (IS_I_VERSION(inode)) { p = xdr_encode_hyper(p, inode->i_version); } else { *p++ = cpu_to_be32(stat->ctime.tv_sec); @@ -2493,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - p = encode_change(p, &stat, d_inode(dentry)); + p = encode_change(p, &stat, d_inode(dentry), exp); } if (bmval0 & FATTR4_WORD0_SIZE) { p = xdr_reserve_space(xdr, 8); -- GitLab From da8c1c46f2082ce4a094812806d7eab3c17a4817 Mon Sep 17 00:00:00 2001 From: Augusto Mecking Caringi Date: Mon, 30 Jan 2017 10:47:36 +0000 Subject: [PATCH 0146/1084] thermal/intel_powerclamp: Remove set-but-not-used variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In poll_pkg_cstate() function, the variables jiffies_last and jiffies_now are set but never used. 
This has been detected by building the driver with W=1: drivers/thermal/intel_powerclamp.c: In function ‘poll_pkg_cstate’: drivers/thermal/intel_powerclamp.c:464:23: warning: variable ‘jiffies_last’ set but not used [-Wunused-but-set-variable] static unsigned long jiffies_last; ^ Signed-off-by: Augusto Mecking Caringi Acked-by: Jacob Pan Signed-off-by: Zhang Rui --- drivers/thermal/intel_powerclamp.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index df64692e9e64..a47103a659fa 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -461,16 +461,13 @@ static void poll_pkg_cstate(struct work_struct *dummy) { static u64 msr_last; static u64 tsc_last; - static unsigned long jiffies_last; u64 msr_now; - unsigned long jiffies_now; u64 tsc_now; u64 val64; msr_now = pkg_state_counter(); tsc_now = rdtsc(); - jiffies_now = jiffies; /* calculate pkg cstate vs tsc ratio */ if (!msr_last || !tsc_last) @@ -485,7 +482,6 @@ static void poll_pkg_cstate(struct work_struct *dummy) /* update record */ msr_last = msr_now; - jiffies_last = jiffies_now; tsc_last = tsc_now; if (true == clamping) -- GitLab From af7bd4dc13093bf1477f370722bbab24cf457b91 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:52 +0200 Subject: [PATCH 0147/1084] vfs: create vfs helper vfs_tmpfile() Factor out some common vfs bits from do_tmpfile() to be used by overlayfs for concurrent copy up. Signed-off-by: Amir Goldstein Cc: Al Viro Signed-off-by: Miklos Szeredi --- fs/namei.c | 68 +++++++++++++++++++++++++++++----------------- include/linux/fs.h | 3 ++ 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index ad74877e1442..7d87699c3e2e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3353,13 +3353,50 @@ static int do_last(struct nameidata *nd, return error; } +struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag) +{ + static const struct qstr name = QSTR_INIT("/", 1); + struct dentry *child = NULL; + struct inode *dir = dentry->d_inode; + struct inode *inode; + int error; + + /* we want directory to be writable */ + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); + if (error) + goto out_err; + error = -EOPNOTSUPP; + if (!dir->i_op->tmpfile) + goto out_err; + error = -ENOMEM; + child = d_alloc(dentry, &name); + if (unlikely(!child)) + goto out_err; + error = dir->i_op->tmpfile(dir, child, mode); + if (error) + goto out_err; + error = -ENOENT; + inode = child->d_inode; + if (unlikely(!inode)) + goto out_err; + if (!(open_flag & O_EXCL)) { + spin_lock(&inode->i_lock); + inode->i_state |= I_LINKABLE; + spin_unlock(&inode->i_lock); + } + return child; + +out_err: + dput(child); + return ERR_PTR(error); +} +EXPORT_SYMBOL(vfs_tmpfile); + static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, struct file *file, int *opened) { - static const struct qstr name = QSTR_INIT("/", 1); struct dentry *child; - struct inode *dir; struct path path; int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); if (unlikely(error)) @@ -3367,25 +3404,12 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, error = mnt_want_write(path.mnt); if (unlikely(error)) goto out; - dir = path.dentry->d_inode; - /* we want directory to be writable */ - error = inode_permission(dir, MAY_WRITE | MAY_EXEC); - if (error) - goto out2; - if (!dir->i_op->tmpfile) { - error = -EOPNOTSUPP; - goto out2; - } - child = 
d_alloc(path.dentry, &name); - if (unlikely(!child)) { - error = -ENOMEM; + child = vfs_tmpfile(path.dentry, op->mode, op->open_flag); + error = PTR_ERR(child); + if (unlikely(IS_ERR(child))) goto out2; - } dput(path.dentry); path.dentry = child; - error = dir->i_op->tmpfile(dir, child, op->mode); - if (error) - goto out2; audit_inode(nd->name, child, 0); /* Don't check for other permissions, the inode was just created */ error = may_open(&path, 0, op->open_flag); @@ -3396,14 +3420,8 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, if (error) goto out2; error = open_check_o_direct(file); - if (error) { + if (error) fput(file); - } else if (!(op->open_flag & O_EXCL)) { - struct inode *inode = file_inode(file); - spin_lock(&inode->i_lock); - inode->i_state |= I_LINKABLE; - spin_unlock(&inode->i_lock); - } out2: mnt_drop_write(path.mnt); out: diff --git a/include/linux/fs.h b/include/linux/fs.h index 2ba074328894..4a7f3cc9edab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1561,6 +1561,9 @@ extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); extern int vfs_whiteout(struct inode *, struct dentry *); +extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, + int open_flag); + /* * VFS file helper functions. */ -- GitLab From 9e79b1326302589a035fe20e8cce7c1a7d8333ed Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 31 Jan 2017 10:34:55 +0200 Subject: [PATCH 0148/1084] vfs: deny fallocate() on directory There was an obscure use case of fallocate of directory inode in the vfs helper with the comment: "Let individual file system decide if it supports preallocation for directories or not." But there is no in-tree file system that implements fallocate for directory operations. Deny an attempt to fallocate a directory with EISDIR error. This change is needed prior to converting sb_start_write() to file_start_write(), so freeze protection is correctly handled for cases of fallocate file and blockdev. Cc: linux-api@vger.kernel.org Cc: Al Viro Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/open.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/open.c b/fs/open.c index 9921f70bc5ca..f9118f4113e7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -301,12 +301,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (S_ISFIFO(inode->i_mode)) return -ESPIPE; - /* - * Let individual file system decide if it supports preallocation - * for directories or not. - */ - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && - !S_ISBLK(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) + return -EISDIR; + + if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) return -ENODEV; /* Check for wrap through zero too */ -- GitLab From 11cbfb10775aa2a01cee966d118049ede9d0bdf2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 31 Jan 2017 10:34:56 +0200 Subject: [PATCH 0149/1084] vfs: deny copy_file_range() for non regular files There is no in-tree file system that implements copy_file_range() for non regular files. Deny an attempt to copy_file_range() a directory with EISDIR and any other non regualr file with EINVAL to conform with behavior of vfs_{clone,dedup}_file_range(). This change is needed prior to converting sb_start_write() to file_start_write() in the vfs helper. 
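As an illustration of the resulting behavior (not part of the patch itself), the sketch below shows what a user-space caller can now expect: passing a directory file descriptor as the source of copy_file_range(2) fails with EISDIR before the request ever reaches the file system. It assumes a libc that exposes the copy_file_range() wrapper (glibc 2.27 and later); older systems can issue the call through syscall(2) instead.

/* Hypothetical user-space demonstration, not kernel code. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int dirfd = open(".", O_RDONLY | O_DIRECTORY);
	int outfd = open("cfr-test.out", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	ssize_t n;

	if (dirfd < 0 || outfd < 0) {
		perror("open");
		return 1;
	}

	/* With this change applied, a directory source is rejected with EISDIR. */
	n = copy_file_range(dirfd, NULL, outfd, NULL, 4096, 0);
	if (n < 0)
		printf("copy_file_range: %s\n", strerror(errno));
	else
		printf("copied %zd bytes (unexpected for a directory)\n", n);
	return 0;
}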
Cc: linux-api@vger.kernel.org Cc: Al Viro Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/read_write.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/read_write.c b/fs/read_write.c index 5816d4c4cab0..511178d7723b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1518,6 +1518,11 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (flags != 0) return -EINVAL; + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + return -EISDIR; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + return -EINVAL; + ret = rw_verify_area(READ, file_in, &pos_in, len); if (unlikely(ret)) return ret; -- GitLab From bfe219d373cadab761373aeea4c70406bc27ea2c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 31 Jan 2017 10:34:57 +0200 Subject: [PATCH 0150/1084] vfs: wrap write f_ops with file_{start,end}_write() Before calling write f_ops, call file_start_write() instead of sb_start_write(). Replace {sb,file}_start_write() for {copy,clone}_file_range() and for fallocate(). Beyond correct semantics, this avoids freeze protection to sb when operating on special inodes, such as fallocate() on a blockdev. Reviewed-by: Jan Kara Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/open.c | 4 ++-- fs/read_write.c | 4 ++-- include/linux/fs.h | 26 +++++++++++++------------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/open.c b/fs/open.c index f9118f4113e7..949cef29c3bb 100644 --- a/fs/open.c +++ b/fs/open.c @@ -314,7 +314,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!file->f_op->fallocate) return -EOPNOTSUPP; - sb_start_write(inode->i_sb); + file_start_write(file); ret = file->f_op->fallocate(file, mode, offset, len); /* @@ -327,7 +327,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (ret == 0) fsnotify_modify(file); - sb_end_write(inode->i_sb); + file_end_write(file); return ret; } EXPORT_SYMBOL_GPL(vfs_fallocate); diff --git a/fs/read_write.c b/fs/read_write.c index 511178d7723b..820a3f06c46a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1543,7 +1543,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, if (len == 0) return 0; - sb_start_write(inode_out->i_sb); + file_start_write(file_out); /* * Try cloning first, this is supported by more file systems, and @@ -1579,7 +1579,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, inc_syscr(current); inc_syscw(current); - sb_end_write(inode_out->i_sb); + file_end_write(file_out); return ret; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 4a7f3cc9edab..78c81e6f5d76 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1741,19 +1741,6 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, extern int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same); -static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - u64 len) -{ - int ret; - - sb_start_write(file_inode(file_out)->i_sb); - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); - sb_end_write(file_inode(file_out)->i_sb); - - return ret; -} - struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); @@ -2564,6 +2551,19 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } 
+static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + u64 len) +{ + int ret; + + file_start_write(file_out); + ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); + file_end_write(file_out); + + return ret; +} + /* * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. -- GitLab From e7f52429b4a5b2e3224948d1737eb264c8f7e15f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:53 +0200 Subject: [PATCH 0151/1084] ovl: check if upperdir fs supports O_TMPFILE This is needed for choosing between concurrent copyup using O_TMPFILE and legacy copyup using workdir+rename. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 9 +++++++++ fs/overlayfs/ovl_entry.h | 1 + fs/overlayfs/super.c | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 8af450b0e57a..3822a909ce1f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -127,6 +127,15 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) return err; } +static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) +{ + struct dentry *ret = vfs_tmpfile(dentry, mode, 0); + int err = IS_ERR(ret) ? PTR_ERR(ret) : 0; + + pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); + return ret; +} + static inline struct inode *ovl_inode_real(struct inode *inode, bool *is_upper) { unsigned long x = (unsigned long) READ_ONCE(inode->i_private); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index d14bca1850d9..65f240001aa6 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -27,6 +27,7 @@ struct ovl_fs { struct ovl_config config; /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; + bool tmpfile; }; /* private information held for every overlayfs dentry */ diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 20f48abbb82f..ff05065b510f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -825,6 +825,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) * creation of workdir in previous step. */ if (ufs->workdir) { + struct dentry *temp; + err = ovl_check_d_type_supported(&workpath); if (err < 0) goto out_put_workdir; @@ -836,6 +838,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) */ if (!err) pr_warn("overlayfs: upper fs needs to support d_type.\n"); + + /* Check if upper/work fs supports O_TMPFILE */ + temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); + ufs->tmpfile = !IS_ERR(temp); + if (ufs->tmpfile) + dput(temp); + else + pr_warn("overlayfs: upper fs does not support tmpfile.\n"); } } -- GitLab From 42f269b925405d9dd45014823e5057786d6ca452 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:54 +0200 Subject: [PATCH 0152/1084] ovl: rearrange code in ovl_copy_up_locked() As preparation to implementing copy up with O_TMPFILE, name the variable for dentry before final rename 'temp' and assign it to 'newdentry' only after rename. Also lookup upper dentry before looking up temp dentry and move ovl_set_timestamps() into ovl_copy_up_locked(), because that is going to be more convenient for upcoming change. 
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 49 +++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f57043dace62..01e332725e94 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -232,12 +232,14 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, - struct kstat *stat, const char *link) + struct kstat *stat, const char *link, + struct kstat *pstat) { struct inode *wdir = workdir->d_inode; struct inode *udir = upperdir->d_inode; struct dentry *newdentry = NULL; struct dentry *upper = NULL; + struct dentry *temp = NULL; int err; const struct cred *old_creds = NULL; struct cred *new_creds = NULL; @@ -248,25 +250,25 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, .link = link }; - newdentry = ovl_lookup_temp(workdir, dentry); - err = PTR_ERR(newdentry); - if (IS_ERR(newdentry)) - goto out; - upper = lookup_one_len(dentry->d_name.name, upperdir, dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) - goto out1; + goto out; err = security_inode_copy_up(dentry, &new_creds); if (err < 0) - goto out2; + goto out1; if (new_creds) old_creds = override_creds(new_creds); - err = ovl_create_real(wdir, newdentry, &cattr, NULL, true); + temp = ovl_lookup_temp(workdir, dentry); + err = PTR_ERR(temp); + if (IS_ERR(temp)) + goto out1; + + err = ovl_create_real(wdir, temp, &cattr, NULL, true); if (new_creds) { revert_creds(old_creds); @@ -281,39 +283,42 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, ovl_path_upper(dentry, &upperpath); BUG_ON(upperpath.dentry != NULL); - upperpath.dentry = newdentry; + upperpath.dentry = temp; err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); if (err) goto out_cleanup; } - err = ovl_copy_xattr(lowerpath->dentry, newdentry); + err = ovl_copy_xattr(lowerpath->dentry, temp); if (err) goto out_cleanup; - inode_lock(newdentry->d_inode); - err = ovl_set_attr(newdentry, stat); - inode_unlock(newdentry->d_inode); + inode_lock(temp->d_inode); + err = ovl_set_attr(temp, stat); + inode_unlock(temp->d_inode); if (err) goto out_cleanup; - err = ovl_do_rename(wdir, newdentry, udir, upper, 0); + err = ovl_do_rename(wdir, temp, udir, upper, 0); if (err) goto out_cleanup; + newdentry = dget(temp); ovl_dentry_update(dentry, newdentry); ovl_inode_update(d_inode(dentry), d_inode(newdentry)); - newdentry = NULL; + + /* Restore timestamps on parent (best effort) */ + ovl_set_timestamps(upperdir, pstat); out2: - dput(upper); + dput(temp); out1: - dput(newdentry); + dput(upper); out: return err; out_cleanup: - ovl_cleanup(wdir, newdentry); + ovl_cleanup(wdir, temp); goto out2; } @@ -368,11 +373,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link); - if (!err) { - /* Restore timestamps on parent (best effort) */ - ovl_set_timestamps(upperdir, &pstat); - } + stat, link, &pstat); out_unlock: unlock_rename(workdir, upperdir); do_delayed_call(&done); -- GitLab From d8514d8edb5b045cf7f708e14f888ce760d60f0b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:55 +0200 Subject: [PATCH 0153/1084] ovl: copy up regular file using O_TMPFILE In preparation for concurrent copy up, implement copy up of 
regular file as O_TMPFILE that is linked to upperdir instead of a file in workdir that is moved to upperdir. Suggested-by: Al Viro Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 01e332725e94..6e39e90b5605 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -20,6 +20,7 @@ #include #include #include "overlayfs.h" +#include "ovl_entry.h" #define OVL_COPY_UP_CHUNK_SIZE (1 << 20) @@ -233,7 +234,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, struct kstat *stat, const char *link, - struct kstat *pstat) + struct kstat *pstat, bool tmpfile) { struct inode *wdir = workdir->d_inode; struct inode *udir = upperdir->d_inode; @@ -263,12 +264,17 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (new_creds) old_creds = override_creds(new_creds); - temp = ovl_lookup_temp(workdir, dentry); + if (tmpfile) + temp = ovl_do_tmpfile(upperdir, stat->mode); + else + temp = ovl_lookup_temp(workdir, dentry); err = PTR_ERR(temp); if (IS_ERR(temp)) goto out1; - err = ovl_create_real(wdir, temp, &cattr, NULL, true); + err = 0; + if (!tmpfile) + err = ovl_create_real(wdir, temp, &cattr, NULL, true); if (new_creds) { revert_creds(old_creds); @@ -300,11 +306,14 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (err) goto out_cleanup; - err = ovl_do_rename(wdir, temp, udir, upper, 0); + if (tmpfile) + err = ovl_do_link(temp, udir, upper, true); + else + err = ovl_do_rename(wdir, temp, udir, upper, 0); if (err) goto out_cleanup; - newdentry = dget(temp); + newdentry = dget(tmpfile ? upper : temp); ovl_dentry_update(dentry, newdentry); ovl_inode_update(d_inode(dentry), d_inode(newdentry)); @@ -318,7 +327,8 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, return err; out_cleanup: - ovl_cleanup(wdir, temp); + if (!tmpfile) + ovl_cleanup(wdir, temp); goto out2; } @@ -342,6 +352,9 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct dentry *lowerdentry = lowerpath->dentry; struct dentry *upperdir; const char *link = NULL; + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + /* Should we copyup with O_TMPFILE or with workdir? */ + bool tmpfile = S_ISREG(stat->mode) && ofs->tmpfile; if (WARN_ON(!workdir)) return -EROFS; @@ -373,7 +386,7 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link, &pstat); + stat, link, &pstat, tmpfile); out_unlock: unlock_rename(workdir, upperdir); do_delayed_call(&done); -- GitLab From 39d3d60a54df05a1a32fa71159d7a26a530dee6c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:56 +0200 Subject: [PATCH 0154/1084] ovl: introduce copy up waitqueue The overlay sb 'copyup_wq' and overlay inode 'copying' condition variable are about to replace the upper sb rename_lock, as finer grained synchronization objects for concurrent copy up. 
Suggested-by: Miklos Szeredi Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 2 ++ fs/overlayfs/ovl_entry.h | 2 ++ fs/overlayfs/super.c | 1 + fs/overlayfs/util.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 3822a909ce1f..741dc0b6931f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -178,6 +178,8 @@ void ovl_dentry_version_inc(struct dentry *dentry); u64 ovl_dentry_version_get(struct dentry *dentry); bool ovl_is_whiteout(struct dentry *dentry); struct file *ovl_path_open(struct path *path, int flags); +int ovl_copy_up_start(struct dentry *dentry); +void ovl_copy_up_end(struct dentry *dentry); /* namei.c */ int ovl_path_next(int idx, struct dentry *dentry, struct path *path); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index 65f240001aa6..59614faa14c3 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -28,6 +28,7 @@ struct ovl_fs { /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; bool tmpfile; + wait_queue_head_t copyup_wq; }; /* private information held for every overlayfs dentry */ @@ -39,6 +40,7 @@ struct ovl_entry { u64 version; const char *redirect; bool opaque; + bool copying; }; struct rcu_head rcu; }; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ff05065b510f..6792bb70b4ac 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -708,6 +708,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (!ufs) goto out; + init_waitqueue_head(&ufs->copyup_wq); ufs->config.redirect_dir = ovl_redirect_dir_def; err = ovl_parse_opt((char *) data, &ufs->config); if (err) diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 952286f4826c..01157d6e8cfe 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -263,3 +263,33 @@ struct file *ovl_path_open(struct path *path, int flags) { return dentry_open(path, flags | O_NOATIME, current_cred()); } + +int ovl_copy_up_start(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_entry *oe = dentry->d_fsdata; + int err; + + spin_lock(&ofs->copyup_wq.lock); + err = wait_event_interruptible_locked(ofs->copyup_wq, !oe->copying); + if (!err) { + if (oe->__upperdentry) + err = 1; /* Already copied up */ + else + oe->copying = true; + } + spin_unlock(&ofs->copyup_wq.lock); + + return err; +} + +void ovl_copy_up_end(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_entry *oe = dentry->d_fsdata; + + spin_lock(&ofs->copyup_wq.lock); + oe->copying = false; + wake_up_locked(&ofs->copyup_wq); + spin_unlock(&ofs->copyup_wq.lock); +} -- GitLab From 01ad3eb8a07385bc8849f0ee7c800e7c8bd7287e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 17 Jan 2017 06:34:57 +0200 Subject: [PATCH 0155/1084] ovl: concurrent copy up of regular files Now that copy up of regular file is done using O_TMPFILE, we don't need to hold rename_lock throughout copy up. Use the copy up waitqueue to synchronize concurrent copy up of the same file. Different regular files can be copied up concurrently. The upper dir inode_lock is taken instead of rename_lock, because it is needed for lookup and later for linking the temp file, but it is released while copying up data. 
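The waiting scheme described above can be sketched as a stand-alone user-space analogue, with a mutex and condition variable standing in for the overlay copyup_wq and the per-entry 'copying'/upper-dentry state. This is an illustration of the design only, not overlayfs code; the helper names merely mirror ovl_copy_up_start()/ovl_copy_up_end().

/* Hypothetical user-space analogue of the copy-up serialization. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct object {
	pthread_mutex_t lock;
	pthread_cond_t wq;	/* stands in for copyup_wq */
	bool copying;		/* stands in for oe->copying */
	bool copied_up;		/* stands in for "already has an upper copy" */
};

/* Return 1 if the object was already copied up, 0 if the caller must do it. */
static int copy_up_start(struct object *o)
{
	pthread_mutex_lock(&o->lock);
	while (o->copying)
		pthread_cond_wait(&o->wq, &o->lock);
	if (o->copied_up) {
		pthread_mutex_unlock(&o->lock);
		return 1;
	}
	o->copying = true;
	pthread_mutex_unlock(&o->lock);
	return 0;
}

static void copy_up_end(struct object *o)
{
	pthread_mutex_lock(&o->lock);
	o->copying = false;
	o->copied_up = true;
	pthread_cond_broadcast(&o->wq);
	pthread_mutex_unlock(&o->lock);
}

static void *worker(void *arg)
{
	struct object *o = arg;

	if (copy_up_start(o)) {
		printf("raced: object was already copied up\n");
	} else {
		printf("performing the copy up exactly once\n");
		copy_up_end(o);
	}
	return NULL;
}

int main(void)
{
	static struct object o = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER,
		false, false
	};
	pthread_t t[2];
	int i;

	for (i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, worker, &o);
	for (i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	return 0;
}

Each object carries its own state, so copy ups of different files never block one another, matching the behavior described above.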
Suggested-by: Al Viro Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 6e39e90b5605..48eb8812ac5b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -291,7 +291,16 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, BUG_ON(upperpath.dentry != NULL); upperpath.dentry = temp; - err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); + if (tmpfile) { + inode_unlock(udir); + err = ovl_copy_up_data(lowerpath, &upperpath, + stat->size); + inode_lock_nested(udir, I_MUTEX_PARENT); + } else { + err = ovl_copy_up_data(lowerpath, &upperpath, + stat->size); + } + if (err) goto out_cleanup; } @@ -353,8 +362,6 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct dentry *upperdir; const char *link = NULL; struct ovl_fs *ofs = dentry->d_sb->s_fs_info; - /* Should we copyup with O_TMPFILE or with workdir? */ - bool tmpfile = S_ISREG(stat->mode) && ofs->tmpfile; if (WARN_ON(!workdir)) return -EROFS; @@ -374,6 +381,25 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, return PTR_ERR(link); } + /* Should we copyup with O_TMPFILE or with workdir? */ + if (S_ISREG(stat->mode) && ofs->tmpfile) { + err = ovl_copy_up_start(dentry); + /* err < 0: interrupted, err > 0: raced with another copy-up */ + if (unlikely(err)) { + pr_debug("ovl_copy_up_start(%pd2) = %i\n", dentry, err); + if (err > 0) + err = 0; + goto out_done; + } + + inode_lock_nested(upperdir->d_inode, I_MUTEX_PARENT); + err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, + stat, link, &pstat, true); + inode_unlock(upperdir->d_inode); + ovl_copy_up_end(dentry); + goto out_done; + } + err = -EIO; if (lock_rename(workdir, upperdir) != NULL) { pr_err("overlayfs: failed to lock workdir+upperdir\n"); @@ -386,9 +412,10 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, link, &pstat, tmpfile); + stat, link, &pstat, false); out_unlock: unlock_rename(workdir, upperdir); +out_done: do_delayed_call(&done); return err; -- GitLab From e593b2bf513dd4d3fbfa0f435392eea2c7f776f0 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 23 Jan 2017 14:32:21 +0200 Subject: [PATCH 0156/1084] ovl: properly implement sync_filesystem() overlayfs syncs all inode pages on sync_filesystem(), but it also needs to call s_op->sync_fs() of upper fs for metadata sync. This fixes correctness of syncfs(2) as demonstrated by following xfs specific test: xfs_sync_stats() { echo $1 echo -n "xfs_log_force = " grep log /proc/fs/xfs/stat | awk '{ print $5 }' } xfs_sync_stats "before touch" touch x xfs_sync_stats "after touch" xfs_io -c syncfs . xfs_sync_stats "after syncfs" xfs_io -c fsync x xfs_sync_stats "after fsync" xfs_io -c fsync x xfs_sync_stats "after fsync #2" When this test is run in overlay mount over xfs, log force count does not increase with syncfs command. 
Signed-off-by: Amir Goldstein Reviewed-by: Christoph Hellwig Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 6792bb70b4ac..346f668e7df0 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -160,6 +160,25 @@ static void ovl_put_super(struct super_block *sb) kfree(ufs); } +static int ovl_sync_fs(struct super_block *sb, int wait) +{ + struct ovl_fs *ufs = sb->s_fs_info; + struct super_block *upper_sb; + int ret; + + if (!ufs->upper_mnt) + return 0; + upper_sb = ufs->upper_mnt->mnt_sb; + if (!upper_sb->s_op->sync_fs) + return 0; + + /* real inodes have already been synced by sync_filesystem(ovl_sb) */ + down_read(&upper_sb->s_umount); + ret = upper_sb->s_op->sync_fs(upper_sb, wait); + up_read(&upper_sb->s_umount); + return ret; +} + /** * ovl_statfs * @sb: The overlayfs super block @@ -222,6 +241,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations ovl_super_operations = { .put_super = ovl_put_super, + .sync_fs = ovl_sync_fs, .statfs = ovl_statfs, .show_options = ovl_show_options, .remount_fs = ovl_remount, -- GitLab From 51f8f3c4e22535933ef9aecc00e9a6069e051b57 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 10 Jan 2017 21:30:21 +0300 Subject: [PATCH 0157/1084] ovl: drop CAP_SYS_RESOURCE from saved mounter's credentials If overlay was mounted by root then quota set for upper layer does not work because overlay now always use mounter's credentials for operations. Also overlay might deplete reserved space and inodes in ext4. This patch drops capability SYS_RESOURCE from saved credentials. This affects creation new files, whiteouts, and copy-up operations. Signed-off-by: Konstantin Khlebnikov Fixes: 1175b6b8d963 ("ovl: do operations on underlying file system in mounter's context") Cc: Vivek Goyal Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 346f668e7df0..7b2188181367 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -721,6 +721,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) unsigned int stacklen = 0; unsigned int i; bool remote = false; + struct cred *cred; int err; err = -ENOMEM; @@ -901,10 +902,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else sb->s_d_op = &ovl_dentry_operations; - ufs->creator_cred = prepare_creds(); - if (!ufs->creator_cred) + ufs->creator_cred = cred = prepare_creds(); + if (!cred) goto out_put_lower_mnt; + /* Never override disk quota limits or use reserved space */ + cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); + err = -ENOMEM; oe = ovl_alloc_entry(numlower); if (!oe) -- GitLab From e0fa56489f21e319d0aa9654834209faa3eb481d Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Tue, 7 Feb 2017 08:56:39 +0800 Subject: [PATCH 0158/1084] dt: bindings: add documentation for zx2967 family thermal sensor This patch adds dt-binding documentation for zx2967 family thermal sensor. 
Signed-off-by: Baoyou Xie Acked-by: Rob Herring Reviewed-by: Shawn Guo Signed-off-by: Eduardo Valentin --- .../bindings/thermal/zx2967-thermal.txt | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 Documentation/devicetree/bindings/thermal/zx2967-thermal.txt diff --git a/Documentation/devicetree/bindings/thermal/zx2967-thermal.txt b/Documentation/devicetree/bindings/thermal/zx2967-thermal.txt new file mode 100644 index 000000000000..3dc1c6bf0478 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/zx2967-thermal.txt @@ -0,0 +1,116 @@ +* ZTE zx2967 family Thermal + +Required Properties: +- compatible: should be one of the following. + * zte,zx296718-thermal +- reg: physical base address of the controller and length of memory mapped + region. +- clocks : Pairs of phandle and specifier referencing the controller's clocks. +- clock-names: "topcrm" for the topcrm clock. + "apb" for the apb clock. +- #thermal-sensor-cells: must be 0. + +Please note: slope coefficient defined in thermal-zones section need to be +multiplied by 1000. + +Example for tempsensor: + + tempsensor: tempsensor@148a000 { + compatible = "zte,zx296718-thermal"; + reg = <0x0148a000 0x20>; + clocks = <&topcrm TEMPSENSOR_GATE>, <&audiocrm AUDIO_TS_PCLK>; + clock-names = "topcrm", "apb"; + #thermal-sensor-cells = <0>; + }; + +Example for cooling device: + + cooling_dev: cooling_dev { + cluster0_cooling_dev: cluster0-cooling-dev { + #cooling-cells = <2>; + cpumask = <0xf>; + capacitance = <1500>; + }; + + cluster1_cooling_dev: cluster1-cooling-dev { + #cooling-cells = <2>; + cpumask = <0x30>; + capacitance = <2000>; + }; + }; + +Example for thermal zones: + + thermal-zones { + zx296718_thermal: zx296718_thermal { + polling-delay-passive = <500>; + polling-delay = <1000>; + sustainable-power = <6500>; + + thermal-sensors = <&tempsensor 0>; + /* + * slope need to be multiplied by 1000. + */ + coefficients = <1951 (-922)>; + + trips { + trip0: switch_on_temperature { + temperature = <90000>; + hysteresis = <2000>; + type = "passive"; + }; + + trip1: desired_temperature { + temperature = <100000>; + hysteresis = <2000>; + type = "passive"; + }; + + crit: critical_temperature { + temperature = <110000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&trip0>; + cooling-device = <&gpu 2 5>; + }; + + map1 { + trip = <&trip0>; + cooling-device = <&cluster0_cooling_dev 1 2>; + }; + + map2 { + trip = <&trip1>; + cooling-device = <&cluster0_cooling_dev 1 2>; + }; + + map3 { + trip = <&crit>; + cooling-device = <&cluster0_cooling_dev 1 2>; + }; + + map4 { + trip = <&trip0>; + cooling-device = <&cluster1_cooling_dev 1 2>; + contribution = <9000>; + }; + + map5 { + trip = <&trip1>; + cooling-device = <&cluster1_cooling_dev 1 2>; + contribution = <4096>; + }; + + map6 { + trip = <&crit>; + cooling-device = <&cluster1_cooling_dev 1 2>; + contribution = <4096>; + }; + }; + }; + }; -- GitLab From cbaf58032efca401834518b905f528ac912449e4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:15 -0500 Subject: [PATCH 0159/1084] svcrdma: Another sendto chunk list parsing update Commit 5fdca6531434 ("svcrdma: Renovate sendto chunk list parsing") missed a spot. svc_rdma_xdr_get_reply_hdr_len() also assumes the Write list has only one Write chunk. There's no harm in making this code more general. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/rpc_rdma.h | 9 +++++ include/linux/sunrpc/svc_rdma.h | 2 +- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 49 ++++++++++++++------------ net/sunrpc/xprtrdma/svc_rdma_sendto.c | 3 +- 4 files changed, 38 insertions(+), 25 deletions(-) diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index cfda6adcf33c..245fc59b7324 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -109,6 +109,15 @@ struct rpcrdma_msg { } rm_body; }; +/* + * XDR sizes, in quads + */ +enum { + rpcrdma_fixed_maxsz = 4, + rpcrdma_segment_maxsz = 4, + rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz, +}; + /* * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 757fb963696c..551c51816352 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -218,7 +218,7 @@ extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, struct rpcrdma_msg *, struct rpcrdma_msg *, enum rpcrdma_proc); -extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); +extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp); /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 0ba9887f3e22..4e7203439e2f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -260,32 +260,35 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, return (int)((unsigned long)va - (unsigned long)startp); } -int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) +/** + * svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header + * @rdma_resp: buffer containing Reply transport header + * + * Returns length of transport header, in bytes. + */ +unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp) { - struct rpcrdma_write_array *wr_ary; + unsigned int nsegs; + __be32 *p; - /* There is no read-list in a reply */ + p = rdma_resp; - /* skip write list */ - wr_ary = (struct rpcrdma_write_array *) - &rmsgp->rm_body.rm_chunks[1]; - if (wr_ary->wc_discrim) - wr_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)]. - wc_target.rs_length; - else - wr_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_nchunks; - - /* skip reply array */ - if (wr_ary->wc_discrim) - wr_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)]; - else - wr_ary = (struct rpcrdma_write_array *) - &wr_ary->wc_nchunks; - - return (unsigned long) wr_ary - (unsigned long) rmsgp; + /* RPC-over-RDMA V1 replies never have a Read list. */ + p += rpcrdma_fixed_maxsz + 1; + + /* Skip Write list. */ + while (*p++ != xdr_zero) { + nsegs = be32_to_cpup(p++); + p += nsegs * rpcrdma_segment_maxsz; + } + + /* Skip Reply chunk. 
*/ + if (*p++ != xdr_zero) { + nsegs = be32_to_cpup(p++); + p += nsegs * rpcrdma_segment_maxsz; + } + + return (unsigned long)p - (unsigned long)rdma_resp; } void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index ad4d286a83c5..ba76f1617965 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -476,7 +476,8 @@ static int send_reply(struct svcxprt_rdma *rdma, /* Prepare the SGE for the RPCRDMA Header */ ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; - ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); + ctxt->sge[0].length = + svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp); ctxt->sge[0].addr = ib_dma_map_page(rdma->sc_cm_id->device, page, 0, ctxt->sge[0].length, DMA_TO_DEVICE); -- GitLab From 98fc21d3bfd55a36ce9eb7b32d1ce146f0d1696d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:23 -0500 Subject: [PATCH 0160/1084] svcrdma: Clean up RPC-over-RDMA Reply header encoder Replace C structure-based XDR decoding with pointer arithmetic. Pointer arithmetic is considered more portable, and is used throughout the kernel's existing XDR encoders. The gcc optimizer generates similar assembler code either way. Byte-swapping before a memory store on x86 typically results in an instruction pipeline stall. Avoid byte-swapping when encoding a new header. svcrdma currently doesn't alter a connection's credit grant value after the connection has been accepted, so it is effectively a constant. Cache the byte-swapped value in a separate field. Christoph suggested pulling the header encoding logic into the only function that uses it. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 7 ++----- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 18 +----------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 19 ++++++++++++------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + 4 files changed, 16 insertions(+), 29 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 551c51816352..25ecf92cae96 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -141,7 +141,8 @@ struct svcxprt_rdma { atomic_t sc_sq_avail; /* SQEs ready to be consumed */ unsigned int sc_sq_depth; /* Depth of SQ */ unsigned int sc_rq_depth; /* Depth of RQ */ - u32 sc_max_requests; /* Forward credits */ + __be32 sc_fc_credits; /* Forward credits */ + u32 sc_max_requests; /* Max requests */ u32 sc_max_bc_requests;/* Backward credits */ int sc_max_req_size; /* Size of each RQ WR buf */ @@ -214,10 +215,6 @@ extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, __be32, __be64, u32); -extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, - struct rpcrdma_msg *, - struct rpcrdma_msg *, - enum rpcrdma_proc); extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp); /* svc_rdma_recvfrom.c */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 4e7203439e2f..c7d15b1126ce 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -249,7 +249,7 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, *va++ = rmsgp->rm_xid; *va++ = rmsgp->rm_vers; - *va++ = cpu_to_be32(xprt->sc_max_requests); + *va++ = xprt->sc_fc_credits; *va++ = rdma_error; *va++ = cpu_to_be32(err); if (err == ERR_VERS) { @@ -329,19 +329,3 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, seg->rs_offset = rs_offset; seg->rs_length = cpu_to_be32(write_len); } - -void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, - struct rpcrdma_msg *rdma_argp, - struct rpcrdma_msg *rdma_resp, - enum rpcrdma_proc rdma_type) -{ - rdma_resp->rm_xid = rdma_argp->rm_xid; - rdma_resp->rm_vers = rdma_argp->rm_vers; - rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests); - rdma_resp->rm_type = cpu_to_be32(rdma_type); - - /* Encode chunks lists */ - rdma_resp->rm_body.rm_chunks[0] = xdr_zero; - rdma_resp->rm_body.rm_chunks[1] = xdr_zero; - rdma_resp->rm_body.rm_chunks[2] = xdr_zero; -} diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index ba76f1617965..515221b16d09 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -560,12 +560,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) struct rpcrdma_msg *rdma_argp; struct rpcrdma_msg *rdma_resp; struct rpcrdma_write_array *wr_ary, *rp_ary; - enum rpcrdma_proc reply_type; int ret; int inline_bytes; struct page *res_page; struct svc_rdma_req_map *vec; u32 inv_rkey; + __be32 *p; dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); @@ -597,12 +597,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (!res_page) goto err0; rdma_resp = page_address(res_page); - if (rp_ary) - reply_type = RDMA_NOMSG; - else - reply_type = RDMA_MSG; - svc_rdma_xdr_encode_reply_header(rdma, rdma_argp, - rdma_resp, reply_type); + + p = &rdma_resp->rm_xid; + *p++ = rdma_argp->rm_xid; + *p++ = rdma_argp->rm_vers; + *p++ = 
rdma->sc_fc_credits; + *p++ = rp_ary ? rdma_nomsg : rdma_msg; + + /* Start with empty chunks */ + *p++ = xdr_zero; + *p++ = xdr_zero; + *p = xdr_zero; /* Send any write-chunk data and build resp write-list */ if (wr_ary) { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ca2799af05a6..174928fe5f83 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1002,6 +1002,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_req_size = svcrdma_max_req_size; newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr, svcrdma_max_requests); + newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr, svcrdma_max_bc_requests); newxprt->sc_rq_depth = newxprt->sc_max_requests + -- GitLab From 647e18e3bd0308b38d5341fc08cd70ed1ce28a71 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:32 -0500 Subject: [PATCH 0161/1084] svcrdma: Clean up RPC-over-RDMA Call header decoder Replace C structure-based XDR decoding with pointer arithmetic. Pointer arithmetic is considered more portable. Rename the "decode" functions. Nothing is decoded here, they perform only transport header sanity checking. Use existing XDR naming conventions to help readability. Straight-line the hot path: - relocate the dprintk call sites out of line - remove unnecessary byte-swapping - reduce count of conditional branches Deprecate RDMA_MSGP. It's not properly spec'd by RFC5666, and therefore never used by any V1 client. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_marshal.c | 232 +++++++++---------------- 1 file changed, 79 insertions(+), 153 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index c7d15b1126ce..1c4aabf0f657 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2016 Oracle. All rights reserved. * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -47,102 +48,43 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT -/* - * Decodes a read chunk list. The expected format is as follows: - * descrim : xdr_one - * position : __be32 offset into XDR stream - * handle : __be32 RKEY - * . . . - * end-of-list: xdr_zero - */ -static __be32 *decode_read_list(__be32 *va, __be32 *vaend) +static __be32 *xdr_check_read_list(__be32 *p, __be32 *end) { - struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; + __be32 *next; - while (ch->rc_discrim != xdr_zero) { - if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > - (unsigned long)vaend) { - dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch); + while (*p++ != xdr_zero) { + next = p + rpcrdma_readchunk_maxsz - 1; + if (next > end) return NULL; - } - ch++; + p = next; } - return &ch->rc_position; + return p; } -/* - * Decodes a write chunk list. The expected format is as follows: - * descrim : xdr_one - * nchunks : - * handle : __be32 RKEY ---+ - * length : __be32 | - * offset : remove va + - * . . . 
| - * ---+ - */ -static __be32 *decode_write_list(__be32 *va, __be32 *vaend) +static __be32 *xdr_check_write_list(__be32 *p, __be32 *end) { - unsigned long start, end; - int nchunks; - - struct rpcrdma_write_array *ary = - (struct rpcrdma_write_array *)va; - - /* Check for not write-array */ - if (ary->wc_discrim == xdr_zero) - return &ary->wc_nchunks; + __be32 *next; - if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > - (unsigned long)vaend) { - dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); - return NULL; - } - nchunks = be32_to_cpu(ary->wc_nchunks); - - start = (unsigned long)&ary->wc_array[0]; - end = (unsigned long)vaend; - if (nchunks < 0 || - nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || - (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { - dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", - ary, nchunks, vaend); - return NULL; + while (*p++ != xdr_zero) { + next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz; + if (next > end) + return NULL; + p = next; } - /* - * rs_length is the 2nd 4B field in wc_target and taking its - * address skips the list terminator - */ - return &ary->wc_array[nchunks].wc_target.rs_length; + return p; } -static __be32 *decode_reply_array(__be32 *va, __be32 *vaend) +static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end) { - unsigned long start, end; - int nchunks; - struct rpcrdma_write_array *ary = - (struct rpcrdma_write_array *)va; - - /* Check for no reply-array */ - if (ary->wc_discrim == xdr_zero) - return &ary->wc_nchunks; - - if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > - (unsigned long)vaend) { - dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); - return NULL; - } - nchunks = be32_to_cpu(ary->wc_nchunks); - - start = (unsigned long)&ary->wc_array[0]; - end = (unsigned long)vaend; - if (nchunks < 0 || - nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || - (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { - dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", - ary, nchunks, vaend); - return NULL; + __be32 *next; + + if (*p++ != xdr_zero) { + next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz; + if (next > end) + return NULL; + p = next; } - return (__be32 *)&ary->wc_array[nchunks]; + return p; } /** @@ -158,87 +100,71 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend) */ int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg) { - struct rpcrdma_msg *rmsgp; - __be32 *va, *vaend; - unsigned int len; - u32 hdr_len; + __be32 *p, *end, *rdma_argp; + unsigned int hdr_len; /* Verify that there's enough bytes for header + something */ - if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) { - dprintk("svcrdma: header too short = %d\n", - rq_arg->len); - return -EINVAL; - } + if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) + goto out_short; - rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base; - if (rmsgp->rm_vers != rpcrdma_version) { - dprintk("%s: bad version %u\n", __func__, - be32_to_cpu(rmsgp->rm_vers)); - return -EPROTONOSUPPORT; - } + rdma_argp = rq_arg->head[0].iov_base; + if (*(rdma_argp + 1) != rpcrdma_version) + goto out_version; - switch (be32_to_cpu(rmsgp->rm_type)) { - case RDMA_MSG: - case RDMA_NOMSG: + switch (*(rdma_argp + 3)) { + case rdma_msg: + case rdma_nomsg: break; - case RDMA_DONE: - /* Just drop it */ - dprintk("svcrdma: dropping RDMA_DONE message\n"); - return 0; - - case RDMA_ERROR: - /* Possible if this is a backchannel reply. - * XXX: We should cancel this XID, though. 
- */ - dprintk("svcrdma: dropping RDMA_ERROR message\n"); - return 0; - - case RDMA_MSGP: - /* Pull in the extra for the padded case, bump our pointer */ - rmsgp->rm_body.rm_padded.rm_align = - be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align); - rmsgp->rm_body.rm_padded.rm_thresh = - be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh); - - va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; - rq_arg->head[0].iov_base = va; - len = (u32)((unsigned long)va - (unsigned long)rmsgp); - rq_arg->head[0].iov_len -= len; - if (len > rq_arg->len) - return -EINVAL; - return len; - default: - dprintk("svcrdma: bad rdma procedure (%u)\n", - be32_to_cpu(rmsgp->rm_type)); - return -EINVAL; - } + case rdma_done: + goto out_drop; - /* The chunk list may contain either a read chunk list or a write - * chunk list and a reply chunk list. - */ - va = &rmsgp->rm_body.rm_chunks[0]; - vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len); - va = decode_read_list(va, vaend); - if (!va) { - dprintk("svcrdma: failed to decode read list\n"); - return -EINVAL; - } - va = decode_write_list(va, vaend); - if (!va) { - dprintk("svcrdma: failed to decode write list\n"); - return -EINVAL; - } - va = decode_reply_array(va, vaend); - if (!va) { - dprintk("svcrdma: failed to decode reply chunk\n"); - return -EINVAL; + case rdma_error: + goto out_drop; + + default: + goto out_proc; } - rq_arg->head[0].iov_base = va; - hdr_len = (unsigned long)va - (unsigned long)rmsgp; + end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len); + p = xdr_check_read_list(rdma_argp + 4, end); + if (!p) + goto out_inval; + p = xdr_check_write_list(p, end); + if (!p) + goto out_inval; + p = xdr_check_reply_chunk(p, end); + if (!p) + goto out_inval; + if (p > end) + goto out_inval; + + rq_arg->head[0].iov_base = p; + hdr_len = (unsigned long)p - (unsigned long)rdma_argp; rq_arg->head[0].iov_len -= hdr_len; return hdr_len; + +out_short: + dprintk("svcrdma: header too short = %d\n", rq_arg->len); + return -EINVAL; + +out_version: + dprintk("svcrdma: bad xprt version: %u\n", + be32_to_cpup(rdma_argp + 1)); + return -EPROTONOSUPPORT; + +out_drop: + dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n"); + return 0; + +out_proc: + dprintk("svcrdma: bad rdma procedure (%u)\n", + be32_to_cpup(rdma_argp + 3)); + return -EINVAL; + +out_inval: + dprintk("svcrdma: failed to parse transport header\n"); + return -EINVAL; } int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, -- GitLab From c2ccf64a6c4bf3bcbf4e6e6b94a51500991022a1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:40 -0500 Subject: [PATCH 0162/1084] svcrdma: Clean up backchannel send header encoding Replace C structure-based XDR decoding with pointer arithmetic. Pointer arithmetic is considered more portable. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 288e35c2d8f4..b1bc7a796782 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -200,19 +200,20 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) { struct rpc_xprt *xprt = rqst->rq_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer; + __be32 *p; int rc; /* Space in the send buffer for an RPC/RDMA header is reserved * via xprt->tsh_size. */ - headerp->rm_xid = rqst->rq_xid; - headerp->rm_vers = rpcrdma_version; - headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); - headerp->rm_type = rdma_msg; - headerp->rm_body.rm_chunks[0] = xdr_zero; - headerp->rm_body.rm_chunks[1] = xdr_zero; - headerp->rm_body.rm_chunks[2] = xdr_zero; + p = rqst->rq_buffer; + *p++ = rqst->rq_xid; + *p++ = rpcrdma_version; + *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); + *p++ = rdma_msg; + *p++ = xdr_zero; + *p++ = xdr_zero; + *p = xdr_zero; #ifdef SVCRDMA_BACKCHANNEL_DEBUG pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); -- GitLab From aba7d14ba18c93a2ab37d50b057a885964ef285c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:48 -0500 Subject: [PATCH 0163/1084] svcrdma: Remove unused sc_dto_q field Clean up. Commit be99bb11400c ("svcrdma: Use new CQ API for RPC-over-RDMA server send CQs") removed code that used the sc_dto_q field, but neglected to remove sc_dto_q at the same time. Fixes: be99bb11400c ("svcrdma: Use new CQ API for RPC-over- ...") Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 - 2 files changed, 2 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 25ecf92cae96..f77a7bc1612c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -172,7 +172,6 @@ struct svcxprt_rdma { wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ struct list_head sc_read_complete_q; struct work_struct sc_work; }; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 174928fe5f83..023eaa01157f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -557,7 +557,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, return NULL; svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); INIT_LIST_HEAD(&cma_xprt->sc_accept_q); - INIT_LIST_HEAD(&cma_xprt->sc_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); -- GitLab From a3ab867fa64f9aedb3b01d570db5b43d2fc355fc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:58:56 -0500 Subject: [PATCH 0164/1084] svcrdma: Combine list fields in struct svc_rdma_op_ctxt Clean up: The free list and the dto_q list fields are never used at the same time. Reduce the size of struct svc_rdma_op_ctxt by combining these fields. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 +-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 14 +++++----- net/sunrpc/xprtrdma/svc_rdma_transport.c | 33 +++++++++++------------- 3 files changed, 22 insertions(+), 28 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f77a7bc1612c..b105f73e3ca2 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -70,7 +70,7 @@ extern atomic_t rdma_stat_sq_prod; * completes. */ struct svc_rdma_op_ctxt { - struct list_head free; + struct list_head list; struct svc_rdma_op_ctxt *read_hdr; struct svc_rdma_fastreg_mr *frmr; int hdr_count; @@ -78,7 +78,6 @@ struct svc_rdma_op_ctxt { struct ib_cqe cqe; struct ib_cqe reg_cqe; struct ib_cqe inv_cqe; - struct list_head dto_q; u32 byte_len; u32 position; struct svcxprt_rdma *xprt; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 172b537f8cfc..b9ccd73631a9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -608,18 +608,16 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); if (!list_empty(&rdma_xprt->sc_read_complete_q)) { - ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, - struct svc_rdma_op_ctxt, - dto_q); - list_del_init(&ctxt->dto_q); + ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q, + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); rdma_read_complete(rqstp, ctxt); goto complete; } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { - ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, - struct svc_rdma_op_ctxt, - dto_q); - list_del_init(&ctxt->dto_q); + ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q, + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); } else { atomic_inc(&rdma_stat_rq_starve); clear_bit(XPT_DATA, &xprt->xpt_flags); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 023eaa01157f..87b8b5a10324 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -157,8 +157,7 @@ static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt, ctxt = kmalloc(sizeof(*ctxt), flags); if (ctxt) { ctxt->xprt = xprt; - INIT_LIST_HEAD(&ctxt->free); - INIT_LIST_HEAD(&ctxt->dto_q); + INIT_LIST_HEAD(&ctxt->list); } return ctxt; } @@ -180,7 +179,7 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) dprintk("svcrdma: No memory for RDMA ctxt\n"); return false; } - list_add(&ctxt->free, &xprt->sc_ctxts); + list_add(&ctxt->list, &xprt->sc_ctxts); } return true; } @@ -195,8 +194,8 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) goto out_empty; ctxt = list_first_entry(&xprt->sc_ctxts, - struct svc_rdma_op_ctxt, free); - list_del_init(&ctxt->free); + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); spin_unlock_bh(&xprt->sc_ctxt_lock); out: @@ -256,7 +255,7 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) spin_lock_bh(&xprt->sc_ctxt_lock); xprt->sc_ctxt_used--; - list_add(&ctxt->free, &xprt->sc_ctxts); + list_add(&ctxt->list, &xprt->sc_ctxts); spin_unlock_bh(&xprt->sc_ctxt_lock); } @@ -266,8 +265,8 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) struct svc_rdma_op_ctxt *ctxt; ctxt = list_first_entry(&xprt->sc_ctxts, - struct svc_rdma_op_ctxt, free); - list_del(&ctxt->free); + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); kfree(ctxt); } } 
@@ -404,7 +403,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) /* All wc fields are now known to be valid */ ctxt->byte_len = wc->byte_len; spin_lock(&xprt->sc_rq_dto_lock); - list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); + list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q); spin_unlock(&xprt->sc_rq_dto_lock); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); @@ -525,7 +524,7 @@ void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc) read_hdr = ctxt->read_hdr; spin_lock(&xprt->sc_rq_dto_lock); - list_add_tail(&read_hdr->dto_q, + list_add_tail(&read_hdr->list, &xprt->sc_read_complete_q); spin_unlock(&xprt->sc_rq_dto_lock); @@ -1213,20 +1212,18 @@ static void __svc_rdma_free(struct work_struct *work) */ while (!list_empty(&rdma->sc_read_complete_q)) { struct svc_rdma_op_ctxt *ctxt; - ctxt = list_entry(rdma->sc_read_complete_q.next, - struct svc_rdma_op_ctxt, - dto_q); - list_del_init(&ctxt->dto_q); + ctxt = list_first_entry(&rdma->sc_read_complete_q, + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); svc_rdma_put_context(ctxt, 1); } /* Destroy queued, but not processed recv completions */ while (!list_empty(&rdma->sc_rq_dto_q)) { struct svc_rdma_op_ctxt *ctxt; - ctxt = list_entry(rdma->sc_rq_dto_q.next, - struct svc_rdma_op_ctxt, - dto_q); - list_del_init(&ctxt->dto_q); + ctxt = list_first_entry(&rdma->sc_rq_dto_q, + struct svc_rdma_op_ctxt, list); + list_del(&ctxt->list); svc_rdma_put_context(ctxt, 1); } -- GitLab From 81fa3275f95ac357188fe3ca81b8e7c75360c88d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Feb 2017 11:59:04 -0500 Subject: [PATCH 0165/1084] svcrdma: Poll CQs in "workqueue" mode svcrdma calls svc_xprt_put() in its completion handlers, which currently run in IRQ context. However, svc_xprt_put() is meant to be invoked in process context, not in IRQ context. After the last transport reference is gone, it directly calls a transport release function that expects to run in process context. Change the CQ polling modes to IB_POLL_WORKQUEUE so that svcrdma invokes svc_xprt_put() only in process context. As an added benefit, bottom half-disabled spin locking can be eliminated from I/O paths. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 6 +++--- net/sunrpc/xprtrdma/svc_rdma_transport.c | 26 ++++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index b9ccd73631a9..f7b2daf72a86 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -606,12 +606,12 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) dprintk("svcrdma: rqstp=%p\n", rqstp); - spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); + spin_lock(&rdma_xprt->sc_rq_dto_lock); if (!list_empty(&rdma_xprt->sc_read_complete_q)) { ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q, struct svc_rdma_op_ctxt, list); list_del(&ctxt->list); - spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); + spin_unlock(&rdma_xprt->sc_rq_dto_lock); rdma_read_complete(rqstp, ctxt); goto complete; } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { @@ -623,7 +623,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) clear_bit(XPT_DATA, &xprt->xpt_flags); ctxt = NULL; } - spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); + spin_unlock(&rdma_xprt->sc_rq_dto_lock); if (!ctxt) { /* This is the EAGAIN path. 
The svc_recv routine will * return -EAGAIN, the nfsd thread will go to call into diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 87b8b5a10324..ab2fd5377f94 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -188,7 +188,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt = NULL; - spin_lock_bh(&xprt->sc_ctxt_lock); + spin_lock(&xprt->sc_ctxt_lock); xprt->sc_ctxt_used++; if (list_empty(&xprt->sc_ctxts)) goto out_empty; @@ -196,7 +196,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) ctxt = list_first_entry(&xprt->sc_ctxts, struct svc_rdma_op_ctxt, list); list_del(&ctxt->list); - spin_unlock_bh(&xprt->sc_ctxt_lock); + spin_unlock(&xprt->sc_ctxt_lock); out: ctxt->count = 0; @@ -208,15 +208,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) /* Either pre-allocation missed the mark, or send * queue accounting is broken. */ - spin_unlock_bh(&xprt->sc_ctxt_lock); + spin_unlock(&xprt->sc_ctxt_lock); ctxt = alloc_ctxt(xprt, GFP_NOIO); if (ctxt) goto out; - spin_lock_bh(&xprt->sc_ctxt_lock); + spin_lock(&xprt->sc_ctxt_lock); xprt->sc_ctxt_used--; - spin_unlock_bh(&xprt->sc_ctxt_lock); + spin_unlock(&xprt->sc_ctxt_lock); WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n"); return NULL; } @@ -253,10 +253,10 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) for (i = 0; i < ctxt->count; i++) put_page(ctxt->pages[i]); - spin_lock_bh(&xprt->sc_ctxt_lock); + spin_lock(&xprt->sc_ctxt_lock); xprt->sc_ctxt_used--; list_add(&ctxt->list, &xprt->sc_ctxts); - spin_unlock_bh(&xprt->sc_ctxt_lock); + spin_unlock(&xprt->sc_ctxt_lock); } static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) @@ -921,14 +921,14 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) { struct svc_rdma_fastreg_mr *frmr = NULL; - spin_lock_bh(&rdma->sc_frmr_q_lock); + spin_lock(&rdma->sc_frmr_q_lock); if (!list_empty(&rdma->sc_frmr_q)) { frmr = list_entry(rdma->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); frmr->sg_nents = 0; } - spin_unlock_bh(&rdma->sc_frmr_q_lock); + spin_unlock(&rdma->sc_frmr_q_lock); if (frmr) return frmr; @@ -941,10 +941,10 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, if (frmr) { ib_dma_unmap_sg(rdma->sc_cm_id->device, frmr->sg, frmr->sg_nents, frmr->direction); - spin_lock_bh(&rdma->sc_frmr_q_lock); + spin_lock(&rdma->sc_frmr_q_lock); WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); - spin_unlock_bh(&rdma->sc_frmr_q_lock); + spin_unlock(&rdma->sc_frmr_q_lock); } } @@ -1026,13 +1026,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) goto errout; } newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth, - 0, IB_POLL_SOFTIRQ); + 0, IB_POLL_WORKQUEUE); if (IS_ERR(newxprt->sc_sq_cq)) { dprintk("svcrdma: error creating SQ CQ for connect request\n"); goto errout; } newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth, - 0, IB_POLL_SOFTIRQ); + 0, IB_POLL_WORKQUEUE); if (IS_ERR(newxprt->sc_rq_cq)) { dprintk("svcrdma: error creating RQ CQ for connect request\n"); goto errout; -- GitLab From 471a930ad7d1b868456e835a67169a661ec73f65 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 8 Feb 2017 09:54:33 +0800 Subject: [PATCH 0166/1084] SUNRPC: Drop all entries from cache_detail when cache_purge() User always free the 
cache_detail after sunrpc_destroy_cache_detail(), so, it must cleanup up entries that left in the cache_detail, otherwise, NULL reference may be caused when using the left entries. Also, NeriBrown suggests "write a stand-alone cache_purge()." v3, move the cache_fresh_unlocked() out of write lock, v2, a stand-alone cache_purge(), not only for sunrpc_destroy_cache_detail Signed-off-by: Kinglong Mee Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 502b9fe9817b..f5d6c60aae26 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -362,11 +362,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) cache_purge(cd); spin_lock(&cache_list_lock); write_lock(&cd->hash_lock); - if (cd->entries) { - write_unlock(&cd->hash_lock); - spin_unlock(&cache_list_lock); - goto out; - } if (current_detail == cd) current_detail = NULL; list_del_init(&cd->others); @@ -376,9 +371,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) /* module must be being unloaded so its safe to kill the worker */ cancel_delayed_work_sync(&cache_cleaner); } - return; -out: - printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name); } EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); @@ -497,13 +489,32 @@ EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { - time_t now = seconds_since_boot(); - if (detail->flush_time >= now) - now = detail->flush_time + 1; - /* 'now' is the maximum value any 'last_refresh' can have */ - detail->flush_time = now; - detail->nextcheck = seconds_since_boot(); - cache_flush(); + struct cache_head *ch = NULL; + struct hlist_head *head = NULL; + struct hlist_node *tmp = NULL; + int i = 0; + + write_lock(&detail->hash_lock); + if (!detail->entries) { + write_unlock(&detail->hash_lock); + return; + } + + dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name); + for (i = 0; i < detail->hash_size; i++) { + head = &detail->hash_table[i]; + hlist_for_each_entry_safe(ch, tmp, head, cache_list) { + hlist_del_init(&ch->cache_list); + detail->entries--; + + set_bit(CACHE_CLEANED, &ch->flags); + write_unlock(&detail->hash_lock); + cache_fresh_unlocked(ch, detail); + cache_put(ch, detail); + write_lock(&detail->hash_lock); + } + } + write_unlock(&detail->hash_lock); } EXPORT_SYMBOL_GPL(cache_purge); -- GitLab From d6fc8821c2d2aba4cc18447a467f543e46e7367d Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 8 Feb 2017 09:54:42 +0800 Subject: [PATCH 0167/1084] SUNRPC/Cache: Always treat the invalid cache as unexpired When the first time pynfs runs after rpc/nfsd startup, always get the warning, "Got error: Connection closed" I found the problem is caused by, 1. A new startup of nfsd, rpc.mountd, etc, 2. A rpc request from client (pynfs test, or normal mounting), 3. An ip_map cache is created but invalid, so upcall to rpc.mountd, 4. rpc.mountd process the ip_map upcall, before write the valid data to nfsd, do auth_reload(), and check_useipaddr(), 5. For the first time, old_use_ipaddr = -1, it causes rpc.mountd do write_flush that doing cache_clean, 6. The ip_map cache will be treat as expired and clean, 7. When rpc.mountd write the valid data to nfsd, a new ip_map is created and updated, the cache_check of old ip_map(doing the upcall) will return -ETIMEDOUT. 8. 
RPC layer return SVC_CLOSE and close the xprt after commit 4d712ef1db05 "svcauth_gss: Close connection when dropping an incoming message" NeilBrown suggest in another email, "If CACHE_VALID is not set, then there is no data in the cache item, so there is nothing to expire. So it would be nice if cache items that don't have CACHE_VALID are never treated as expired." v3, change the order of the two patches v2, change the checking of CACHE_PENDING to CACHE_VALID Reviewed-by: NeilBrown Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 9dcf2c8fe1c5..70738504d647 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -204,8 +204,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) kref_put(&h->ref, cd->cache_put); } -static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) +static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) { + if (!test_bit(CACHE_VALID, &h->flags)) + return false; + return (h->expiry_time < seconds_since_boot()) || (detail->flush_time >= h->last_refresh); } -- GitLab From 920b4530fb80430ff30ef83efe21ba1fa5623731 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 1 Feb 2017 00:00:07 -0500 Subject: [PATCH 0168/1084] NFS: nfs_rename() handle -ERESTARTSYS dentry left behind An interrupted rename will leave the old dentry behind if the rename succeeds. Fix this by moving the final local work of the rename to rpc_call_done so that the results of the RENAME can always be handled, even if the original process has already returned with -ERESTARTSYS. 
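The rule this fix applies is general: when the submitting task may be interrupted while waiting, any bookkeeping that a successful operation requires has to live in the completion callback, which always runs, rather than after the wait, which may be skipped. A rough sketch of that shape in C, with invented names (struct op_data, start_async_op(), wait_for_op() and apply_results() are illustrations only, not NFS or RPC interfaces):

struct op_data {
	int status;		/* result, filled in by the completion path */
	void *results;		/* whatever the operation produced */
};

/*
 * Completion callback: runs when the reply arrives, even if the task
 * that submitted the operation already returned -ERESTARTSYS.
 */
static void op_done(struct op_data *data)
{
	if (data->status == 0)
		apply_results(data->results);	/* e.g. d_move(), verifier updates */
}

static int op_submit(struct op_data *data)
{
	int err;

	err = start_async_op(data, op_done);	/* hypothetical async starter */
	if (err)
		return err;

	err = wait_for_op(data);		/* may be cut short by a signal */
	/* Deliberately no result handling here: op_done() owns it. */
	return err ? err : data->status;
}

Keeping op_submit() free of success-path side effects is what makes an early -ERESTARTSYS return harmless.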
Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index fad81041f5ab..fb499a3f21b5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2002,6 +2002,29 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) } EXPORT_SYMBOL_GPL(nfs_link); +static void +nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data) +{ + struct dentry *old_dentry = data->old_dentry; + struct dentry *new_dentry = data->new_dentry; + struct inode *old_inode = d_inode(old_dentry); + struct inode *new_inode = d_inode(new_dentry); + + nfs_mark_for_revalidate(old_inode); + + switch (task->tk_status) { + case 0: + if (new_inode != NULL) + nfs_drop_nlink(new_inode); + d_move(old_dentry, new_dentry); + nfs_set_verifier(new_dentry, + nfs_save_change_attribute(data->new_dir)); + break; + case -ENOENT: + nfs_dentry_handle_enoent(old_dentry); + } +} + /* * RENAME * FIXME: Some nfsds, like the Linux user space nfsd, may generate a @@ -2084,7 +2107,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode != NULL) NFS_PROTO(new_inode)->return_delegation(new_inode); - task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); + task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, + nfs_complete_rename); if (IS_ERR(task)) { error = PTR_ERR(task); goto out; @@ -2094,21 +2118,11 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error == 0) error = task->tk_status; rpc_put_task(task); - nfs_mark_for_revalidate(old_inode); out: if (rehash) d_rehash(rehash); trace_nfs_rename_exit(old_dir, old_dentry, new_dir, new_dentry, error); - if (!error) { - if (new_inode != NULL) - nfs_drop_nlink(new_inode); - d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, - nfs_save_change_attribute(new_dir)); - } else if (error == -ENOENT) - nfs_dentry_handle_enoent(old_dentry); - /* new dentry created? */ if (dentry) dput(dentry); -- GitLab From 600424e3d91be7c6c8c38b95db713983a6307efa Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 4 Jan 2017 04:22:33 +1000 Subject: [PATCH 0169/1084] nfs: no PG_private waiters remain, remove waker Since commit 4f52b6bb ("NFS: Don't call COMMIT in ->releasepage()"), no tasks wait on PagePrivate, so the wake introduced in commit 95905446 ("NFS: avoid deadlocks with loop-back mounted NFS filesystems.") can be removed. Signed-off-by: Nicholas Piggin Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ad4219a41f25..e75b056f46f4 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -728,8 +728,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (likely(head->wb_page && !PageSwapCache(head->wb_page))) { set_page_private(head->wb_page, 0); ClearPagePrivate(head->wb_page); - smp_mb__after_atomic(); - wake_up_page(head->wb_page, PG_private); clear_bit(PG_MAPPED, &head->wb_flags); } nfsi->nrequests--; -- GitLab From 2864486bd0fdd14431058650c91fcb9fba605d43 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Feb 2017 21:46:39 +0800 Subject: [PATCH 0170/1084] sunrpc: error out if register_shrinker fail register_shrinker may return error when register fail, error out. 
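register_shrinker() can itself fail (a NUMA-aware shrinker allocates per-node counters at registration time), so its return value deserves the same unwind treatment as any other step in an init path. A minimal sketch of that pattern in a standalone module, with placeholder names (my_setup()/my_teardown() stand in for whatever was initialised earlier; none of this is sunrpc code):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/shrinker.h>

static unsigned long my_count(struct shrinker *s, struct shrink_control *sc)
{
	return 0;			/* nothing cached in this sketch */
}

static unsigned long my_scan(struct shrinker *s, struct shrink_control *sc)
{
	return SHRINK_STOP;		/* nothing to reclaim in this sketch */
}

static struct shrinker my_shrinker = {
	.count_objects	= my_count,
	.scan_objects	= my_scan,
	.seeks		= DEFAULT_SEEKS,
};

static int my_setup(void)    { return 0; }	/* placeholder earlier init step */
static void my_teardown(void) { }		/* undoes my_setup() */

static int __init my_init(void)
{
	int err;

	err = my_setup();
	if (err)
		return err;

	err = register_shrinker(&my_shrinker);
	if (err)
		goto out_teardown;	/* unwind only the steps that succeeded */
	return 0;

out_teardown:
	my_teardown();
	return err;
}

static void __exit my_exit(void)
{
	unregister_shrinker(&my_shrinker);
	my_teardown();
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");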
Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index d8f17ea7932e..8b344d0eb6ed 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -878,8 +878,12 @@ int __init rpcauth_init_module(void) err = rpc_init_generic_auth(); if (err < 0) goto out2; - register_shrinker(&rpc_cred_shrinker); + err = register_shrinker(&rpc_cred_shrinker); + if (err < 0) + goto out3; return 0; +out3: + rpc_destroy_generic_auth(); out2: rpc_destroy_authunix(); out1: -- GitLab From 863d7d9c2e0cbbde6cd15f87c4431dd806f2d917 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Feb 2017 21:47:16 +0800 Subject: [PATCH 0171/1084] sunrpc/nfs: cleanup procfs/pipefs entry in cache_detail Record flush/channel/content entries is useless, remove them. Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- fs/nfs/cache_lib.c | 3 +-- include/linux/sunrpc/cache.h | 15 +++-------- net/sunrpc/cache.c | 49 +++++++++++++----------------------- 3 files changed, 21 insertions(+), 46 deletions(-) diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 6de15709d024..2ae676f93e6b 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -141,8 +141,7 @@ int nfs_cache_register_net(struct net *net, struct cache_detail *cd) void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd) { - if (cd->u.pipefs.dir) - sunrpc_cache_unregister_pipefs(cd); + sunrpc_cache_unregister_pipefs(cd); } void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 62a60eeacb0a..bb5c9c80f12e 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -63,15 +63,6 @@ struct cache_head { #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ -struct cache_detail_procfs { - struct proc_dir_entry *proc_ent; - struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; -}; - -struct cache_detail_pipefs { - struct dentry *dir; -}; - struct cache_detail { struct module * owner; int hash_size; @@ -123,9 +114,9 @@ struct cache_detail { time_t last_warn; /* when we last warned about no readers */ union { - struct cache_detail_procfs procfs; - struct cache_detail_pipefs pipefs; - } u; + struct proc_dir_entry *procfs; + struct dentry *pipefs; + }; struct net *net; }; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 8147e8d56eb2..688ef8cbac62 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1600,21 +1600,12 @@ static const struct file_operations cache_flush_operations_procfs = { .llseek = no_llseek, }; -static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net) +static void remove_cache_proc_entries(struct cache_detail *cd) { - struct sunrpc_net *sn; - - if (cd->u.procfs.proc_ent == NULL) - return; - if (cd->u.procfs.flush_ent) - remove_proc_entry("flush", cd->u.procfs.proc_ent); - if (cd->u.procfs.channel_ent) - remove_proc_entry("channel", cd->u.procfs.proc_ent); - if (cd->u.procfs.content_ent) - remove_proc_entry("content", cd->u.procfs.proc_ent); - cd->u.procfs.proc_ent = NULL; - sn = net_generic(net, sunrpc_net_id); - remove_proc_entry(cd->name, sn->proc_net_rpc); + if (cd->procfs) { + proc_remove(cd->procfs); + cd->procfs = NULL; + } } #ifdef CONFIG_PROC_FS @@ -1624,38 +1615,30 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) struct sunrpc_net *sn; sn = net_generic(net, 
sunrpc_net_id); - cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc); - if (cd->u.procfs.proc_ent == NULL) + cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc); + if (cd->procfs == NULL) goto out_nomem; - cd->u.procfs.channel_ent = NULL; - cd->u.procfs.content_ent = NULL; p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, - cd->u.procfs.proc_ent, - &cache_flush_operations_procfs, cd); - cd->u.procfs.flush_ent = p; + cd->procfs, &cache_flush_operations_procfs, cd); if (p == NULL) goto out_nomem; if (cd->cache_request || cd->cache_parse) { p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, - cd->u.procfs.proc_ent, - &cache_file_operations_procfs, cd); - cd->u.procfs.channel_ent = p; + cd->procfs, &cache_file_operations_procfs, cd); if (p == NULL) goto out_nomem; } if (cd->cache_show) { p = proc_create_data("content", S_IFREG|S_IRUSR, - cd->u.procfs.proc_ent, - &content_file_operations_procfs, cd); - cd->u.procfs.content_ent = p; + cd->procfs, &content_file_operations_procfs, cd); if (p == NULL) goto out_nomem; } return 0; out_nomem: - remove_cache_proc_entries(cd, net); + remove_cache_proc_entries(cd); return -ENOMEM; } #else /* CONFIG_PROC_FS */ @@ -1684,7 +1667,7 @@ EXPORT_SYMBOL_GPL(cache_register_net); void cache_unregister_net(struct cache_detail *cd, struct net *net) { - remove_cache_proc_entries(cd, net); + remove_cache_proc_entries(cd); sunrpc_destroy_cache_detail(cd); } EXPORT_SYMBOL_GPL(cache_unregister_net); @@ -1843,15 +1826,17 @@ int sunrpc_cache_register_pipefs(struct dentry *parent, struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd); if (IS_ERR(dir)) return PTR_ERR(dir); - cd->u.pipefs.dir = dir; + cd->pipefs = dir; return 0; } EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) { - rpc_remove_cache_dir(cd->u.pipefs.dir); - cd->u.pipefs.dir = NULL; + if (cd->pipefs) { + rpc_remove_cache_dir(cd->pipefs); + cd->pipefs = NULL; + } } EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); -- GitLab From 5786461bd8ea81433d81297215ee814a9a33ce7a Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Feb 2017 21:48:11 +0800 Subject: [PATCH 0172/1084] sunrpc: rename NFS_NGROUPS to UNX_NGROUPS for auth unix NFS_NGROUPS has been move to sunrpc, rename to UNX_NGROUPS. 
Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 1 + net/sunrpc/auth_unix.c | 18 ++++++++---------- net/sunrpc/svcauth_unix.c | 4 ++-- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index b1bc62ba20a2..39c85fb1f0ed 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -32,6 +32,7 @@ */ #define UNX_MAXNODENAME __NEW_UTS_LEN #define UNX_CALLSLACK (21 + XDR_QUADLEN(UNX_MAXNODENAME)) +#define UNX_NGROUPS 16 struct rpcsec_gss_info; diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 306fc0f54596..82337e1ec9cd 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -14,12 +14,10 @@ #include #include -#define NFS_NGROUPS 16 - struct unx_cred { struct rpc_cred uc_base; kgid_t uc_gid; - kgid_t uc_gids[NFS_NGROUPS]; + kgid_t uc_gids[UNX_NGROUPS]; }; #define uc_uid uc_base.cr_uid @@ -82,13 +80,13 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t if (acred->group_info != NULL) groups = acred->group_info->ngroups; - if (groups > NFS_NGROUPS) - groups = NFS_NGROUPS; + if (groups > UNX_NGROUPS) + groups = UNX_NGROUPS; cred->uc_gid = acred->gid; for (i = 0; i < groups; i++) cred->uc_gids[i] = acred->group_info->gid[i]; - if (i < NFS_NGROUPS) + if (i < UNX_NGROUPS) cred->uc_gids[i] = INVALID_GID; return &cred->uc_base; @@ -132,12 +130,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) if (acred->group_info != NULL) groups = acred->group_info->ngroups; - if (groups > NFS_NGROUPS) - groups = NFS_NGROUPS; + if (groups > UNX_NGROUPS) + groups = UNX_NGROUPS; for (i = 0; i < groups ; i++) if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i])) return 0; - if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups])) + if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups])) return 0; return 1; } @@ -166,7 +164,7 @@ unx_marshal(struct rpc_task *task, __be32 *p) *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid)); *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid)); hold = p++; - for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++) + for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++) *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i])); *hold = htonl(p - hold - 1); /* gid array length */ *base = htonl((p - base - 1) << 2); /* cred length */ diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 64af4f034de6..f81eaa8e0888 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -403,7 +403,7 @@ svcauth_unix_info_release(struct svc_xprt *xpt) /**************************************************************************** * auth.unix.gid cache * simple cache to map a UID to a list of GIDs - * because AUTH_UNIX aka AUTH_SYS has a max of 16 + * because AUTH_UNIX aka AUTH_SYS has a max of UNX_NGROUPS */ #define GID_HASHBITS 8 #define GID_HASHMAX (1<cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */ cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */ slen = svc_getnl(argv); /* gids length */ - if (slen > 16 || (len -= (slen + 2)*4) < 0) + if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0) goto badcred; cred->cr_group_info = groups_alloc(slen); if (cred->cr_group_info == NULL) -- GitLab From af4926e5619f8a33463f7202f27ba091893ed2ad Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Feb 2017 21:48:49 +0800 Subject: [PATCH 0173/1084] sunrpc: remove dead codes of cr_magic in rpc_cred 
Don't found any place using the cr_magic. Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 5 ----- net/sunrpc/auth.c | 3 --- net/sunrpc/auth_null.c | 3 --- 3 files changed, 11 deletions(-) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 39c85fb1f0ed..8fd3504946ad 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -64,9 +64,6 @@ struct rpc_cred { struct rcu_head cr_rcu; struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - unsigned long cr_magic; /* 0x0f4aa4f0 */ -#endif unsigned long cr_expire; /* when to gc */ unsigned long cr_flags; /* various flags */ atomic_t cr_count; /* ref count */ @@ -80,8 +77,6 @@ struct rpc_cred { #define RPCAUTH_CRED_HASHED 2 #define RPCAUTH_CRED_NEGATIVE 3 -#define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 - /* rpc_auth au_flags */ #define RPCAUTH_AUTH_NO_CRKEY_TIMEOUT 0x0001 /* underlying cred has no key timeout */ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 8b344d0eb6ed..a1ee933e3029 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -648,9 +648,6 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, cred->cr_auth = auth; cred->cr_ops = ops; cred->cr_expire = jiffies; -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - cred->cr_magic = RPCAUTH_CRED_MAGIC; -#endif cred->cr_uid = acred->uid; } EXPORT_SYMBOL_GPL(rpcauth_init_cred); diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 4d17376b2acb..5f3d527dff65 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -139,7 +139,4 @@ struct rpc_cred null_cred = { .cr_ops = &null_credops, .cr_count = ATOMIC_INIT(1), .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - .cr_magic = RPCAUTH_CRED_MAGIC, -#endif }; -- GitLab From 6489a8f41370bcc53dac7107e298179da8c6cc05 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Feb 2017 21:49:17 +0800 Subject: [PATCH 0174/1084] sunrpc: update the comments of sunrpc proc path Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- net/sunrpc/cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 688ef8cbac62..9e8561d1714d 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -717,7 +717,7 @@ void cache_clean_deferred(void *owner) /* * communicate with user-space * - * We have a magic /proc file - /proc/sunrpc//channel. + * We have a magic /proc file - /proc/net/rpc//channel. * On read, you get a full request, or block. * On write, an update request is processed. * Poll works if anything to read, and always allows write. @@ -1272,7 +1272,7 @@ EXPORT_SYMBOL_GPL(qword_get); /* - * support /proc/sunrpc/cache/$CACHENAME/content + * support /proc/net/rpc/$CACHENAME/content * as a seqfile. * We call ->cache_show passing NULL for the item to * get a header, then pass each real item in the cache -- GitLab From 3f373e81b1e8d26a90523cd12385cbce588f3f18 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Feb 2017 21:49:57 +0800 Subject: [PATCH 0175/1084] sunrpc: record rpc client pointer in seq->private directly pos in rpc_clnt_iter is useless, drop it and record clnt in seq_private. 
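The simplification leans on a seq_file convention: seq_open_private() is for iterators that genuinely need their own state structure, but when the only per-open state is a single pointer, seq_open() plus seq->private carries it with no extra allocation, and plain seq_release() is the matching teardown. A rough sketch of the reduced form (my_seq_ops and the inode->i_private handoff are assumptions for illustration, not sunrpc code):

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static const struct seq_operations my_seq_ops;	/* ->start/->next/->stop/->show defined elsewhere */

static int my_open(struct inode *inode, struct file *filp)
{
	int ret = seq_open(filp, &my_seq_ops);

	if (!ret) {
		struct seq_file *seq = filp->private_data;

		/* Hand over the object this file describes; the iterator
		 * callbacks read it back through seq->private. */
		seq->private = inode->i_private;
	}
	return ret;
}

static int my_release(struct inode *inode, struct file *filp)
{
	/* The pointer is borrowed, not owned, so there is nothing to free. */
	return seq_release(inode, filp);
}

static const struct file_operations my_fops = {
	.owner		= THIS_MODULE,
	.open		= my_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= my_release,
};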
Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- net/sunrpc/debugfs.c | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index e7b4d93566df..c8fd0b6c1618 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -16,11 +16,6 @@ static struct dentry *rpc_xprt_dir; unsigned int rpc_inject_disconnect; -struct rpc_clnt_iter { - struct rpc_clnt *clnt; - loff_t pos; -}; - static int tasks_show(struct seq_file *f, void *v) { @@ -47,12 +42,10 @@ static void * tasks_start(struct seq_file *f, loff_t *ppos) __acquires(&clnt->cl_lock) { - struct rpc_clnt_iter *iter = f->private; + struct rpc_clnt *clnt = f->private; loff_t pos = *ppos; - struct rpc_clnt *clnt = iter->clnt; struct rpc_task *task; - iter->pos = pos + 1; spin_lock(&clnt->cl_lock); list_for_each_entry(task, &clnt->cl_tasks, tk_task) if (pos-- == 0) @@ -63,12 +56,10 @@ tasks_start(struct seq_file *f, loff_t *ppos) static void * tasks_next(struct seq_file *f, void *v, loff_t *pos) { - struct rpc_clnt_iter *iter = f->private; - struct rpc_clnt *clnt = iter->clnt; + struct rpc_clnt *clnt = f->private; struct rpc_task *task = v; struct list_head *next = task->tk_task.next; - ++iter->pos; ++*pos; /* If there's another task on list, return it */ @@ -81,9 +72,7 @@ static void tasks_stop(struct seq_file *f, void *v) __releases(&clnt->cl_lock) { - struct rpc_clnt_iter *iter = f->private; - struct rpc_clnt *clnt = iter->clnt; - + struct rpc_clnt *clnt = f->private; spin_unlock(&clnt->cl_lock); } @@ -96,17 +85,13 @@ static const struct seq_operations tasks_seq_operations = { static int tasks_open(struct inode *inode, struct file *filp) { - int ret = seq_open_private(filp, &tasks_seq_operations, - sizeof(struct rpc_clnt_iter)); - + int ret = seq_open(filp, &tasks_seq_operations); if (!ret) { struct seq_file *seq = filp->private_data; - struct rpc_clnt_iter *iter = seq->private; - - iter->clnt = inode->i_private; + struct rpc_clnt *clnt = seq->private = inode->i_private; - if (!atomic_inc_not_zero(&iter->clnt->cl_count)) { - seq_release_private(inode, filp); + if (!atomic_inc_not_zero(&clnt->cl_count)) { + seq_release(inode, filp); ret = -EINVAL; } } @@ -118,10 +103,10 @@ static int tasks_release(struct inode *inode, struct file *filp) { struct seq_file *seq = filp->private_data; - struct rpc_clnt_iter *iter = seq->private; + struct rpc_clnt *clnt = seq->private; - rpc_release_client(iter->clnt); - return seq_release_private(inode, filp); + rpc_release_client(clnt); + return seq_release(inode, filp); } static const struct file_operations tasks_fops = { -- GitLab From 8ccc869169d0c59d288b20ec2b060317f87e08b4 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Feb 2017 21:50:32 +0800 Subject: [PATCH 0176/1084] sunrpc: use simple_read_from_buffer for reading cache flush Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- net/sunrpc/cache.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 9e8561d1714d..2f06f510b570 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1427,20 +1427,11 @@ static ssize_t read_flush(struct file *file, char __user *buf, struct cache_detail *cd) { char tbuf[22]; - unsigned long p = *ppos; size_t len; - snprintf(tbuf, sizeof(tbuf), "%lu\n", convert_to_wallclock(cd->flush_time)); - len = strlen(tbuf); - if (p >= len) - return 0; - len -= p; - if (len > count) - len = count; - if (copy_to_user(buf, (void*)(tbuf+p), 
len)) - return -EFAULT; - *ppos += len; - return len; + len = snprintf(tbuf, sizeof(tbuf), "%lu\n", + convert_to_wallclock(cd->flush_time)); + return simple_read_from_buffer(buf, count, ppos, tbuf, len); } static ssize_t write_flush(struct file *file, const char __user *buf, -- GitLab From a974deee477af89411e0f80456bfb344ac433c98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2017 11:29:46 -0500 Subject: [PATCH 0177/1084] NFSv4: Fix memory and state leak in _nfs4_open_and_get_state If we exit because the file access check failed, we currently leak the struct nfs4_state. We need to attach it to the open context before returning. Fixes: 3efb9722475e ("NFSv4: Refactor _nfs4_open_and_get_state..") Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index aeaa6678b071..45271e5994f0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2624,6 +2624,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, ret = PTR_ERR(state); if (IS_ERR(state)) goto out; + ctx->state = state; if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK) @@ -2649,7 +2650,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, if (ret != 0) goto out; - ctx->state = state; if (d_inode(dentry) == state->inode) { nfs_inode_attach_open_context(ctx); if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) -- GitLab From df2e32c5ada8590b859863cf89f418c7962b9a21 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:53 -0800 Subject: [PATCH 0178/1084] qla2xxx: Remove direct access of scsi_status field in se_cmd Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_target.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 5b1287a63c49..4aae861a6713 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -1794,6 +1794,7 @@ typedef struct { #define SS_RESIDUAL_OVER BIT_10 #define SS_SENSE_LEN_VALID BIT_9 #define SS_RESPONSE_INFO_LEN_VALID BIT_8 +#define SS_SCSI_STATUS_BYTE 0xff #define SS_RESERVE_CONFLICT (BIT_4 | BIT_3) #define SS_BUSY_CONDITION BIT_3 diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index e4fda84b959e..439c1fc44c7a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2288,7 +2288,7 @@ static void qlt_24xx_init_ctio_to_isp(struct ctio7_to_24xx *ctio, int i; if (qlt_need_explicit_conf(prm->tgt->ha, prm->cmd, 1)) { - if (prm->cmd->se_cmd.scsi_status != 0) { + if ((prm->rq_result & SS_SCSI_STATUS_BYTE) != 0) { ql_dbg(ql_dbg_tgt, prm->cmd->vha, 0xe017, "Skipping EXPLICIT_CONFORM and " "CTIO7_FLAGS_CONFORM_REQ for FCP READ w/ " -- GitLab From be92fc3fde95c94bb54ff471219552a581640a6b Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:54 -0800 Subject: [PATCH 0179/1084] qla2xxx: Cleanup TMF code translation from qla_target Move code code which converts Task Mgmt Command flags for ATIO to TCM #defines, from qla2xxx driver to tcm_qla2xxx driver. 
Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 75 +++--------------------------- drivers/scsi/qla2xxx/qla_target.h | 6 ++- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 40 +++++++++++++++- 3 files changed, 49 insertions(+), 72 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 439c1fc44c7a..fcdd5c9ed3bf 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1592,8 +1592,9 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, mcmd->sess = sess; memcpy(&mcmd->orig_iocb.abts, abts, sizeof(mcmd->orig_iocb.abts)); mcmd->reset_count = vha->hw->chip_reset; + mcmd->tmr_func = QLA_TGT_ABTS; - rc = ha->tgt.tgt_ops->handle_tmr(mcmd, lun, TMR_ABORT_TASK, + rc = ha->tgt.tgt_ops->handle_tmr(mcmd, lun, mcmd->tmr_func, abts->exchange_addr_to_abort); if (rc != 0) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf052, @@ -4051,7 +4052,6 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, struct qla_tgt_mgmt_cmd *mcmd; struct atio_from_isp *a = (struct atio_from_isp *)iocb; int res; - uint8_t tmr_func; mcmd = mempool_alloc(qla_tgt_mgmt_cmd_mempool, GFP_ATOMIC); if (!mcmd) { @@ -4073,74 +4073,12 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, mcmd->reset_count = vha->hw->chip_reset; switch (fn) { - case QLA_TGT_CLEAR_ACA: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10000, - "qla_target(%d): CLEAR_ACA received\n", sess->vha->vp_idx); - tmr_func = TMR_CLEAR_ACA; - break; - - case QLA_TGT_TARGET_RESET: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10001, - "qla_target(%d): TARGET_RESET received\n", - sess->vha->vp_idx); - tmr_func = TMR_TARGET_WARM_RESET; - break; - case QLA_TGT_LUN_RESET: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10002, - "qla_target(%d): LUN_RESET received\n", sess->vha->vp_idx); - tmr_func = TMR_LUN_RESET; - abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id); - break; - - case QLA_TGT_CLEAR_TS: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10003, - "qla_target(%d): CLEAR_TS received\n", sess->vha->vp_idx); - tmr_func = TMR_CLEAR_TASK_SET; - break; - - case QLA_TGT_ABORT_TS: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10004, - "qla_target(%d): ABORT_TS received\n", sess->vha->vp_idx); - tmr_func = TMR_ABORT_TASK_SET; - break; -#if 0 - case QLA_TGT_ABORT_ALL: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10005, - "qla_target(%d): Doing ABORT_ALL_TASKS\n", - sess->vha->vp_idx); - tmr_func = 0; - break; - - case QLA_TGT_ABORT_ALL_SESS: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10006, - "qla_target(%d): Doing ABORT_ALL_TASKS_SESS\n", - sess->vha->vp_idx); - tmr_func = 0; - break; - - case QLA_TGT_NEXUS_LOSS_SESS: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10007, - "qla_target(%d): Doing NEXUS_LOSS_SESS\n", - sess->vha->vp_idx); - tmr_func = 0; - break; - - case QLA_TGT_NEXUS_LOSS: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x10008, - "qla_target(%d): Doing NEXUS_LOSS\n", sess->vha->vp_idx); - tmr_func = 0; - break; -#endif - default: - ql_dbg(ql_dbg_tgt_tmr, vha, 0x1000a, - "qla_target(%d): Unknown task mgmt fn 0x%x\n", - sess->vha->vp_idx, fn); - mempool_free(mcmd, qla_tgt_mgmt_cmd_mempool); - return -ENOSYS; + abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id); + break; } - res = ha->tgt.tgt_ops->handle_tmr(mcmd, lun, tmr_func, 0); + res = ha->tgt.tgt_ops->handle_tmr(mcmd, lun, mcmd->tmr_func, 0); if (res != 0) { ql_dbg(ql_dbg_tgt_tmr, vha, 0x1000b, "qla_target(%d): tgt.tgt_ops->handle_tmr() failed: %d\n", @@ -4215,8 +4153,9 @@ static 
int __qlt_abort_task(struct scsi_qla_host *vha, lun = a->u.isp24.fcp_cmnd.lun; unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); mcmd->reset_count = vha->hw->chip_reset; + mcmd->tmr_func = QLA_TGT_2G_ABORT_TASK; - rc = ha->tgt.tgt_ops->handle_tmr(mcmd, unpacked_lun, TMR_ABORT_TASK, + rc = ha->tgt.tgt_ops->handle_tmr(mcmd, unpacked_lun, mcmd->tmr_func, le16_to_cpu(iocb->u.isp2x.seq_id)); if (rc != 0) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf060, diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 0824a8164a24..14f2e241db8c 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -744,7 +744,7 @@ struct qla_tgt_func_tmpl { unsigned char *, uint32_t, int, int, int); void (*handle_data)(struct qla_tgt_cmd *); void (*handle_dif_err)(struct qla_tgt_cmd *); - int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint8_t, + int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint16_t, uint32_t); void (*free_cmd)(struct qla_tgt_cmd *); void (*free_mcmd)(struct qla_tgt_mgmt_cmd *); @@ -795,6 +795,8 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *); #define QLA_TGT_ABORT_ALL 0xFFFE #define QLA_TGT_NEXUS_LOSS_SESS 0xFFFD #define QLA_TGT_NEXUS_LOSS 0xFFFC +#define QLA_TGT_ABTS 0xFFFB +#define QLA_TGT_2G_ABORT_TASK 0xFFFA /* Notify Acknowledge flags */ #define NOTIFY_ACK_RES_COUNT BIT_8 @@ -1056,7 +1058,7 @@ struct qla_tgt_sess_work_param { }; struct qla_tgt_mgmt_cmd { - uint8_t tmr_func; + uint16_t tmr_func; uint8_t fc_tm_rsp; struct qla_tgt_sess *sess; struct se_cmd se_cmd; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index d925910be761..1bec6ab66aaf 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -563,13 +563,49 @@ static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd) * Called from qla_target.c:qlt_issue_task_mgmt() */ static int tcm_qla2xxx_handle_tmr(struct qla_tgt_mgmt_cmd *mcmd, uint32_t lun, - uint8_t tmr_func, uint32_t tag) + uint16_t tmr_func, uint32_t tag) { struct qla_tgt_sess *sess = mcmd->sess; struct se_cmd *se_cmd = &mcmd->se_cmd; + int transl_tmr_func = 0; + + switch (tmr_func) { + case QLA_TGT_ABTS: + pr_debug("%ld: ABTS received\n", sess->vha->host_no); + transl_tmr_func = TMR_ABORT_TASK; + break; + case QLA_TGT_2G_ABORT_TASK: + pr_debug("%ld: 2G Abort Task received\n", sess->vha->host_no); + transl_tmr_func = TMR_ABORT_TASK; + break; + case QLA_TGT_CLEAR_ACA: + pr_debug("%ld: CLEAR_ACA received\n", sess->vha->host_no); + transl_tmr_func = TMR_CLEAR_ACA; + break; + case QLA_TGT_TARGET_RESET: + pr_debug("%ld: TARGET_RESET received\n", sess->vha->host_no); + transl_tmr_func = TMR_TARGET_WARM_RESET; + break; + case QLA_TGT_LUN_RESET: + pr_debug("%ld: LUN_RESET received\n", sess->vha->host_no); + transl_tmr_func = TMR_LUN_RESET; + break; + case QLA_TGT_CLEAR_TS: + pr_debug("%ld: CLEAR_TS received\n", sess->vha->host_no); + transl_tmr_func = TMR_CLEAR_TASK_SET; + break; + case QLA_TGT_ABORT_TS: + pr_debug("%ld: ABORT_TS received\n", sess->vha->host_no); + transl_tmr_func = TMR_ABORT_TASK_SET; + break; + default: + pr_debug("%ld: Unknown task mgmt fn 0x%x\n", + sess->vha->host_no, tmr_func); + return -ENOSYS; + } return target_submit_tmr(se_cmd, sess->se_sess, NULL, lun, mcmd, - tmr_func, GFP_ATOMIC, tag, TARGET_SCF_ACK_KREF); + transl_tmr_func, GFP_ATOMIC, tag, TARGET_SCF_ACK_KREF); } static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) -- GitLab From 1eb42f965cedafb700e9c902ddafb1c51e3117f7 Mon Sep 17 00:00:00 
2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:55 -0800 Subject: [PATCH 0180/1084] qla2xxx: Make trace flags more readable Trace flags are useful during debugging crash dumps using crash utility. These trace flags makes it easier to understand various states a command has successfully completed. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 28 +++++++-------- drivers/scsi/qla2xxx/qla_target.h | 55 ++++++++++++++---------------- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 43 +++++++++++------------ 3 files changed, 59 insertions(+), 67 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index fcdd5c9ed3bf..5f31ae99fbe2 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3297,7 +3297,7 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd) return EIO; } cmd->aborted = 1; - cmd->cmd_flags |= BIT_6; + cmd->trc_flags |= TRC_ABORT; spin_unlock_irqrestore(&cmd->cmd_lock, flags); qlt_send_term_exchange(vha, cmd, &cmd->atio, 0, 1); @@ -3345,7 +3345,7 @@ static int qlt_prepare_srr_ctio(struct scsi_qla_host *vha, struct qla_tgt_srr_imm *imm; tgt->ctio_srr_id++; - cmd->cmd_flags |= BIT_15; + cmd->trc_flags |= TRC_SRR_CTIO; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf019, "qla_target(%d): CTIO with SRR status received\n", vha->vp_idx); @@ -3528,7 +3528,7 @@ qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd) dump_stack(); } - cmd->cmd_flags |= BIT_17; + cmd->trc_flags |= TRC_FLUSH; ha->tgt.tgt_ops->free_cmd(cmd); } @@ -3694,7 +3694,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, */ if ((cmd->state != QLA_TGT_STATE_NEED_DATA) && (!cmd->aborted)) { - cmd->cmd_flags |= BIT_13; + cmd->trc_flags |= TRC_CTIO_ERR; if (qlt_term_ctio_exchange(vha, ctio, cmd, status)) return; } @@ -3702,7 +3702,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, skip_term: if (cmd->state == QLA_TGT_STATE_PROCESSED) { - cmd->cmd_flags |= BIT_12; + cmd->trc_flags |= TRC_CTIO_DONE; } else if (cmd->state == QLA_TGT_STATE_NEED_DATA) { cmd->state = QLA_TGT_STATE_DATA_IN; @@ -3712,11 +3712,11 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, ha->tgt.tgt_ops->handle_data(cmd); return; } else if (cmd->aborted) { - cmd->cmd_flags |= BIT_18; + cmd->trc_flags |= TRC_CTIO_ABORTED; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01e, "Aborted command %p (tag %lld) finished\n", cmd, se_cmd->tag); } else { - cmd->cmd_flags |= BIT_19; + cmd->trc_flags |= TRC_CTIO_STRANGE; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05c, "qla_target(%d): A command in state (%d) should " "not return a CTIO complete\n", vha->vp_idx, cmd->state); @@ -3781,7 +3781,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) int ret, fcp_task_attr, data_dir, bidi = 0; cmd->cmd_in_wq = 0; - cmd->cmd_flags |= BIT_1; + cmd->trc_flags |= TRC_DO_WORK; if (tgt->tgt_stop) goto out_term; @@ -3833,7 +3833,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) * cmd has not sent to target yet, so pass NULL as the second * argument to qlt_send_term_exchange() and free the memory here. 
*/ - cmd->cmd_flags |= BIT_2; + cmd->trc_flags |= TRC_DO_WORK_ERR; spin_lock_irqsave(&ha->hardware_lock, flags); qlt_send_term_exchange(vha, NULL, &cmd->atio, 1, 0); @@ -3884,7 +3884,7 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha, cmd->loop_id = sess->loop_id; cmd->conf_compl_supported = sess->conf_compl_supported; - cmd->cmd_flags = 0; + cmd->trc_flags = 0; cmd->jiffies_at_alloc = get_jiffies_64(); cmd->reset_count = vha->hw->chip_reset; @@ -4020,7 +4020,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, } cmd->cmd_in_wq = 1; - cmd->cmd_flags |= BIT_0; + cmd->trc_flags |= TRC_NEW_CMD; cmd->se_cmd.cpuid = ha->msix_count ? ha->tgt.rspq_vector_cpuid : WORK_CPU_UNBOUND; @@ -4712,7 +4712,7 @@ static void qlt_handle_srr(struct scsi_qla_host *vha, 0, 0, 0, NOTIFY_ACK_SRR_FLAGS_ACCEPT, 0, 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); if (xmit_type & QLA_TGT_XMIT_DATA) { - cmd->cmd_flags |= BIT_8; + cmd->trc_flags |= TRC_SRR_XRDY; qlt_rdy_to_xfer(cmd); } } else { @@ -4731,7 +4731,7 @@ static void qlt_handle_srr(struct scsi_qla_host *vha, /* Transmit response in case of status and data-in cases */ if (resp) { - cmd->cmd_flags |= BIT_7; + cmd->trc_flags |= TRC_SRR_RSP; qlt_xmit_response(cmd, xmit_type, se_cmd->scsi_status); } @@ -4747,7 +4747,7 @@ static void qlt_handle_srr(struct scsi_qla_host *vha, cmd->state = QLA_TGT_STATE_DATA_IN; dump_stack(); } else { - cmd->cmd_flags |= BIT_9; + cmd->trc_flags |= TRC_SRR_TERM; qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0); } spin_unlock_irqrestore(&ha->hardware_lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 14f2e241db8c..5d00529b68c4 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -965,35 +965,28 @@ struct qla_tgt_sess { qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; }; -typedef enum { - /* - * BIT_0 - Atio Arrival / schedule to work - * BIT_1 - qlt_do_work - * BIT_2 - qlt_do work failed - * BIT_3 - xfer rdy/tcm_qla2xxx_write_pending - * BIT_4 - read respond/tcm_qla2xx_queue_data_in - * BIT_5 - status respond / tcm_qla2xx_queue_status - * BIT_6 - tcm request to abort/Term exchange. - * pre_xmit_response->qlt_send_term_exchange - * BIT_7 - SRR received (qlt_handle_srr->qlt_xmit_response) - * BIT_8 - SRR received (qlt_handle_srr->qlt_rdy_to_xfer) - * BIT_9 - SRR received (qla_handle_srr->qlt_send_term_exchange) - * BIT_10 - Data in - hanlde_data->tcm_qla2xxx_handle_data - - * BIT_12 - good completion - qlt_ctio_do_completion -->free_cmd - * BIT_13 - Bad completion - - * qlt_ctio_do_completion --> qlt_term_ctio_exchange - * BIT_14 - Back end data received/sent. 
- * BIT_15 - SRR prepare ctio - * BIT_16 - complete free - * BIT_17 - flush - qlt_abort_cmd_on_host_reset - * BIT_18 - completion w/abort status - * BIT_19 - completion w/unknown status - * BIT_20 - tcm_qla2xxx_free_cmd - */ - CMD_FLAG_DATA_WORK = BIT_11, - CMD_FLAG_DATA_WORK_FREE = BIT_21, -} cmd_flags_t; +enum trace_flags { + TRC_NEW_CMD = BIT_0, + TRC_DO_WORK = BIT_1, + TRC_DO_WORK_ERR = BIT_2, + TRC_XFR_RDY = BIT_3, + TRC_XMIT_DATA = BIT_4, + TRC_XMIT_STATUS = BIT_5, + TRC_SRR_RSP = BIT_6, + TRC_SRR_XRDY = BIT_7, + TRC_SRR_TERM = BIT_8, + TRC_SRR_CTIO = BIT_9, + TRC_FLUSH = BIT_10, + TRC_CTIO_ERR = BIT_11, + TRC_CTIO_DONE = BIT_12, + TRC_CTIO_ABORTED = BIT_13, + TRC_CTIO_STRANGE= BIT_14, + TRC_CMD_DONE = BIT_15, + TRC_CMD_CHK_STOP = BIT_16, + TRC_CMD_FREE = BIT_17, + TRC_DATA_IN = BIT_18, + TRC_ABORT = BIT_19, +}; struct qla_tgt_cmd { struct se_cmd se_cmd; @@ -1016,6 +1009,8 @@ struct qla_tgt_cmd { unsigned int cmd_sent_to_fw:1; unsigned int cmd_in_wq:1; unsigned int aborted:1; + unsigned int data_work:1; + unsigned int data_work_free:1; struct scatterlist *sg; /* cmd data buffer SG vector */ int sg_cnt; /* SG segments count */ @@ -1040,7 +1035,7 @@ struct qla_tgt_cmd { uint64_t jiffies_at_alloc; uint64_t jiffies_at_free; - cmd_flags_t cmd_flags; + enum trace_flags trc_flags; }; struct qla_tgt_sess_work_param { diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 1bec6ab66aaf..af4a198e5537 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -282,10 +282,10 @@ static void tcm_qla2xxx_complete_free(struct work_struct *work) cmd->cmd_in_wq = 0; - WARN_ON(cmd->cmd_flags & BIT_16); + WARN_ON(cmd->trc_flags & TRC_CMD_FREE); cmd->vha->tgt_counters.qla_core_ret_sta_ctio++; - cmd->cmd_flags |= BIT_16; + cmd->trc_flags |= TRC_CMD_FREE; transport_generic_free_cmd(&cmd->se_cmd, 0); } @@ -299,8 +299,8 @@ static void tcm_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd) cmd->vha->tgt_counters.core_qla_free_cmd++; cmd->cmd_in_wq = 1; - BUG_ON(cmd->cmd_flags & BIT_20); - cmd->cmd_flags |= BIT_20; + WARN_ON(cmd->trc_flags & TRC_CMD_DONE); + cmd->trc_flags |= TRC_CMD_DONE; INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free); queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); @@ -315,7 +315,7 @@ static int tcm_qla2xxx_check_stop_free(struct se_cmd *se_cmd) if ((se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) == 0) { cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - cmd->cmd_flags |= BIT_14; + cmd->trc_flags |= TRC_CMD_CHK_STOP; } return target_put_sess_cmd(se_cmd); @@ -377,7 +377,7 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd) cmd->se_cmd.se_cmd_flags); return 0; } - cmd->cmd_flags |= BIT_3; + cmd->trc_flags |= TRC_XFR_RDY; cmd->bufflen = se_cmd->data_length; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); @@ -493,9 +493,9 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work) cmd->cmd_in_wq = 0; spin_lock_irqsave(&cmd->cmd_lock, flags); - cmd->cmd_flags |= CMD_FLAG_DATA_WORK; + cmd->data_work = 1; if (cmd->aborted) { - cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE; + cmd->data_work_free = 1; spin_unlock_irqrestore(&cmd->cmd_lock, flags); tcm_qla2xxx_free_cmd(cmd); @@ -532,7 +532,7 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work) */ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd) { - cmd->cmd_flags |= BIT_10; + cmd->trc_flags |= TRC_DATA_IN; cmd->cmd_in_wq = 1; INIT_WORK(&cmd->work, tcm_qla2xxx_handle_data_work); 
queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); @@ -627,7 +627,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) return 0; } - cmd->cmd_flags |= BIT_4; + cmd->trc_flags |= TRC_XMIT_DATA; cmd->bufflen = se_cmd->data_length; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); @@ -658,11 +658,11 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd->sg_cnt = 0; cmd->offset = 0; cmd->dma_data_direction = target_reverse_dma_direction(se_cmd); - if (cmd->cmd_flags & BIT_5) { - pr_crit("Bit_5 already set for cmd = %p.\n", cmd); + if (cmd->trc_flags & TRC_XMIT_STATUS) { + pr_crit("Multiple calls for status = %p.\n", cmd); dump_stack(); } - cmd->cmd_flags |= BIT_5; + cmd->trc_flags |= TRC_XMIT_STATUS; if (se_cmd->data_direction == DMA_FROM_DEVICE) { /* @@ -718,10 +718,7 @@ static void tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd) qlt_xmit_tm_rsp(mcmd); } - -#define DATA_WORK_NOT_FREE(_flags) \ - (( _flags & (CMD_FLAG_DATA_WORK|CMD_FLAG_DATA_WORK_FREE)) == \ - CMD_FLAG_DATA_WORK) +#define DATA_WORK_NOT_FREE(_cmd) (_cmd->data_work && !_cmd->data_work_free) static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, @@ -733,13 +730,13 @@ static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) spin_lock_irqsave(&cmd->cmd_lock, flags); if ((cmd->state == QLA_TGT_STATE_NEW)|| - ((cmd->state == QLA_TGT_STATE_DATA_IN) && - DATA_WORK_NOT_FREE(cmd->cmd_flags)) ) { - - cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE; + ((cmd->state == QLA_TGT_STATE_DATA_IN) && + DATA_WORK_NOT_FREE(cmd))) { + cmd->data_work_free = 1; spin_unlock_irqrestore(&cmd->cmd_lock, flags); - /* Cmd have not reached firmware. - * Use this trigger to free it. */ + /* + * cmd has not reached fw, Use this trigger to free it. + */ tcm_qla2xxx_free_cmd(cmd); return; } -- GitLab From 2c39b5ca2a8cb99fc463fd622e8ea3b9fbb980a2 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Thu, 19 Jan 2017 22:27:56 -0800 Subject: [PATCH 0181/1084] qla2xxx: Remove SRR code During initial implementation, tape support was included but not enabled by default on target. So far, we don't see any target customer requesting this support. Since this code is not being used actively, we want to remove it and we will add back if there are any request in future for SRR support. 
Signed-off-by: Himanshu Madhani Signed-off-by: Giridhar Malavali Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 641 ------------------------------ drivers/scsi/qla2xxx/qla_target.h | 21 - 2 files changed, 662 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 5f31ae99fbe2..119a445a82f6 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -106,8 +106,6 @@ static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, int fn, void *iocb, int flags); static void qlt_send_term_exchange(struct scsi_qla_host *ha, struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked, int ul_abort); -static void qlt_reject_free_srr_imm(struct scsi_qla_host *ha, - struct qla_tgt_srr_imm *imm, int ha_lock); static void qlt_abort_cmd_on_host_reset(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd); static void qlt_alloc_qfull_cmd(struct scsi_qla_host *vha, @@ -2183,95 +2181,6 @@ static inline int qlt_need_explicit_conf(struct qla_hw_data *ha, cmd->conf_compl_supported; } -#ifdef CONFIG_QLA_TGT_DEBUG_SRR -/* - * Original taken from the XFS code - */ -static unsigned long qlt_srr_random(void) -{ - static int Inited; - static unsigned long RandomValue; - static DEFINE_SPINLOCK(lock); - /* cycles pseudo-randomly through all values between 1 and 2^31 - 2 */ - register long rv; - register long lo; - register long hi; - unsigned long flags; - - spin_lock_irqsave(&lock, flags); - if (!Inited) { - RandomValue = jiffies; - Inited = 1; - } - rv = RandomValue; - hi = rv / 127773; - lo = rv % 127773; - rv = 16807 * lo - 2836 * hi; - if (rv <= 0) - rv += 2147483647; - RandomValue = rv; - spin_unlock_irqrestore(&lock, flags); - return rv; -} - -static void qlt_check_srr_debug(struct qla_tgt_cmd *cmd, int *xmit_type) -{ -#if 0 /* This is not a real status packets lost, so it won't lead to SRR */ - if ((*xmit_type & QLA_TGT_XMIT_STATUS) && (qlt_srr_random() % 200) - == 50) { - *xmit_type &= ~QLA_TGT_XMIT_STATUS; - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf015, - "Dropping cmd %p (tag %d) status", cmd, se_cmd->tag); - } -#endif - /* - * It's currently not possible to simulate SRRs for FCP_WRITE without - * a physical link layer failure, so don't even try here.. 
- */ - if (cmd->dma_data_direction != DMA_FROM_DEVICE) - return; - - if (qlt_has_data(cmd) && (cmd->sg_cnt > 1) && - ((qlt_srr_random() % 100) == 20)) { - int i, leave = 0; - unsigned int tot_len = 0; - - while (leave == 0) - leave = qlt_srr_random() % cmd->sg_cnt; - - for (i = 0; i < leave; i++) - tot_len += cmd->sg[i].length; - - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf016, - "Cutting cmd %p (tag %d) buffer" - " tail to len %d, sg_cnt %d (cmd->bufflen %d," - " cmd->sg_cnt %d)", cmd, se_cmd->tag, tot_len, leave, - cmd->bufflen, cmd->sg_cnt); - - cmd->bufflen = tot_len; - cmd->sg_cnt = leave; - } - - if (qlt_has_data(cmd) && ((qlt_srr_random() % 100) == 70)) { - unsigned int offset = qlt_srr_random() % cmd->bufflen; - - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf017, - "Cutting cmd %p (tag %d) buffer head " - "to offset %d (cmd->bufflen %d)", cmd, se_cmd->tag, offset, - cmd->bufflen); - if (offset == 0) - *xmit_type &= ~QLA_TGT_XMIT_DATA; - else if (qlt_set_data_offset(cmd, offset)) { - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf018, - "qlt_set_data_offset() failed (tag %d)", se_cmd->tag); - } - } -} -#else -static inline void qlt_check_srr_debug(struct qla_tgt_cmd *cmd, int *xmit_type) -{} -#endif - static void qlt_24xx_init_ctio_to_isp(struct ctio7_to_24xx *ctio, struct qla_tgt_prm *prm) { @@ -2686,7 +2595,6 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, spin_unlock_irqrestore(&ha->hardware_lock, flags); memset(&prm, 0, sizeof(prm)); - qlt_check_srr_debug(cmd, &xmit_type); ql_dbg(ql_dbg_tgt, cmd->vha, 0xe018, "is_send_status=%d, cmd->bufflen=%d, cmd->sg_cnt=%d, cmd->dma_data_direction=%d se_cmd[%p]\n", @@ -3336,90 +3244,6 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) } EXPORT_SYMBOL(qlt_free_cmd); -/* ha->hardware_lock supposed to be held on entry */ -static int qlt_prepare_srr_ctio(struct scsi_qla_host *vha, - struct qla_tgt_cmd *cmd, void *ctio) -{ - struct qla_tgt_srr_ctio *sc; - struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_srr_imm *imm; - - tgt->ctio_srr_id++; - cmd->trc_flags |= TRC_SRR_CTIO; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf019, - "qla_target(%d): CTIO with SRR status received\n", vha->vp_idx); - - if (!ctio) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf055, - "qla_target(%d): SRR CTIO, but ctio is NULL\n", - vha->vp_idx); - return -EINVAL; - } - - sc = kzalloc(sizeof(*sc), GFP_ATOMIC); - if (sc != NULL) { - sc->cmd = cmd; - /* IRQ is already OFF */ - spin_lock(&tgt->srr_lock); - sc->srr_id = tgt->ctio_srr_id; - list_add_tail(&sc->srr_list_entry, - &tgt->srr_ctio_list); - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01a, - "CTIO SRR %p added (id %d)\n", sc, sc->srr_id); - if (tgt->imm_srr_id == tgt->ctio_srr_id) { - int found = 0; - list_for_each_entry(imm, &tgt->srr_imm_list, - srr_list_entry) { - if (imm->srr_id == sc->srr_id) { - found = 1; - break; - } - } - if (found) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01b, - "Scheduling srr work\n"); - schedule_work(&tgt->srr_work); - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf056, - "qla_target(%d): imm_srr_id " - "== ctio_srr_id (%d), but there is no " - "corresponding SRR IMM, deleting CTIO " - "SRR %p\n", vha->vp_idx, - tgt->ctio_srr_id, sc); - list_del(&sc->srr_list_entry); - spin_unlock(&tgt->srr_lock); - - kfree(sc); - return -EINVAL; - } - } - spin_unlock(&tgt->srr_lock); - } else { - struct qla_tgt_srr_imm *ti; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf057, - "qla_target(%d): Unable to allocate SRR CTIO entry\n", - vha->vp_idx); - spin_lock(&tgt->srr_lock); - list_for_each_entry_safe(imm, ti, &tgt->srr_imm_list, - srr_list_entry) { - if 
(imm->srr_id == tgt->ctio_srr_id) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01c, - "IMM SRR %p deleted (id %d)\n", - imm, imm->srr_id); - list_del(&imm->srr_list_entry); - qlt_reject_free_srr_imm(vha, imm, 1); - } - } - spin_unlock(&tgt->srr_lock); - - return -ENOMEM; - } - - return 0; -} - /* * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire */ @@ -3637,16 +3461,6 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, } break; } - case CTIO_SRR_RECEIVED: - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05a, - "qla_target(%d): CTIO with SRR_RECEIVED" - " status %x received (state %x, se_cmd %p)\n", - vha->vp_idx, status, cmd->state, se_cmd); - if (qlt_prepare_srr_ctio(vha, cmd, ctio) != 0) - break; - else - return; - case CTIO_DIF_ERROR: { struct ctio_crc_from_fw *crc = (struct ctio_crc_from_fw *)ctio; @@ -4499,451 +4313,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, return res; } -static int qlt_set_data_offset(struct qla_tgt_cmd *cmd, uint32_t offset) -{ -#if 1 - /* - * FIXME: Reject non zero SRR relative offset until we can test - * this code properly. - */ - pr_debug("Rejecting non zero SRR rel_offs: %u\n", offset); - return -1; -#else - struct scatterlist *sg, *sgp, *sg_srr, *sg_srr_start = NULL; - size_t first_offset = 0, rem_offset = offset, tmp = 0; - int i, sg_srr_cnt, bufflen = 0; - - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe023, - "Entering qla_tgt_set_data_offset: cmd: %p, cmd->sg: %p, " - "cmd->sg_cnt: %u, direction: %d\n", - cmd, cmd->sg, cmd->sg_cnt, cmd->dma_data_direction); - - if (!cmd->sg || !cmd->sg_cnt) { - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe055, - "Missing cmd->sg or zero cmd->sg_cnt in" - " qla_tgt_set_data_offset\n"); - return -EINVAL; - } - /* - * Walk the current cmd->sg list until we locate the new sg_srr_start - */ - for_each_sg(cmd->sg, sg, cmd->sg_cnt, i) { - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe024, - "sg[%d]: %p page: %p, length: %d, offset: %d\n", - i, sg, sg_page(sg), sg->length, sg->offset); - - if ((sg->length + tmp) > offset) { - first_offset = rem_offset; - sg_srr_start = sg; - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe025, - "Found matching sg[%d], using %p as sg_srr_start, " - "and using first_offset: %zu\n", i, sg, - first_offset); - break; - } - tmp += sg->length; - rem_offset -= sg->length; - } - - if (!sg_srr_start) { - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe056, - "Unable to locate sg_srr_start for offset: %u\n", offset); - return -EINVAL; - } - sg_srr_cnt = (cmd->sg_cnt - i); - - sg_srr = kzalloc(sizeof(struct scatterlist) * sg_srr_cnt, GFP_KERNEL); - if (!sg_srr) { - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe057, - "Unable to allocate sgp\n"); - return -ENOMEM; - } - sg_init_table(sg_srr, sg_srr_cnt); - sgp = &sg_srr[0]; - /* - * Walk the remaining list for sg_srr_start, mapping to the newly - * allocated sg_srr taking first_offset into account. 
- */ - for_each_sg(sg_srr_start, sg, sg_srr_cnt, i) { - if (first_offset) { - sg_set_page(sgp, sg_page(sg), - (sg->length - first_offset), first_offset); - first_offset = 0; - } else { - sg_set_page(sgp, sg_page(sg), sg->length, 0); - } - bufflen += sgp->length; - - sgp = sg_next(sgp); - if (!sgp) - break; - } - - cmd->sg = sg_srr; - cmd->sg_cnt = sg_srr_cnt; - cmd->bufflen = bufflen; - cmd->offset += offset; - cmd->free_sg = 1; - - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe026, "New cmd->sg: %p\n", cmd->sg); - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe027, "New cmd->sg_cnt: %u\n", - cmd->sg_cnt); - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe028, "New cmd->bufflen: %u\n", - cmd->bufflen); - ql_dbg(ql_dbg_tgt, cmd->vha, 0xe029, "New cmd->offset: %u\n", - cmd->offset); - - if (cmd->sg_cnt < 0) - BUG(); - - if (cmd->bufflen < 0) - BUG(); - - return 0; -#endif -} - -static inline int qlt_srr_adjust_data(struct qla_tgt_cmd *cmd, - uint32_t srr_rel_offs, int *xmit_type) -{ - int res = 0, rel_offs; - - rel_offs = srr_rel_offs - cmd->offset; - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf027, "srr_rel_offs=%d, rel_offs=%d", - srr_rel_offs, rel_offs); - - *xmit_type = QLA_TGT_XMIT_ALL; - - if (rel_offs < 0) { - ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xf062, - "qla_target(%d): SRR rel_offs (%d) < 0", - cmd->vha->vp_idx, rel_offs); - res = -1; - } else if (rel_offs == cmd->bufflen) - *xmit_type = QLA_TGT_XMIT_STATUS; - else if (rel_offs > 0) - res = qlt_set_data_offset(cmd, rel_offs); - - return res; -} - -/* No locks, thread context */ -static void qlt_handle_srr(struct scsi_qla_host *vha, - struct qla_tgt_srr_ctio *sctio, struct qla_tgt_srr_imm *imm) -{ - struct imm_ntfy_from_isp *ntfy = - (struct imm_ntfy_from_isp *)&imm->imm_ntfy; - struct qla_hw_data *ha = vha->hw; - struct qla_tgt_cmd *cmd = sctio->cmd; - struct se_cmd *se_cmd = &cmd->se_cmd; - unsigned long flags; - int xmit_type = 0, resp = 0; - uint32_t offset; - uint16_t srr_ui; - - offset = le32_to_cpu(ntfy->u.isp24.srr_rel_offs); - srr_ui = ntfy->u.isp24.srr_ui; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf028, "SRR cmd %p, srr_ui %x\n", - cmd, srr_ui); - - switch (srr_ui) { - case SRR_IU_STATUS: - spin_lock_irqsave(&ha->hardware_lock, flags); - qlt_send_notify_ack(vha, ntfy, - 0, 0, 0, NOTIFY_ACK_SRR_FLAGS_ACCEPT, 0, 0); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - xmit_type = QLA_TGT_XMIT_STATUS; - resp = 1; - break; - case SRR_IU_DATA_IN: - if (!cmd->sg || !cmd->sg_cnt) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf063, - "Unable to process SRR_IU_DATA_IN due to" - " missing cmd->sg, state: %d\n", cmd->state); - dump_stack(); - goto out_reject; - } - if (se_cmd->scsi_status != 0) { - ql_dbg(ql_dbg_tgt, vha, 0xe02a, - "Rejecting SRR_IU_DATA_IN with non GOOD " - "scsi_status\n"); - goto out_reject; - } - cmd->bufflen = se_cmd->data_length; - - if (qlt_has_data(cmd)) { - if (qlt_srr_adjust_data(cmd, offset, &xmit_type) != 0) - goto out_reject; - spin_lock_irqsave(&ha->hardware_lock, flags); - qlt_send_notify_ack(vha, ntfy, - 0, 0, 0, NOTIFY_ACK_SRR_FLAGS_ACCEPT, 0, 0); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - resp = 1; - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf064, - "qla_target(%d): SRR for in data for cmd without them (tag %lld, SCSI status %d), reject", - vha->vp_idx, se_cmd->tag, - cmd->se_cmd.scsi_status); - goto out_reject; - } - break; - case SRR_IU_DATA_OUT: - if (!cmd->sg || !cmd->sg_cnt) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf065, - "Unable to process SRR_IU_DATA_OUT due to" - " missing cmd->sg\n"); - dump_stack(); - goto out_reject; - } - if (se_cmd->scsi_status != 0) { - 
ql_dbg(ql_dbg_tgt, vha, 0xe02b, - "Rejecting SRR_IU_DATA_OUT" - " with non GOOD scsi_status\n"); - goto out_reject; - } - cmd->bufflen = se_cmd->data_length; - - if (qlt_has_data(cmd)) { - if (qlt_srr_adjust_data(cmd, offset, &xmit_type) != 0) - goto out_reject; - spin_lock_irqsave(&ha->hardware_lock, flags); - qlt_send_notify_ack(vha, ntfy, - 0, 0, 0, NOTIFY_ACK_SRR_FLAGS_ACCEPT, 0, 0); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (xmit_type & QLA_TGT_XMIT_DATA) { - cmd->trc_flags |= TRC_SRR_XRDY; - qlt_rdy_to_xfer(cmd); - } - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf066, - "qla_target(%d): SRR for out data for cmd without them (tag %lld, SCSI status %d), reject", - vha->vp_idx, se_cmd->tag, cmd->se_cmd.scsi_status); - goto out_reject; - } - break; - default: - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf067, - "qla_target(%d): Unknown srr_ui value %x", - vha->vp_idx, srr_ui); - goto out_reject; - } - - /* Transmit response in case of status and data-in cases */ - if (resp) { - cmd->trc_flags |= TRC_SRR_RSP; - qlt_xmit_response(cmd, xmit_type, se_cmd->scsi_status); - } - - return; - -out_reject: - spin_lock_irqsave(&ha->hardware_lock, flags); - qlt_send_notify_ack(vha, ntfy, 0, 0, 0, - NOTIFY_ACK_SRR_FLAGS_REJECT, - NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM, - NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL); - if (cmd->state == QLA_TGT_STATE_NEED_DATA) { - cmd->state = QLA_TGT_STATE_DATA_IN; - dump_stack(); - } else { - cmd->trc_flags |= TRC_SRR_TERM; - qlt_send_term_exchange(vha, cmd, &cmd->atio, 1, 0); - } - spin_unlock_irqrestore(&ha->hardware_lock, flags); -} - -static void qlt_reject_free_srr_imm(struct scsi_qla_host *vha, - struct qla_tgt_srr_imm *imm, int ha_locked) -{ - struct qla_hw_data *ha = vha->hw; - unsigned long flags = 0; - -#ifndef __CHECKER__ - if (!ha_locked) - spin_lock_irqsave(&ha->hardware_lock, flags); -#endif - - qlt_send_notify_ack(vha, (void *)&imm->imm_ntfy, 0, 0, 0, - NOTIFY_ACK_SRR_FLAGS_REJECT, - NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM, - NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL); - -#ifndef __CHECKER__ - if (!ha_locked) - spin_unlock_irqrestore(&ha->hardware_lock, flags); -#endif - - kfree(imm); -} - -static void qlt_handle_srr_work(struct work_struct *work) -{ - struct qla_tgt *tgt = container_of(work, struct qla_tgt, srr_work); - struct scsi_qla_host *vha = tgt->vha; - struct qla_tgt_srr_ctio *sctio; - unsigned long flags; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf029, "Entering SRR work (tgt %p)\n", - tgt); - -restart: - spin_lock_irqsave(&tgt->srr_lock, flags); - list_for_each_entry(sctio, &tgt->srr_ctio_list, srr_list_entry) { - struct qla_tgt_srr_imm *imm, *i, *ti; - struct qla_tgt_cmd *cmd; - struct se_cmd *se_cmd; - - imm = NULL; - list_for_each_entry_safe(i, ti, &tgt->srr_imm_list, - srr_list_entry) { - if (i->srr_id == sctio->srr_id) { - list_del(&i->srr_list_entry); - if (imm) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf068, - "qla_target(%d): There must be " - "only one IMM SRR per CTIO SRR " - "(IMM SRR %p, id %d, CTIO %p\n", - vha->vp_idx, i, i->srr_id, sctio); - qlt_reject_free_srr_imm(tgt->vha, i, 0); - } else - imm = i; - } - } - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02a, - "IMM SRR %p, CTIO SRR %p (id %d)\n", imm, sctio, - sctio->srr_id); - - if (imm == NULL) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02b, - "Not found matching IMM for SRR CTIO (id %d)\n", - sctio->srr_id); - continue; - } else - list_del(&sctio->srr_list_entry); - - spin_unlock_irqrestore(&tgt->srr_lock, flags); - - cmd = sctio->cmd; - /* - * Reset qla_tgt_cmd SRR values and SGL pointer+count 
to follow - * tcm_qla2xxx_write_pending() and tcm_qla2xxx_queue_data_in() - * logic.. - */ - cmd->offset = 0; - if (cmd->free_sg) { - kfree(cmd->sg); - cmd->sg = NULL; - cmd->free_sg = 0; - } - se_cmd = &cmd->se_cmd; - - cmd->sg_cnt = se_cmd->t_data_nents; - cmd->sg = se_cmd->t_data_sg; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02c, - "SRR cmd %p (se_cmd %p, tag %lld, op %x), sg_cnt=%d, offset=%d", - cmd, &cmd->se_cmd, se_cmd->tag, se_cmd->t_task_cdb ? - se_cmd->t_task_cdb[0] : 0, cmd->sg_cnt, cmd->offset); - - qlt_handle_srr(vha, sctio, imm); - - kfree(imm); - kfree(sctio); - goto restart; - } - spin_unlock_irqrestore(&tgt->srr_lock, flags); -} - -/* ha->hardware_lock supposed to be held on entry */ -static void qlt_prepare_srr_imm(struct scsi_qla_host *vha, - struct imm_ntfy_from_isp *iocb) -{ - struct qla_tgt_srr_imm *imm; - struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_srr_ctio *sctio; - - tgt->imm_srr_id++; - - ql_log(ql_log_warn, vha, 0xf02d, "qla_target(%d): SRR received\n", - vha->vp_idx); - - imm = kzalloc(sizeof(*imm), GFP_ATOMIC); - if (imm != NULL) { - memcpy(&imm->imm_ntfy, iocb, sizeof(imm->imm_ntfy)); - - /* IRQ is already OFF */ - spin_lock(&tgt->srr_lock); - imm->srr_id = tgt->imm_srr_id; - list_add_tail(&imm->srr_list_entry, - &tgt->srr_imm_list); - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02e, - "IMM NTFY SRR %p added (id %d, ui %x)\n", - imm, imm->srr_id, iocb->u.isp24.srr_ui); - if (tgt->imm_srr_id == tgt->ctio_srr_id) { - int found = 0; - list_for_each_entry(sctio, &tgt->srr_ctio_list, - srr_list_entry) { - if (sctio->srr_id == imm->srr_id) { - found = 1; - break; - } - } - if (found) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02f, "%s", - "Scheduling srr work\n"); - schedule_work(&tgt->srr_work); - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf030, - "qla_target(%d): imm_srr_id " - "== ctio_srr_id (%d), but there is no " - "corresponding SRR CTIO, deleting IMM " - "SRR %p\n", vha->vp_idx, tgt->ctio_srr_id, - imm); - list_del(&imm->srr_list_entry); - - kfree(imm); - - spin_unlock(&tgt->srr_lock); - goto out_reject; - } - } - spin_unlock(&tgt->srr_lock); - } else { - struct qla_tgt_srr_ctio *ts; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf069, - "qla_target(%d): Unable to allocate SRR IMM " - "entry, SRR request will be rejected\n", vha->vp_idx); - - /* IRQ is already OFF */ - spin_lock(&tgt->srr_lock); - list_for_each_entry_safe(sctio, ts, &tgt->srr_ctio_list, - srr_list_entry) { - if (sctio->srr_id == tgt->imm_srr_id) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf031, - "CTIO SRR %p deleted (id %d)\n", - sctio, sctio->srr_id); - list_del(&sctio->srr_list_entry); - qlt_send_term_exchange(vha, sctio->cmd, - &sctio->cmd->atio, 1, 0); - kfree(sctio); - } - } - spin_unlock(&tgt->srr_lock); - goto out_reject; - } - - return; - -out_reject: - qlt_send_notify_ack(vha, iocb, 0, 0, 0, - NOTIFY_ACK_SRR_FLAGS_REJECT, - NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM, - NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL); -} - /* * ha->hardware_lock supposed to be held on entry. 
Might drop it, then reaquire */ @@ -5065,12 +4434,6 @@ static void qlt_handle_imm_notify(struct scsi_qla_host *vha, if (qlt_24xx_handle_els(vha, iocb) == 0) send_notify_ack = 0; break; - - case IMM_NTFY_SRR: - qlt_prepare_srr_imm(vha, iocb); - send_notify_ack = 0; - break; - default: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf06d, "qla_target(%d): Received unknown immediate " @@ -6021,10 +5384,6 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) spin_lock_init(&tgt->sess_work_lock); INIT_WORK(&tgt->sess_work, qlt_sess_work_fn); INIT_LIST_HEAD(&tgt->sess_works_list); - spin_lock_init(&tgt->srr_lock); - INIT_LIST_HEAD(&tgt->srr_ctio_list); - INIT_LIST_HEAD(&tgt->srr_imm_list); - INIT_WORK(&tgt->srr_work, qlt_handle_srr_work); atomic_set(&tgt->tgt_global_resets_count, 0); base_vha->vha_tgt.qla_tgt = tgt; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 5d00529b68c4..f72bd0755a59 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -890,16 +890,7 @@ struct qla_tgt { int notify_ack_expected; int abts_resp_expected; int modify_lun_expected; - - int ctio_srr_id; - int imm_srr_id; - spinlock_t srr_lock; - struct list_head srr_ctio_list; - struct list_head srr_imm_list; - struct work_struct srr_work; - atomic_t tgt_global_resets_count; - struct list_head tgt_list_entry; }; @@ -1087,18 +1078,6 @@ struct qla_tgt_prm { uint16_t tot_dsds; }; -struct qla_tgt_srr_imm { - struct list_head srr_list_entry; - int srr_id; - struct imm_ntfy_from_isp imm_ntfy; -}; - -struct qla_tgt_srr_ctio { - struct list_head srr_list_entry; - int srr_id; - struct qla_tgt_cmd *cmd; -}; - /* Check for Switch reserved address */ #define IS_SW_RESV_ADDR(_s_id) \ ((_s_id.b.domain == 0xff) && (_s_id.b.area == 0xfc)) -- GitLab From 5e4deaf6a47f397af68d3297bdc2913ae890bd4a Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:57 -0800 Subject: [PATCH 0182/1084] qla2xxx: Fix wrong argument in sp done callback Callback for sp->done expects scsi_qla_host is passed in as argument, Instead qla_hw_data is passed in. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_isr.c | 8 ++++---- drivers/scsi/qla2xxx/qla_mr.c | 6 +++--- drivers/scsi/qla2xxx/qla_os.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index dc88a09f9043..802b50cc89d0 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1212,7 +1212,7 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha, req->outstanding_cmds[index] = NULL; /* Save ISP completion status */ - sp->done(ha, sp, DID_OK << 16); + sp->done(vha, sp, DID_OK << 16); } else { ql_log(ql_log_warn, vha, 0x3016, "Invalid SCSI SRB.\n"); @@ -2405,7 +2405,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) resid_len, fw_resid_len, sp, cp); if (rsp->status_srb == NULL) - sp->done(ha, sp, res); + sp->done(vha, sp, res); } /** @@ -2462,7 +2462,7 @@ qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) /* Place command on done queue. 
*/ if (sense_len == 0) { rsp->status_srb = NULL; - sp->done(ha, sp, cp->result); + sp->done(vha, sp, cp->result); } } @@ -2498,7 +2498,7 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { - sp->done(ha, sp, res); + sp->done(vha, sp, res); return; } fatal: diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index 02f1de18bc2b..d38bc6452abd 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -2537,7 +2537,7 @@ qlafx00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) par_sense_len, rsp_info_len); if (rsp->status_srb == NULL) - sp->done(ha, sp, res); + sp->done(vha, sp, res); } /** @@ -2614,7 +2614,7 @@ qlafx00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) /* Place command on done queue. */ if (sense_len == 0) { rsp->status_srb = NULL; - sp->done(ha, sp, cp->result); + sp->done(vha, sp, cp->result); } } @@ -2695,7 +2695,7 @@ qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { - sp->done(ha, sp, res); + sp->done(vha, sp, res); return; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 0a000ecf0881..62ca1ddb0cc7 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1203,7 +1203,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) } spin_lock_irqsave(&ha->hardware_lock, flags); - sp->done(ha, sp, 0); + sp->done(vha, sp, 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); /* Did the command return during mailbox execution? */ -- GitLab From 37cacc0a13012eee9fa0b07563eb149ec9244b1e Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:58 -0800 Subject: [PATCH 0183/1084] qla2xxx: Use d_id instead of s_id for more clarity Updated code with d_id from s_id for better readability and clarity. 
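
For readers following the rename in the diff below, a minimal sketch of the idea (the type and function names here are illustrative only, not taken from the driver): in target mode the S_ID carried by an incoming frame is the fabric address of the remote initiator port, so once it is stored in the session it is the destination of every frame the target sends back, which is what the d_id name conveys.

/*
 * Illustrative sketch only -- example_port_id_t, struct example_session
 * and example_store_remote_port() are hypothetical names, not the
 * driver's actual types or helpers.
 */
typedef struct {
	unsigned char domain;
	unsigned char area;
	unsigned char al_pa;
} example_port_id_t;

struct example_session {
	/* remote (initiator) port address; previously named s_id */
	example_port_id_t d_id;
};

/* The S_ID seen in an inbound frame becomes the session's d_id. */
static void example_store_remote_port(struct example_session *sess,
				      example_port_id_t frame_s_id)
{
	sess->d_id = frame_s_id;
}
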
Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Christoph Hellwig [ bvanassche: fixed spelling of patch description ] Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_dfs.c | 9 +++---- drivers/scsi/qla2xxx/qla_target.c | 24 ++++++++--------- drivers/scsi/qla2xxx/qla_target.h | 2 +- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 41 ++++++++++++++++-------------- 4 files changed, 39 insertions(+), 37 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index 34272fde8a5b..98550c9b6161 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -26,12 +26,11 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused) seq_printf(s, "Port ID Port Name Handle\n"); spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) { + list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) seq_printf(s, "%02x:%02x:%02x %8phC %d\n", - sess->s_id.b.domain,sess->s_id.b.area, - sess->s_id.b.al_pa, sess->port_name, - sess->loop_id); - } + sess->d_id.b.domain, sess->d_id.b.area, + sess->d_id.b.al_pa, sess->port_name, + sess->loop_id); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 119a445a82f6..671875a50951 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -530,7 +530,7 @@ static void qlt_free_session_done(struct work_struct *work) "%s: se_sess %p / sess %p from port %8phC loop_id %#04x" " s_id %02x:%02x:%02x logout %d keep %d els_logo %d\n", __func__, sess->se_sess, sess, sess->port_name, sess->loop_id, - sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa, + sess->d_id.b.domain, sess->d_id.b.area, sess->d_id.b.al_pa, sess->logout_on_delete, sess->keep_nport_handle, sess->send_els_logo); @@ -538,7 +538,7 @@ static void qlt_free_session_done(struct work_struct *work) if (sess->send_els_logo) { qlt_port_logo_t logo; - logo.id = sess->s_id; + logo.id = sess->d_id; logo.cmd_count = 0; qlt_send_first_logo(vha, &logo); } @@ -548,7 +548,7 @@ static void qlt_free_session_done(struct work_struct *work) memset(&fcport, 0, sizeof(fcport)); fcport.loop_id = sess->loop_id; - fcport.d_id = sess->s_id; + fcport.d_id = sess->d_id; memcpy(fcport.port_name, sess->port_name, WWN_SIZE); fcport.vha = vha; fcport.tgt_session = sess; @@ -757,7 +757,7 @@ static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, "qla_target(%d): session for port %8phC (loop ID %d s_id %02x:%02x:%02x)" " scheduled for deletion in %u secs (expires: %lu) immed: %d, logout: %d, gen: %#x\n", sess->vha->vp_idx, sess->port_name, sess->loop_id, - sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa, + sess->d_id.b.domain, sess->d_id.b.area, sess->d_id.b.al_pa, dev_loss_tmo, sess->expires, immediate, sess->logout_on_delete, sess->generation); @@ -892,8 +892,8 @@ static struct qla_tgt_sess *qlt_create_sess( ql_dbg(ql_dbg_tgt_mgt, vha, 0xf005, "Double sess %p found (s_id %x:%x:%x, " "loop_id %d), updating to d_id %x:%x:%x, " - "loop_id %d", sess, sess->s_id.b.domain, - sess->s_id.b.al_pa, sess->s_id.b.area, + "loop_id %d", sess, sess->d_id.b.domain, + sess->d_id.b.al_pa, sess->d_id.b.area, sess->loop_id, fcport->d_id.b.domain, fcport->d_id.b.al_pa, fcport->d_id.b.area, fcport->loop_id); @@ -943,7 +943,7 @@ static struct qla_tgt_sess *qlt_create_sess( } sess->tgt = vha->vha_tgt.qla_tgt; sess->vha = vha; - sess->s_id = fcport->d_id; + 
sess->d_id = fcport->d_id; sess->loop_id = fcport->loop_id; sess->local = local; kref_init(&sess->sess_kref); @@ -974,8 +974,8 @@ static struct qla_tgt_sess *qlt_create_sess( "qla_target(%d): %ssession for wwn %8phC (loop_id %d, " "s_id %x:%x:%x, confirmed completion %ssupported) added\n", vha->vp_idx, local ? "local " : "", fcport->port_name, - fcport->loop_id, sess->s_id.b.domain, sess->s_id.b.area, - sess->s_id.b.al_pa, sess->conf_compl_supported ? "" : "not "); + fcport->loop_id, sess->d_id.b.domain, sess->d_id.b.area, + sess->d_id.b.al_pa, sess->conf_compl_supported ? "" : "not "); /* * Determine if this fc_port->port_name is allowed to access @@ -4055,7 +4055,7 @@ qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, } /* find other sess with nport_id collision */ - if (port_id.b24 == other_sess->s_id.b24) { + if (port_id.b24 == other_sess->d_id.b24) { if (loop_id != other_sess->loop_id) { ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000c, "Invalidating sess %p loop_id %d wwn %llx.\n", @@ -4216,7 +4216,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, * PLOGI and situation will correct itself. */ sess->keep_nport_handle = ((sess->loop_id == loop_id) && - (sess->s_id.b24 == port_id.b24)); + (sess->d_id.b24 == port_id.b24)); qlt_schedule_sess_for_deletion(sess, true); break; @@ -4264,7 +4264,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, sess->local = 0; sess->loop_id = loop_id; - sess->s_id = port_id; + sess->d_id = port_id; if (wd3_lo & BIT_7) sess->conf_compl_supported = 1; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index f72bd0755a59..07ccf81f6d22 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -928,7 +928,7 @@ typedef struct { */ struct qla_tgt_sess { uint16_t loop_id; - port_id_t s_id; + port_id_t d_id; unsigned int conf_compl_supported:1; unsigned int deleted:2; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index af4a198e5537..c96de1cfbd85 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1388,9 +1388,9 @@ static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *lport, struct se_session *se_sess = sess->se_sess; unsigned char be_sid[3]; - be_sid[0] = sess->s_id.b.domain; - be_sid[1] = sess->s_id.b.area; - be_sid[2] = sess->s_id.b.al_pa; + be_sid[0] = sess->d_id.b.domain; + be_sid[1] = sess->d_id.b.area; + be_sid[2] = sess->d_id.b.al_pa; tcm_qla2xxx_set_sess_by_s_id(lport, NULL, nacl, se_sess, sess, be_sid); @@ -1442,9 +1442,9 @@ static int tcm_qla2xxx_session_cb(struct se_portal_group *se_tpg, unsigned long flags; unsigned char be_sid[3]; - be_sid[0] = qlat_sess->s_id.b.domain; - be_sid[1] = qlat_sess->s_id.b.area; - be_sid[2] = qlat_sess->s_id.b.al_pa; + be_sid[0] = qlat_sess->d_id.b.domain; + be_sid[1] = qlat_sess->d_id.b.area; + be_sid[2] = qlat_sess->d_id.b.al_pa; /* * And now setup se_nacl and session pointers into HW lport internal @@ -1524,11 +1524,11 @@ static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, u32 key; - if (sess->loop_id != loop_id || sess->s_id.b24 != s_id.b24) + if (sess->loop_id != loop_id || sess->d_id.b24 != s_id.b24) pr_info("Updating session %p from port %8phC loop_id %d -> %d s_id %x:%x:%x -> %x:%x:%x\n", sess, sess->port_name, - sess->loop_id, loop_id, sess->s_id.b.domain, - sess->s_id.b.area, sess->s_id.b.al_pa, s_id.b.domain, + sess->loop_id, loop_id, sess->d_id.b.domain, + sess->d_id.b.area, sess->d_id.b.al_pa, 
s_id.b.domain, s_id.b.area, s_id.b.al_pa); if (sess->loop_id != loop_id) { @@ -1548,18 +1548,20 @@ static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, sess->loop_id = loop_id; } - if (sess->s_id.b24 != s_id.b24) { - key = (((u32) sess->s_id.b.domain << 16) | - ((u32) sess->s_id.b.area << 8) | - ((u32) sess->s_id.b.al_pa)); + if (sess->d_id.b24 != s_id.b24) { + key = (((u32) sess->d_id.b.domain << 16) | + ((u32) sess->d_id.b.area << 8) | + ((u32) sess->d_id.b.al_pa)); if (btree_lookup32(&lport->lport_fcport_map, key)) - WARN(btree_remove32(&lport->lport_fcport_map, key) != se_nacl, - "Found wrong se_nacl when updating s_id %x:%x:%x\n", - sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa); + WARN(btree_remove32(&lport->lport_fcport_map, key) != + se_nacl, "Found wrong se_nacl when updating s_id %x:%x:%x\n", + sess->d_id.b.domain, sess->d_id.b.area, + sess->d_id.b.al_pa); else WARN(1, "No lport_fcport_map entry for s_id %x:%x:%x\n", - sess->s_id.b.domain, sess->s_id.b.area, sess->s_id.b.al_pa); + sess->d_id.b.domain, sess->d_id.b.area, + sess->d_id.b.al_pa); key = (((u32) s_id.b.domain << 16) | ((u32) s_id.b.area << 8) | @@ -1570,10 +1572,11 @@ static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, s_id.b.domain, s_id.b.area, s_id.b.al_pa); btree_update32(&lport->lport_fcport_map, key, se_nacl); } else { - btree_insert32(&lport->lport_fcport_map, key, se_nacl, GFP_ATOMIC); + btree_insert32(&lport->lport_fcport_map, key, se_nacl, + GFP_ATOMIC); } - sess->s_id = s_id; + sess->d_id = s_id; nacl->nport_id = key; } -- GitLab From 5d964837c6a743193c63c8912f98834c7457ba5c Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:27:59 -0800 Subject: [PATCH 0184/1084] qla2xxx: Track I-T nexus as single fc_port struct Current code merges qla_tgt_sess and fc_port structure into single fc_port structure representing same I-T nexus. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani [ bvanassche: fixed spelling of patch description ] Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 112 +++++++++- drivers/scsi/qla2xxx/qla_dfs.c | 4 +- drivers/scsi/qla2xxx/qla_init.c | 12 -- drivers/scsi/qla2xxx/qla_iocb.c | 2 +- drivers/scsi/qla2xxx/qla_target.c | 326 ++++++++++------------------- drivers/scsi/qla2xxx/qla_target.h | 150 ++----------- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 130 +++++++----- drivers/scsi/qla2xxx/tcm_qla2xxx.h | 4 +- 8 files changed, 310 insertions(+), 430 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 4aae861a6713..1bdcc7e010ff 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -1976,6 +1976,84 @@ struct mbx_entry { uint8_t port_name[WWN_SIZE]; }; +#ifndef IMMED_NOTIFY_TYPE +#define IMMED_NOTIFY_TYPE 0x0D /* Immediate notify entry. */ +/* + * ISP queue - immediate notify entry structure definition. + * This is sent by the ISP to the Target driver. + * This IOCB would have report of events sent by the + * initiator, that needs to be handled by the target + * driver immediately. + */ +struct imm_ntfy_from_isp { + uint8_t entry_type; /* Entry type. */ + uint8_t entry_count; /* Entry count. */ + uint8_t sys_define; /* System defined. */ + uint8_t entry_status; /* Entry Status. */ + union { + struct { + uint32_t sys_define_2; /* System defined. 
*/ + target_id_t target; + uint16_t lun; + uint8_t target_id; + uint8_t reserved_1; + uint16_t status_modifier; + uint16_t status; + uint16_t task_flags; + uint16_t seq_id; + uint16_t srr_rx_id; + uint32_t srr_rel_offs; + uint16_t srr_ui; +#define SRR_IU_DATA_IN 0x1 +#define SRR_IU_DATA_OUT 0x5 +#define SRR_IU_STATUS 0x7 + uint16_t srr_ox_id; + uint8_t reserved_2[28]; + } isp2x; + struct { + uint32_t reserved; + uint16_t nport_handle; + uint16_t reserved_2; + uint16_t flags; +#define NOTIFY24XX_FLAGS_GLOBAL_TPRLO BIT_1 +#define NOTIFY24XX_FLAGS_PUREX_IOCB BIT_0 + uint16_t srr_rx_id; + uint16_t status; + uint8_t status_subcode; + uint8_t fw_handle; + uint32_t exchange_address; + uint32_t srr_rel_offs; + uint16_t srr_ui; + uint16_t srr_ox_id; + union { + struct { + uint8_t node_name[8]; + } plogi; /* PLOGI/ADISC/PDISC */ + struct { + /* PRLI word 3 bit 0-15 */ + uint16_t wd3_lo; + uint8_t resv0[6]; + } prli; + struct { + uint8_t port_id[3]; + uint8_t resv1; + uint16_t nport_handle; + uint16_t resv2; + } req_els; + } u; + uint8_t port_name[8]; + uint8_t resv3[3]; + uint8_t vp_index; + uint32_t reserved_5; + uint8_t port_id[3]; + uint8_t reserved_6; + } isp24; + } u; + uint16_t reserved_7; + uint16_t ox_id; +} __packed; +#endif + /* * ISP request and response queue entry sizes */ @@ -2026,7 +2104,7 @@ typedef struct { /* * Fibre channel port type. */ - typedef enum { +typedef enum { FCT_UNKNOWN, FCT_RSCN, FCT_SWITCH, @@ -2035,6 +2113,19 @@ typedef struct { FCT_TARGET } fc_port_type_t; +enum qlt_plogi_link_t { + QLT_PLOGI_LINK_SAME_WWN, + QLT_PLOGI_LINK_CONFLICT, + QLT_PLOGI_LINK_MAX +}; + +struct qlt_plogi_ack_t { + struct list_head list; + struct imm_ntfy_from_isp iocb; + port_id_t id; + int ref_count; +}; + /* * Fibre channel port structure. */ @@ -2048,6 +2139,25 @@ typedef struct fc_port { uint16_t loop_id; uint16_t old_loop_id; + unsigned int conf_compl_supported:1; + unsigned int deleted:2; + unsigned int local:1; + unsigned int logout_on_delete:1; + unsigned int keep_nport_handle:1; + unsigned int send_els_logo:1; + + unsigned char logout_completed; + int generation; + + struct se_session *se_sess; + struct kref sess_kref; + struct qla_tgt *tgt; + unsigned long expires; + struct list_head del_list_entry; + struct work_struct free_work; + + struct qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; + uint16_t tgt_id; uint16_t old_tgt_id; diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index 98550c9b6161..b48cce696bac 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -18,7 +18,7 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused) scsi_qla_host_t *vha = s->private; struct qla_hw_data *ha = vha->hw; unsigned long flags; - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; struct qla_tgt *tgt= vha->vha_tgt.qla_tgt; seq_printf(s, "%s\n",vha->host_str); @@ -26,7 +26,7 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused) seq_printf(s, "Port ID Port Name Handle\n"); spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) + list_for_each_entry(sess, &vha->vp_fcports, list) seq_printf(s, "%02x:%02x:%02x %8phC %d\n", sess->d_id.b.domain, sess->d_id.b.area, sess->d_id.b.al_pa, sess->port_name, diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 7b6317c8c2e9..5978b79d4a61 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3352,12 +3352,6 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, 
fc_port_t *fcport) "Unable to allocate fc remote port.\n"); return; } - /* - * Create target mode FC NEXUS in qla_target.c if target mode is - * enabled.. - */ - - qlt_fc_port_added(vha, fcport); spin_lock_irqsave(fcport->vha->host->host_lock, flags); *((fc_port_t **)rport->dd_data) = fcport; @@ -3407,12 +3401,6 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) reg_port: if (qla_ini_mode_enabled(vha)) qla2x00_reg_remote_port(vha, fcport); - else { - /* - * Create target mode FC NEXUS in qla_target.c - */ - qlt_fc_port_added(vha, fcport); - } } /* diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 58e49a3e1de8..834e22197842 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2248,7 +2248,7 @@ qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->control_flags = cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO); if (!sp->fcport->tgt_session || - !sp->fcport->tgt_session->keep_nport_handle) + !sp->fcport->keep_nport_handle) logio->control_flags |= cpu_to_le16(LCF_FREE_NPORT); logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); logio->port_id[0] = sp->fcport->d_id.b.al_pa; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 671875a50951..40b61a327786 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -102,7 +102,7 @@ enum fcp_resp_rsp_codes { static void qlt_24xx_atio_pkt(struct scsi_qla_host *ha, struct atio_from_isp *pkt, uint8_t); static void qlt_response_pkt(struct scsi_qla_host *ha, response_t *pkt); -static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, +static int qlt_issue_task_mgmt(struct fc_port *sess, u64 lun, int fn, void *iocb, int flags); static void qlt_send_term_exchange(struct scsi_qla_host *ha, struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked, int ul_abort); @@ -138,21 +138,6 @@ void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest) wmb(); } -/* ha->hardware_lock supposed to be held on entry (to protect tgt->sess_list) */ -static struct qla_tgt_sess *qlt_find_sess_by_port_name( - struct qla_tgt *tgt, - const uint8_t *port_name) -{ - struct qla_tgt_sess *sess; - - list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) { - if (!memcmp(sess->port_name, port_name, WWN_SIZE)) - return sess; - } - - return NULL; -} - /* Might release hw lock, then reaquire!! */ static inline int qlt_issue_marker(struct scsi_qla_host *vha, int vha_locked) { @@ -399,16 +384,16 @@ void qlt_response_pkt_all_vps(struct scsi_qla_host *vha, response_t *pkt) * guarantees that ref_count is not modified concurrently. 
* Upon successful return content of iocb is undefined */ -static qlt_plogi_ack_t * +static struct qlt_plogi_ack_t * qlt_plogi_ack_find_add(struct scsi_qla_host *vha, port_id_t *id, struct imm_ntfy_from_isp *iocb) { - qlt_plogi_ack_t *pla; + struct qlt_plogi_ack_t *pla; list_for_each_entry(pla, &vha->plogi_ack_list, list) { if (pla->id.b24 == id->b24) { qlt_send_term_imm_notif(vha, &pla->iocb, 1); - pla->iocb = *iocb; + memcpy(&pla->iocb, iocb, sizeof(pla->iocb)); return pla; } } @@ -421,15 +406,17 @@ qlt_plogi_ack_find_add(struct scsi_qla_host *vha, port_id_t *id, return NULL; } - pla->iocb = *iocb; + memcpy(&pla->iocb, iocb, sizeof(pla->iocb)); pla->id = *id; list_add_tail(&pla->list, &vha->plogi_ack_list); return pla; } -static void qlt_plogi_ack_unref(struct scsi_qla_host *vha, qlt_plogi_ack_t *pla) +static void qlt_plogi_ack_unref(struct scsi_qla_host *vha, + struct qlt_plogi_ack_t *pla) { + struct imm_ntfy_from_isp *iocb = &pla->iocb; BUG_ON(!pla->ref_count); pla->ref_count--; @@ -438,21 +425,22 @@ static void qlt_plogi_ack_unref(struct scsi_qla_host *vha, qlt_plogi_ack_t *pla) ql_dbg(ql_dbg_async, vha, 0x5089, "Sending PLOGI ACK to wwn %8phC s_id %02x:%02x:%02x loop_id %#04x" - " exch %#x ox_id %#x\n", pla->iocb.u.isp24.port_name, - pla->iocb.u.isp24.port_id[2], pla->iocb.u.isp24.port_id[1], - pla->iocb.u.isp24.port_id[0], - le16_to_cpu(pla->iocb.u.isp24.nport_handle), - pla->iocb.u.isp24.exchange_address, pla->iocb.ox_id); - qlt_send_notify_ack(vha, &pla->iocb, 0, 0, 0, 0, 0, 0); + " exch %#x ox_id %#x\n", iocb->u.isp24.port_name, + iocb->u.isp24.port_id[2], iocb->u.isp24.port_id[1], + iocb->u.isp24.port_id[0], + le16_to_cpu(iocb->u.isp24.nport_handle), + iocb->u.isp24.exchange_address, iocb->ox_id); + qlt_send_notify_ack(vha, iocb, 0, 0, 0, 0, 0, 0); list_del(&pla->list); kmem_cache_free(qla_tgt_plogi_cachep, pla); } static void -qlt_plogi_ack_link(struct scsi_qla_host *vha, qlt_plogi_ack_t *pla, - struct qla_tgt_sess *sess, qlt_plogi_link_t link) +qlt_plogi_ack_link(struct scsi_qla_host *vha, struct qlt_plogi_ack_t *pla, + struct fc_port *sess, enum qlt_plogi_link_t link) { + struct imm_ntfy_from_isp *iocb = &pla->iocb; /* Inc ref_count first because link might already be pointing at pla */ pla->ref_count++; @@ -462,8 +450,8 @@ qlt_plogi_ack_link(struct scsi_qla_host *vha, qlt_plogi_ack_t *pla, ql_dbg(ql_dbg_tgt_mgt, vha, 0xf097, "Linking sess %p [%d] wwn %8phC with PLOGI ACK to wwn %8phC" " s_id %02x:%02x:%02x, ref=%d\n", sess, link, sess->port_name, - pla->iocb.u.isp24.port_name, pla->iocb.u.isp24.port_id[2], - pla->iocb.u.isp24.port_id[1], pla->iocb.u.isp24.port_id[0], + iocb->u.isp24.port_name, iocb->u.isp24.port_id[2], + iocb->u.isp24.port_id[1], iocb->u.isp24.port_id[0], pla->ref_count); sess->plogi_link[link] = pla; @@ -517,7 +505,7 @@ qlt_send_first_logo(struct scsi_qla_host *vha, qlt_port_logo_t *logo) static void qlt_free_session_done(struct work_struct *work) { - struct qla_tgt_sess *sess = container_of(work, struct qla_tgt_sess, + struct fc_port *sess = container_of(work, struct fc_port, free_work); struct qla_tgt *tgt = sess->tgt; struct scsi_qla_host *vha = sess->vha; @@ -551,7 +539,6 @@ static void qlt_free_session_done(struct work_struct *work) fcport.d_id = sess->d_id; memcpy(fcport.port_name, sess->port_name, WWN_SIZE); fcport.vha = vha; - fcport.tgt_session = sess; rc = qla2x00_post_async_logout_work(vha, &fcport, NULL); if (rc != QLA_SUCCESS) @@ -587,22 +574,22 @@ static void qlt_free_session_done(struct work_struct *work) } 
spin_lock_irqsave(&ha->hardware_lock, flags); - { - qlt_plogi_ack_t *own = + struct qlt_plogi_ack_t *own = sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN]; - qlt_plogi_ack_t *con = + struct qlt_plogi_ack_t *con = sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]; + struct imm_ntfy_from_isp *iocb; if (con) { + iocb = &con->iocb; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf099, - "se_sess %p / sess %p port %8phC is gone," - " %s (ref=%d), releasing PLOGI for %8phC (ref=%d)\n", - sess->se_sess, sess, sess->port_name, - own ? "releasing own PLOGI" : - "no own PLOGI pending", - own ? own->ref_count : -1, - con->iocb.u.isp24.port_name, con->ref_count); + "se_sess %p / sess %p port %8phC is gone," + " %s (ref=%d), releasing PLOGI for %8phC (ref=%d)\n", + sess->se_sess, sess, sess->port_name, + own ? "releasing own PLOGI" : "no own PLOGI pending", + own ? own->ref_count : -1, + iocb->u.isp24.port_name, con->ref_count); qlt_plogi_ack_unref(vha, con); } else { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf09a, @@ -616,15 +603,15 @@ static void qlt_free_session_done(struct work_struct *work) if (own) qlt_plogi_ack_unref(vha, own); } - - list_del(&sess->sess_list_entry); - spin_unlock_irqrestore(&ha->hardware_lock, flags); + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + sess->se_sess = NULL; + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001, "Unregistration of sess %p finished\n", sess); - kfree(sess); /* * We need to protect against race, when tgt is freed before or * inside wake_up() @@ -635,37 +622,30 @@ static void qlt_free_session_done(struct work_struct *work) } /* ha->tgt.sess_lock supposed to be held on entry */ -static void qlt_release_session(struct kref *kref) +void qlt_unreg_sess(struct fc_port *sess) { - struct qla_tgt_sess *sess = - container_of(kref, struct qla_tgt_sess, sess_kref); struct scsi_qla_host *vha = sess->vha; + ql_dbg(ql_dbg_disc, sess->vha, 0xffff, + "%s sess %p for deletion %8phC\n", + __func__, sess, sess->port_name); + if (sess->se_sess) vha->hw->tgt.tgt_ops->clear_nacl_from_fcport_map(sess); - if (!list_empty(&sess->del_list_entry)) - list_del_init(&sess->del_list_entry); + qla2x00_mark_device_lost(vha, sess, 1, 1); + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; INIT_WORK(&sess->free_work, qlt_free_session_done); schedule_work(&sess->free_work); } - -void qlt_put_sess(struct qla_tgt_sess *sess) -{ - if (!sess) - return; - - assert_spin_locked(&sess->vha->hw->tgt.sess_lock); - kref_put(&sess->sess_kref, qlt_release_session); -} -EXPORT_SYMBOL(qlt_put_sess); +EXPORT_SYMBOL(qlt_unreg_sess); static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) { struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; uint16_t loop_id; int res = 0; struct imm_ntfy_from_isp *n = (struct imm_ntfy_from_isp *)iocb; @@ -678,31 +658,6 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) spin_lock_irqsave(&ha->tgt.sess_lock, flags); qlt_clear_tgt_db(vha->vha_tgt.qla_tgt); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); -#if 0 /* FIXME: do we need to choose a session here? 
*/ - if (!list_empty(&ha->tgt.qla_tgt->sess_list)) { - sess = list_entry(ha->tgt.qla_tgt->sess_list.next, - typeof(*sess), sess_list_entry); - switch (mcmd) { - case QLA_TGT_NEXUS_LOSS_SESS: - mcmd = QLA_TGT_NEXUS_LOSS; - break; - case QLA_TGT_ABORT_ALL_SESS: - mcmd = QLA_TGT_ABORT_ALL; - break; - case QLA_TGT_NEXUS_LOSS: - case QLA_TGT_ABORT_ALL: - break; - default: - ql_dbg(ql_dbg_tgt, vha, 0xe046, - "qla_target(%d): Not allowed " - "command %x in %s", vha->vp_idx, - mcmd, __func__); - sess = NULL; - break; - } - } else - sess = NULL; -#endif } else { spin_lock_irqsave(&ha->tgt.sess_lock, flags); sess = ha->tgt.tgt_ops->find_sess_by_loop_id(vha, loop_id); @@ -725,7 +680,7 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) } /* ha->tgt.sess_lock supposed to be held on entry */ -static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, +static void qlt_schedule_sess_for_deletion(struct fc_port *sess, bool immediate) { struct qla_tgt *tgt = sess->tgt; @@ -771,10 +726,13 @@ static void qlt_schedule_sess_for_deletion(struct qla_tgt_sess *sess, /* ha->tgt.sess_lock supposed to be held on entry */ static void qlt_clear_tgt_db(struct qla_tgt *tgt) { - struct qla_tgt_sess *sess; + struct fc_port *sess; + scsi_qla_host_t *vha = tgt->vha; - list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) - qlt_schedule_sess_for_deletion(sess, true); + list_for_each_entry(sess, &vha->vp_fcports, list) { + if (sess->se_sess) + qlt_schedule_sess_for_deletion(sess, true); + } /* At this point tgt could be already dead */ } @@ -829,7 +787,7 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id, } /* ha->tgt.sess_lock supposed to be held on entry */ -static void qlt_undelete_sess(struct qla_tgt_sess *sess) +static void qlt_undelete_sess(struct fc_port *sess) { BUG_ON(sess->deleted != QLA_SESS_DELETION_PENDING); @@ -843,7 +801,7 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) sess_del_work); struct scsi_qla_host *vha = tgt->vha; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; unsigned long flags, elapsed; spin_lock_irqsave(&ha->tgt.sess_lock, flags); @@ -861,7 +819,7 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) sess); if (sess->se_sess) ha->tgt.tgt_ops->shutdown_sess(sess); - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); } else { schedule_delayed_work(&tgt->sess_del_work, sess->expires - elapsed); @@ -875,19 +833,18 @@ static void qlt_del_sess_work_fn(struct delayed_work *work) * Adds an extra ref to allow to drop hw lock after adding sess to the list. * Caller must put it. 
*/ -static struct qla_tgt_sess *qlt_create_sess( +static struct fc_port *qlt_create_sess( struct scsi_qla_host *vha, fc_port_t *fcport, bool local) { struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; unsigned long flags; /* Check to avoid double sessions */ spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_for_each_entry(sess, &vha->vha_tgt.qla_tgt->sess_list, - sess_list_entry) { + list_for_each_entry(sess, &vha->vp_fcports, list) { if (!memcmp(sess->port_name, fcport->port_name, WWN_SIZE)) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf005, "Double sess %p found (s_id %x:%x:%x, " @@ -965,7 +922,7 @@ static struct qla_tgt_sess *qlt_create_sess( memcpy(sess->port_name, fcport->port_name, sizeof(sess->port_name)); spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_add_tail(&sess->sess_list_entry, &vha->vha_tgt.qla_tgt->sess_list); + list_add_tail(&sess->list, &vha->vp_fcports); vha->vha_tgt.qla_tgt->sess_count++; qlt_do_generation_tick(vha, &sess->generation); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); @@ -988,7 +945,7 @@ static struct qla_tgt_sess *qlt_create_sess( return NULL; } else { /* - * Take an extra reference to ->sess_kref here to handle qla_tgt_sess + * Take an extra reference to ->sess_kref here to handle fc_port * access across ->tgt.sess_lock reaquire. */ kref_get(&sess->sess_kref); @@ -997,73 +954,6 @@ static struct qla_tgt_sess *qlt_create_sess( return sess; } -/* - * Called from qla2x00_reg_remote_port() - */ -void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) -{ - struct qla_hw_data *ha = vha->hw; - struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_sess *sess; - unsigned long flags; - - if (!vha->hw->tgt.tgt_ops) - return; - - if (!tgt || (fcport->port_type != FCT_INITIATOR)) - return; - - if (qla_ini_mode_enabled(vha)) - return; - - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - if (tgt->tgt_stop) { - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - return; - } - sess = qlt_find_sess_by_port_name(tgt, fcport->port_name); - if (!sess) { - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - - mutex_lock(&vha->vha_tgt.tgt_mutex); - sess = qlt_create_sess(vha, fcport, false); - mutex_unlock(&vha->vha_tgt.tgt_mutex); - - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - } else if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { - /* Point of no return */ - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - return; - } else { - kref_get(&sess->sess_kref); - - if (sess->deleted) { - qlt_undelete_sess(sess); - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04c, - "qla_target(%u): %ssession for port %8phC " - "(loop ID %d) reappeared\n", vha->vp_idx, - sess->local ? 
"local " : "", sess->port_name, - sess->loop_id); - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf007, - "Reappeared sess %p\n", sess); - } - ha->tgt.tgt_ops->update_sess(sess, fcport->d_id, fcport->loop_id, - (fcport->flags & FCF_CONF_COMP_SUPPORTED)); - } - - if (sess && sess->local) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04d, - "qla_target(%u): local session for " - "port %8phC (loop ID %d) became global\n", vha->vp_idx, - fcport->port_name, sess->loop_id); - sess->local = 0; - } - qlt_put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); -} - /* * max_gen - specifies maximum session generation * at which this deletion requestion is still valid @@ -1072,7 +962,7 @@ void qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport, int max_gen) { struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_sess *sess; + struct fc_port *sess = fcport; unsigned long flags; if (!vha->hw->tgt.tgt_ops) @@ -1086,8 +976,7 @@ qlt_fc_port_deleted(struct scsi_qla_host *vha, fc_port_t *fcport, int max_gen) spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); return; } - sess = qlt_find_sess_by_port_name(tgt, fcport->port_name); - if (!sess) { + if (!sess->se_sess) { spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); return; } @@ -1120,8 +1009,8 @@ static inline int test_tgt_sess_count(struct qla_tgt *tgt) */ spin_lock_irqsave(&ha->hardware_lock, flags); ql_dbg(ql_dbg_tgt, tgt->vha, 0xe002, - "tgt %p, empty(sess_list)=%d sess_count=%d\n", - tgt, list_empty(&tgt->sess_list), tgt->sess_count); + "tgt %p, sess_count=%d\n", + tgt, tgt->sess_count); res = (tgt->sess_count == 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -1184,9 +1073,7 @@ int qlt_stop_phase1(struct qla_tgt *tgt) spin_unlock_irqrestore(&tgt->sess_work_lock, flags); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00a, - "Waiting for tgt %p: list_empty(sess_list)=%d " - "sess_count=%d\n", tgt, list_empty(&tgt->sess_list), - tgt->sess_count); + "Waiting for tgt %p: sess_count=%d\n", tgt, tgt->sess_count); wait_event(tgt->waitQ, test_tgt_sess_count(tgt)); @@ -1538,7 +1425,7 @@ static void abort_cmds_for_lun(struct scsi_qla_host *vha, /* ha->hardware_lock supposed to be held on entry */ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, - struct abts_recv_from_24xx *abts, struct qla_tgt_sess *sess) + struct abts_recv_from_24xx *abts, struct fc_port *sess) { struct qla_hw_data *ha = vha->hw; struct se_session *se_sess = sess->se_sess; @@ -1547,8 +1434,9 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, u32 lun = 0; int rc; bool found_lun = false; + unsigned long flags; - spin_lock(&se_sess->sess_cmd_lock); + spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); list_for_each_entry(se_cmd, &se_sess->sess_cmd_list, se_cmd_list) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); @@ -1558,7 +1446,7 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, break; } } - spin_unlock(&se_sess->sess_cmd_lock); + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); /* cmd not in LIO lists, look in qla list */ if (!found_lun) { @@ -1612,7 +1500,7 @@ static void qlt_24xx_handle_abts(struct scsi_qla_host *vha, struct abts_recv_from_24xx *abts) { struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; uint32_t tag = abts->exchange_addr_to_abort; uint8_t s_id[3]; int rc; @@ -3215,7 +3103,7 @@ EXPORT_SYMBOL(qlt_abort_cmd); void qlt_free_cmd(struct qla_tgt_cmd *cmd) { - struct qla_tgt_sess *sess = cmd->sess; + struct fc_port *sess = cmd->sess; ql_dbg(ql_dbg_tgt, 
cmd->vha, 0xe074, "%s: se_cmd[%p] ox_id %04x\n", @@ -3577,7 +3465,7 @@ static inline int qlt_get_fcp_task_attr(struct scsi_qla_host *vha, return fcp_task_attr; } -static struct qla_tgt_sess *qlt_make_local_sess(struct scsi_qla_host *, +static struct fc_port *qlt_make_local_sess(struct scsi_qla_host *, uint8_t *); /* * Process context for I/O path into tcm_qla2xxx code @@ -3587,7 +3475,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) scsi_qla_host_t *vha = cmd->vha; struct qla_hw_data *ha = vha->hw; struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_sess *sess = cmd->sess; + struct fc_port *sess = cmd->sess; struct atio_from_isp *atio = &cmd->atio; unsigned char *cdb; unsigned long flags; @@ -3637,7 +3525,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) * Drop extra session reference from qla_tgt_handle_cmd_for_atio*( */ spin_lock_irqsave(&ha->tgt.sess_lock, flags); - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return; @@ -3656,7 +3544,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) spin_unlock_irqrestore(&ha->hardware_lock, flags); spin_lock_irqsave(&ha->tgt.sess_lock, flags); - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } @@ -3674,7 +3562,7 @@ static void qlt_do_work(struct work_struct *work) } static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha, - struct qla_tgt_sess *sess, + struct fc_port *sess, struct atio_from_isp *atio) { struct se_session *se_sess = sess->se_sess; @@ -3715,7 +3603,7 @@ static void qlt_create_sess_from_atio(struct work_struct *work) struct qla_tgt_sess_op, work); scsi_qla_host_t *vha = op->vha; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; struct qla_tgt_cmd *cmd; unsigned long flags; uint8_t *s_id = op->atio.u.isp24.fcp_hdr.s_id; @@ -3756,7 +3644,7 @@ static void qlt_create_sess_from_atio(struct work_struct *work) if (!cmd) { spin_lock_irqsave(&ha->hardware_lock, flags); qlt_send_busy(vha, &op->atio, SAM_STAT_BUSY); - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->hardware_lock, flags); kfree(op); return; @@ -3783,8 +3671,9 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, { struct qla_hw_data *ha = vha->hw; struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - struct qla_tgt_sess *sess; + struct fc_port *sess; struct qla_tgt_cmd *cmd; + unsigned long flags; if (unlikely(tgt->tgt_stop)) { ql_dbg(ql_dbg_io, vha, 0x3061, @@ -3829,7 +3718,9 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, if (!cmd) { ql_dbg(ql_dbg_io, vha, 0x3062, "qla_target(%d): Allocation of cmd failed\n", vha->vp_idx); - qlt_put_sess(sess); + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return -ENOMEM; } @@ -3858,7 +3749,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, } /* ha->hardware_lock supposed to be held on entry */ -static int qlt_issue_task_mgmt(struct qla_tgt_sess *sess, uint32_t lun, +static int qlt_issue_task_mgmt(struct fc_port *sess, u64 lun, int fn, void *iocb, int flags) { struct scsi_qla_host *vha = sess->vha; @@ -3910,7 +3801,7 @@ static int qlt_handle_task_mgmt(struct scsi_qla_host *vha, void *iocb) struct atio_from_isp *a = (struct atio_from_isp *)iocb; struct qla_hw_data *ha = vha->hw; struct qla_tgt *tgt; - struct qla_tgt_sess *sess; + struct fc_port *sess; uint32_t lun, unpacked_lun; int fn; unsigned long 
flags; @@ -3943,7 +3834,7 @@ static int qlt_handle_task_mgmt(struct scsi_qla_host *vha, void *iocb) /* ha->hardware_lock supposed to be held on entry */ static int __qlt_abort_task(struct scsi_qla_host *vha, - struct imm_ntfy_from_isp *iocb, struct qla_tgt_sess *sess) + struct imm_ntfy_from_isp *iocb, struct fc_port *sess) { struct atio_from_isp *a = (struct atio_from_isp *)iocb; struct qla_hw_data *ha = vha->hw; @@ -3987,7 +3878,7 @@ static int qlt_abort_task(struct scsi_qla_host *vha, struct imm_ntfy_from_isp *iocb) { struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; int loop_id; unsigned long flags; @@ -4017,14 +3908,14 @@ void qlt_logo_completion_handler(fc_port_t *fcport, int rc) " port %8phC loop_id %#04x s_id %02x:%02x:%02x" " LOGO failed: %#x\n", __func__, - fcport->tgt_session->se_sess, + fcport->se_sess, fcport->tgt_session, fcport->port_name, fcport->loop_id, fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, rc); } - fcport->tgt_session->logout_completed = 1; + fcport->logout_completed = 1; } } @@ -4035,16 +3926,17 @@ void qlt_logo_completion_handler(fc_port_t *fcport, int rc) * deletion. Returns existing session with matching wwn if present. * Null otherwise. */ -static struct qla_tgt_sess * +static struct fc_port * qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, - port_id_t port_id, uint16_t loop_id, struct qla_tgt_sess **conflict_sess) + port_id_t port_id, uint16_t loop_id, struct fc_port **conflict_sess) { - struct qla_tgt_sess *sess = NULL, *other_sess; + struct fc_port *sess = NULL, *other_sess; uint64_t other_wwn; + scsi_qla_host_t *vha = tgt->vha; *conflict_sess = NULL; - list_for_each_entry(other_sess, &tgt->sess_list, sess_list_entry) { + list_for_each_entry(other_sess, &vha->vp_fcports, list) { other_wwn = wwn_to_u64(other_sess->port_name); @@ -4136,13 +4028,13 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, { struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess = NULL, *conflict_sess = NULL; + struct fc_port *sess = NULL, *conflict_sess = NULL; uint64_t wwn; port_id_t port_id; uint16_t loop_id; uint16_t wd3_lo; int res = 0; - qlt_plogi_ack_t *pla; + struct qlt_plogi_ack_t *pla; unsigned long flags; wwn = wwn_to_u64(iocb->u.isp24.port_name); @@ -4455,7 +4347,7 @@ static int __qlt_send_busy(struct scsi_qla_host *vha, struct ctio7_to_24xx *ctio24; struct qla_hw_data *ha = vha->hw; request_t *pkt; - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; unsigned long flags; spin_lock_irqsave(&ha->tgt.sess_lock, flags); @@ -4516,7 +4408,7 @@ qlt_alloc_qfull_cmd(struct scsi_qla_host *vha, { struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess; + struct fc_port *sess; struct se_session *se_sess; struct qla_tgt_cmd *cmd; int tag; @@ -5112,10 +5004,10 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha, } /* Must be called under tgt_mutex */ -static struct qla_tgt_sess *qlt_make_local_sess(struct scsi_qla_host *vha, +static struct fc_port *qlt_make_local_sess(struct scsi_qla_host *vha, uint8_t *s_id) { - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; fc_port_t *fcport = NULL; int rc, global_resets; uint16_t loop_id = 0; @@ -5177,7 +5069,6 @@ static struct qla_tgt_sess *qlt_make_local_sess(struct scsi_qla_host *vha, mutex_unlock(&vha->vha_tgt.tgt_mutex); - kfree(fcport); return sess; } @@ -5186,7 +5077,7 @@ static void qlt_abort_work(struct 
qla_tgt *tgt, { struct scsi_qla_host *vha = tgt->vha; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; unsigned long flags = 0, flags2 = 0; uint32_t be_s_id; uint8_t s_id[3]; @@ -5230,8 +5121,8 @@ static void qlt_abort_work(struct qla_tgt *tgt, if (rc != 0) goto out_term; spin_unlock_irqrestore(&ha->hardware_lock, flags); - - qlt_put_sess(sess); + if (sess) + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); return; @@ -5242,7 +5133,7 @@ static void qlt_abort_work(struct qla_tgt *tgt, qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false); spin_unlock_irqrestore(&ha->hardware_lock, flags); - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); } @@ -5252,7 +5143,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, struct atio_from_isp *a = &prm->tm_iocb2; struct scsi_qla_host *vha = tgt->vha; struct qla_hw_data *ha = vha->hw; - struct qla_tgt_sess *sess = NULL; + struct fc_port *sess = NULL; unsigned long flags; uint8_t *s_id = NULL; /* to hide compiler warnings */ int rc; @@ -5294,13 +5185,14 @@ static void qlt_tmr_work(struct qla_tgt *tgt, if (rc != 0) goto out_term; - qlt_put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return; out_term: qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0); - qlt_put_sess(sess); + if (sess) + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } @@ -5377,7 +5269,6 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) tgt->ha = ha; tgt->vha = base_vha; init_waitqueue_head(&tgt->waitQ); - INIT_LIST_HEAD(&tgt->sess_list); INIT_LIST_HEAD(&tgt->del_sess_list); INIT_DELAYED_WORK(&tgt->sess_del_work, (void (*)(struct work_struct *))qlt_del_sess_work_fn); @@ -6233,9 +6124,8 @@ int __init qlt_init(void) } qla_tgt_plogi_cachep = kmem_cache_create("qla_tgt_plogi_cachep", - sizeof(qlt_plogi_ack_t), - __alignof__(qlt_plogi_ack_t), - 0, NULL); + sizeof(struct qlt_plogi_ack_t), __alignof__(struct qlt_plogi_ack_t), + 0, NULL); if (!qla_tgt_plogi_cachep) { ql_log(ql_log_fatal, NULL, 0xe06d, diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 07ccf81f6d22..9c35ba15c687 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -118,84 +118,6 @@ ? le16_to_cpu((iocb)->u.isp2x.target.extended) \ : (uint16_t)(iocb)->u.isp2x.target.id.standard) -#ifndef IMMED_NOTIFY_TYPE -#define IMMED_NOTIFY_TYPE 0x0D /* Immediate notify entry. */ -/* - * ISP queue - immediate notify entry structure definition. - * This is sent by the ISP to the Target driver. - * This IOCB would have report of events sent by the - * initiator, that needs to be handled by the target - * driver immediately. - */ -struct imm_ntfy_from_isp { - uint8_t entry_type; /* Entry type. */ - uint8_t entry_count; /* Entry count. */ - uint8_t sys_define; /* System defined. */ - uint8_t entry_status; /* Entry Status. */ - union { - struct { - uint32_t sys_define_2; /* System defined. 
*/ - target_id_t target; - uint16_t lun; - uint8_t target_id; - uint8_t reserved_1; - uint16_t status_modifier; - uint16_t status; - uint16_t task_flags; - uint16_t seq_id; - uint16_t srr_rx_id; - uint32_t srr_rel_offs; - uint16_t srr_ui; -#define SRR_IU_DATA_IN 0x1 -#define SRR_IU_DATA_OUT 0x5 -#define SRR_IU_STATUS 0x7 - uint16_t srr_ox_id; - uint8_t reserved_2[28]; - } isp2x; - struct { - uint32_t reserved; - uint16_t nport_handle; - uint16_t reserved_2; - uint16_t flags; -#define NOTIFY24XX_FLAGS_GLOBAL_TPRLO BIT_1 -#define NOTIFY24XX_FLAGS_PUREX_IOCB BIT_0 - uint16_t srr_rx_id; - uint16_t status; - uint8_t status_subcode; - uint8_t fw_handle; - uint32_t exchange_address; - uint32_t srr_rel_offs; - uint16_t srr_ui; - uint16_t srr_ox_id; - union { - struct { - uint8_t node_name[8]; - } plogi; /* PLOGI/ADISC/PDISC */ - struct { - /* PRLI word 3 bit 0-15 */ - uint16_t wd3_lo; - uint8_t resv0[6]; - } prli; - struct { - uint8_t port_id[3]; - uint8_t resv1; - uint16_t nport_handle; - uint16_t resv2; - } req_els; - } u; - uint8_t port_name[8]; - uint8_t resv3[3]; - uint8_t vp_index; - uint32_t reserved_5; - uint8_t port_id[3]; - uint8_t reserved_6; - } isp24; - } u; - uint16_t reserved_7; - uint16_t ox_id; -} __packed; -#endif - #ifndef NOTIFY_ACK_TYPE #define NOTIFY_ACK_TYPE 0x0E /* Notify acknowledge entry. */ /* @@ -731,7 +653,7 @@ struct abts_resp_from_24xx_fw { \********************************************************************/ struct qla_tgt_mgmt_cmd; -struct qla_tgt_sess; +struct fc_port; /* * This structure provides a template of function calls that the @@ -748,17 +670,18 @@ struct qla_tgt_func_tmpl { uint32_t); void (*free_cmd)(struct qla_tgt_cmd *); void (*free_mcmd)(struct qla_tgt_mgmt_cmd *); - void (*free_session)(struct qla_tgt_sess *); + void (*free_session)(struct fc_port *); int (*check_initiator_node_acl)(struct scsi_qla_host *, unsigned char *, - struct qla_tgt_sess *); - void (*update_sess)(struct qla_tgt_sess *, port_id_t, uint16_t, bool); - struct qla_tgt_sess *(*find_sess_by_loop_id)(struct scsi_qla_host *, + struct fc_port *); + void (*update_sess)(struct fc_port *, port_id_t, uint16_t, bool); + struct fc_port *(*find_sess_by_loop_id)(struct scsi_qla_host *, const uint16_t); - struct qla_tgt_sess *(*find_sess_by_s_id)(struct scsi_qla_host *, + struct fc_port *(*find_sess_by_s_id)(struct scsi_qla_host *, const uint8_t *); - void (*clear_nacl_from_fcport_map)(struct qla_tgt_sess *); - void (*shutdown_sess)(struct qla_tgt_sess *); + void (*clear_nacl_from_fcport_map)(struct fc_port *); + void (*put_sess)(struct fc_port *); + void (*shutdown_sess)(struct fc_port *); }; int qla2x00_wait_for_hba_online(struct scsi_qla_host *); @@ -874,9 +797,6 @@ struct qla_tgt { /* Count of sessions refering qla_tgt. Protected by hardware_lock. */ int sess_count; - /* Protected by hardware_lock. Addition also protected by tgt_mutex. 
*/ - struct list_head sess_list; - /* Protected by hardware_lock */ struct list_head del_sess_list; struct delayed_work sess_del_work; @@ -910,52 +830,6 @@ enum qla_sess_deletion { QLA_SESS_DELETION_IN_PROGRESS = 2, }; -typedef enum { - QLT_PLOGI_LINK_SAME_WWN, - QLT_PLOGI_LINK_CONFLICT, - QLT_PLOGI_LINK_MAX -} qlt_plogi_link_t; - -typedef struct { - struct list_head list; - struct imm_ntfy_from_isp iocb; - port_id_t id; - int ref_count; -} qlt_plogi_ack_t; - -/* - * Equivilant to IT Nexus (Initiator-Target) - */ -struct qla_tgt_sess { - uint16_t loop_id; - port_id_t d_id; - - unsigned int conf_compl_supported:1; - unsigned int deleted:2; - unsigned int local:1; - unsigned int logout_on_delete:1; - unsigned int keep_nport_handle:1; - unsigned int send_els_logo:1; - - unsigned char logout_completed; - - int generation; - - struct se_session *se_sess; - struct kref sess_kref; - struct scsi_qla_host *vha; - struct qla_tgt *tgt; - - struct list_head sess_list_entry; - unsigned long expires; - struct list_head del_list_entry; - - uint8_t port_name[WWN_SIZE]; - struct work_struct free_work; - - qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; -}; - enum trace_flags { TRC_NEW_CMD = BIT_0, TRC_DO_WORK = BIT_1, @@ -981,7 +855,7 @@ enum trace_flags { struct qla_tgt_cmd { struct se_cmd se_cmd; - struct qla_tgt_sess *sess; + struct fc_port *sess; int state; struct work_struct free_work; struct work_struct work; @@ -1046,7 +920,7 @@ struct qla_tgt_sess_work_param { struct qla_tgt_mgmt_cmd { uint16_t tmr_func; uint8_t fc_tm_rsp; - struct qla_tgt_sess *sess; + struct fc_port *sess; struct se_cmd se_cmd; struct work_struct free_work; unsigned int flags; @@ -1097,7 +971,7 @@ extern int qlt_remove_target(struct qla_hw_data *, struct scsi_qla_host *); extern int qlt_lport_register(void *, u64, u64, u64, int (*callback)(struct scsi_qla_host *, void *, u64, u64)); extern void qlt_lport_deregister(struct scsi_qla_host *); -void qlt_put_sess(struct qla_tgt_sess *sess); +extern void qlt_unreg_sess(struct fc_port *); extern void qlt_fc_port_added(struct scsi_qla_host *, fc_port_t *); extern void qlt_fc_port_deleted(struct scsi_qla_host *, fc_port_t *, int); extern int __init qlt_init(void); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index c96de1cfbd85..e37d7ee95473 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -339,9 +339,26 @@ static void tcm_qla2xxx_release_cmd(struct se_cmd *se_cmd) qlt_free_cmd(cmd); } +static void tcm_qla2xxx_release_session(struct kref *kref) +{ + struct fc_port *sess = container_of(kref, + struct fc_port, sess_kref); + + qlt_unreg_sess(sess); +} + +static void tcm_qla2xxx_put_sess(struct fc_port *sess) +{ + if (!sess) + return; + + assert_spin_locked(&sess->vha->hw->tgt.sess_lock); + kref_put(&sess->sess_kref, tcm_qla2xxx_release_session); +} + static void tcm_qla2xxx_close_session(struct se_session *se_sess) { - struct qla_tgt_sess *sess = se_sess->fabric_sess_ptr; + struct fc_port *sess = se_sess->fabric_sess_ptr; struct scsi_qla_host *vha; unsigned long flags; @@ -350,7 +367,7 @@ static void tcm_qla2xxx_close_session(struct se_session *se_sess) spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); target_sess_cmd_list_set_waiting(se_sess); - qlt_put_sess(sess); + tcm_qla2xxx_put_sess(sess); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); } @@ -441,7 +458,7 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, { struct se_cmd *se_cmd = &cmd->se_cmd; struct se_session 
*se_sess; - struct qla_tgt_sess *sess; + struct fc_port *sess; #ifdef CONFIG_TCM_QLA2XXX_DEBUG struct se_portal_group *se_tpg; struct tcm_qla2xxx_tpg *tpg; @@ -456,7 +473,7 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, sess = cmd->sess; if (!sess) { - pr_err("Unable to locate struct qla_tgt_sess from qla_tgt_cmd\n"); + pr_err("Unable to locate struct fc_port from qla_tgt_cmd\n"); return -EINVAL; } @@ -565,7 +582,7 @@ static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd) static int tcm_qla2xxx_handle_tmr(struct qla_tgt_mgmt_cmd *mcmd, uint32_t lun, uint16_t tmr_func, uint32_t tag) { - struct qla_tgt_sess *sess = mcmd->sess; + struct fc_port *sess = mcmd->sess; struct se_cmd *se_cmd = &mcmd->se_cmd; int transl_tmr_func = 0; @@ -746,11 +763,11 @@ static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) } static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *, - struct tcm_qla2xxx_nacl *, struct qla_tgt_sess *); + struct tcm_qla2xxx_nacl *, struct fc_port *); /* * Expected to be called with struct qla_hw_data->tgt.sess_lock held */ -static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess) +static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct fc_port *sess) { struct se_node_acl *se_nacl = sess->se_sess->se_node_acl; struct se_portal_group *se_tpg = se_nacl->se_tpg; @@ -789,7 +806,7 @@ static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess) tcm_qla2xxx_clear_sess_lookup(lport, nacl, sess); } -static void tcm_qla2xxx_shutdown_sess(struct qla_tgt_sess *sess) +static void tcm_qla2xxx_shutdown_sess(struct fc_port *sess) { assert_spin_locked(&sess->vha->hw->tgt.sess_lock); target_sess_cmd_list_set_waiting(sess->se_sess); @@ -1174,7 +1191,7 @@ static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg( /* * Expected to be called with struct qla_hw_data->tgt.sess_lock held */ -static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_s_id( +static struct fc_port *tcm_qla2xxx_find_sess_by_s_id( scsi_qla_host_t *vha, const uint8_t *s_id) { @@ -1202,12 +1219,12 @@ static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_s_id( se_nacl, se_nacl->initiatorname); nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); - if (!nacl->qla_tgt_sess) { - pr_err("Unable to locate struct qla_tgt_sess\n"); + if (!nacl->fc_port) { + pr_err("Unable to locate struct fc_port\n"); return NULL; } - return nacl->qla_tgt_sess; + return nacl->fc_port; } /* @@ -1218,7 +1235,7 @@ static void tcm_qla2xxx_set_sess_by_s_id( struct se_node_acl *new_se_nacl, struct tcm_qla2xxx_nacl *nacl, struct se_session *se_sess, - struct qla_tgt_sess *qla_tgt_sess, + struct fc_port *fc_port, uint8_t *s_id) { u32 key; @@ -1242,22 +1259,22 @@ static void tcm_qla2xxx_set_sess_by_s_id( pr_debug("Wiping nonexisting fc_port entry\n"); } - qla_tgt_sess->se_sess = se_sess; - nacl->qla_tgt_sess = qla_tgt_sess; + fc_port->se_sess = se_sess; + nacl->fc_port = fc_port; return; } - if (nacl->qla_tgt_sess) { + if (nacl->fc_port) { if (new_se_nacl == NULL) { - pr_debug("Clearing existing nacl->qla_tgt_sess and fc_port entry\n"); + pr_debug("Clearing existing nacl->fc_port and fc_port entry\n"); btree_remove32(&lport->lport_fcport_map, key); - nacl->qla_tgt_sess = NULL; + nacl->fc_port = NULL; return; } - pr_debug("Replacing existing nacl->qla_tgt_sess and fc_port entry\n"); + pr_debug("Replacing existing nacl->fc_port and fc_port entry\n"); btree_update32(&lport->lport_fcport_map, key, new_se_nacl); - qla_tgt_sess->se_sess = se_sess; - 
nacl->qla_tgt_sess = qla_tgt_sess; + fc_port->se_sess = se_sess; + nacl->fc_port = fc_port; return; } @@ -1267,19 +1284,19 @@ static void tcm_qla2xxx_set_sess_by_s_id( return; } - pr_debug("Replacing existing fc_port entry w/o active nacl->qla_tgt_sess\n"); + pr_debug("Replacing existing fc_port entry w/o active nacl->fc_port\n"); btree_update32(&lport->lport_fcport_map, key, new_se_nacl); - qla_tgt_sess->se_sess = se_sess; - nacl->qla_tgt_sess = qla_tgt_sess; + fc_port->se_sess = se_sess; + nacl->fc_port = fc_port; - pr_debug("Setup nacl->qla_tgt_sess %p by s_id for se_nacl: %p, initiatorname: %s\n", - nacl->qla_tgt_sess, new_se_nacl, new_se_nacl->initiatorname); + pr_debug("Setup nacl->fc_port %p by s_id for se_nacl: %p, initiatorname: %s\n", + nacl->fc_port, new_se_nacl, new_se_nacl->initiatorname); } /* * Expected to be called with struct qla_hw_data->tgt.sess_lock held */ -static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_loop_id( +static struct fc_port *tcm_qla2xxx_find_sess_by_loop_id( scsi_qla_host_t *vha, const uint16_t loop_id) { @@ -1307,12 +1324,12 @@ static struct qla_tgt_sess *tcm_qla2xxx_find_sess_by_loop_id( nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); - if (!nacl->qla_tgt_sess) { - pr_err("Unable to locate struct qla_tgt_sess\n"); + if (!nacl->fc_port) { + pr_err("Unable to locate struct fc_port\n"); return NULL; } - return nacl->qla_tgt_sess; + return nacl->fc_port; } /* @@ -1323,7 +1340,7 @@ static void tcm_qla2xxx_set_sess_by_loop_id( struct se_node_acl *new_se_nacl, struct tcm_qla2xxx_nacl *nacl, struct se_session *se_sess, - struct qla_tgt_sess *qla_tgt_sess, + struct fc_port *fc_port, uint16_t loop_id) { struct se_node_acl *saved_nacl; @@ -1338,27 +1355,27 @@ static void tcm_qla2xxx_set_sess_by_loop_id( if (!saved_nacl) { pr_debug("Setting up new fc_loopid->se_nacl to new_se_nacl\n"); fc_loopid->se_nacl = new_se_nacl; - if (qla_tgt_sess->se_sess != se_sess) - qla_tgt_sess->se_sess = se_sess; - if (nacl->qla_tgt_sess != qla_tgt_sess) - nacl->qla_tgt_sess = qla_tgt_sess; + if (fc_port->se_sess != se_sess) + fc_port->se_sess = se_sess; + if (nacl->fc_port != fc_port) + nacl->fc_port = fc_port; return; } - if (nacl->qla_tgt_sess) { + if (nacl->fc_port) { if (new_se_nacl == NULL) { - pr_debug("Clearing nacl->qla_tgt_sess and fc_loopid->se_nacl\n"); + pr_debug("Clearing nacl->fc_port and fc_loopid->se_nacl\n"); fc_loopid->se_nacl = NULL; - nacl->qla_tgt_sess = NULL; + nacl->fc_port = NULL; return; } - pr_debug("Replacing existing nacl->qla_tgt_sess and fc_loopid->se_nacl\n"); + pr_debug("Replacing existing nacl->fc_port and fc_loopid->se_nacl\n"); fc_loopid->se_nacl = new_se_nacl; - if (qla_tgt_sess->se_sess != se_sess) - qla_tgt_sess->se_sess = se_sess; - if (nacl->qla_tgt_sess != qla_tgt_sess) - nacl->qla_tgt_sess = qla_tgt_sess; + if (fc_port->se_sess != se_sess) + fc_port->se_sess = se_sess; + if (nacl->fc_port != fc_port) + nacl->fc_port = fc_port; return; } @@ -1368,22 +1385,22 @@ static void tcm_qla2xxx_set_sess_by_loop_id( return; } - pr_debug("Replacing existing fc_loopid->se_nacl w/o active nacl->qla_tgt_sess\n"); + pr_debug("Replacing existing fc_loopid->se_nacl w/o active nacl->fc_port\n"); fc_loopid->se_nacl = new_se_nacl; - if (qla_tgt_sess->se_sess != se_sess) - qla_tgt_sess->se_sess = se_sess; - if (nacl->qla_tgt_sess != qla_tgt_sess) - nacl->qla_tgt_sess = qla_tgt_sess; + if (fc_port->se_sess != se_sess) + fc_port->se_sess = se_sess; + if (nacl->fc_port != fc_port) + nacl->fc_port = fc_port; - pr_debug("Setup nacl->qla_tgt_sess %p 
by loop_id for se_nacl: %p, initiatorname: %s\n", - nacl->qla_tgt_sess, new_se_nacl, new_se_nacl->initiatorname); + pr_debug("Setup nacl->fc_port %p by loop_id for se_nacl: %p, initiatorname: %s\n", + nacl->fc_port, new_se_nacl, new_se_nacl->initiatorname); } /* * Should always be called with qla_hw_data->tgt.sess_lock held. */ static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *lport, - struct tcm_qla2xxx_nacl *nacl, struct qla_tgt_sess *sess) + struct tcm_qla2xxx_nacl *nacl, struct fc_port *sess) { struct se_session *se_sess = sess->se_sess; unsigned char be_sid[3]; @@ -1398,7 +1415,7 @@ static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *lport, sess, sess->loop_id); } -static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess) +static void tcm_qla2xxx_free_session(struct fc_port *sess) { struct qla_tgt *tgt = sess->tgt; struct qla_hw_data *ha = tgt->ha; @@ -1410,7 +1427,7 @@ static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess) se_sess = sess->se_sess; if (!se_sess) { - pr_err("struct qla_tgt_sess->se_sess is NULL\n"); + pr_err("struct fc_port->se_sess is NULL\n"); dump_stack(); return; } @@ -1437,7 +1454,7 @@ static int tcm_qla2xxx_session_cb(struct se_portal_group *se_tpg, struct se_node_acl *se_nacl = se_sess->se_node_acl; struct tcm_qla2xxx_nacl *nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl); - struct qla_tgt_sess *qlat_sess = p; + struct fc_port *qlat_sess = p; uint16_t loop_id = qlat_sess->loop_id; unsigned long flags; unsigned char be_sid[3]; @@ -1467,7 +1484,7 @@ static int tcm_qla2xxx_session_cb(struct se_portal_group *se_tpg, static int tcm_qla2xxx_check_initiator_node_acl( scsi_qla_host_t *vha, unsigned char *fc_wwpn, - struct qla_tgt_sess *qlat_sess) + struct fc_port *qlat_sess) { struct qla_hw_data *ha = vha->hw; struct tcm_qla2xxx_lport *lport; @@ -1511,7 +1528,7 @@ static int tcm_qla2xxx_check_initiator_node_acl( return 0; } -static void tcm_qla2xxx_update_sess(struct qla_tgt_sess *sess, port_id_t s_id, +static void tcm_qla2xxx_update_sess(struct fc_port *sess, port_id_t s_id, uint16_t loop_id, bool conf_compl_supported) { struct qla_tgt *tgt = sess->tgt; @@ -1603,6 +1620,7 @@ static struct qla_tgt_func_tmpl tcm_qla2xxx_template = { .find_sess_by_s_id = tcm_qla2xxx_find_sess_by_s_id, .find_sess_by_loop_id = tcm_qla2xxx_find_sess_by_loop_id, .clear_nacl_from_fcport_map = tcm_qla2xxx_clear_nacl_from_fcport_map, + .put_sess = tcm_qla2xxx_put_sess, .shutdown_sess = tcm_qla2xxx_shutdown_sess, }; diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index cf8430be183b..071035dfa99a 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -20,8 +20,8 @@ struct tcm_qla2xxx_nacl { u64 nport_wwnn; /* ASCII formatted WWPN for FC Initiator Nport */ char nport_name[TCM_QLA2XXX_NAMELEN]; - /* Pointer to qla_tgt_sess */ - struct qla_tgt_sess *qla_tgt_sess; + /* Pointer to fc_port */ + struct fc_port *fc_port; /* Pointer to TCM FC nexus */ struct se_session *nport_nexus; }; -- GitLab From 726b85487067d7f5b23495bc33c484b8517c4074 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:28:00 -0800 Subject: [PATCH 0185/1084] qla2xxx: Add framework for async fabric discovery Currently code performs a full scan of the fabric for every RSCN. Its an expensive process in a noisy large SAN. This patch optimizes expensive fabric discovery process by scanning switch for the affected port when RSCN is received. 
Currently Initiator Mode code makes login/logout decision without knowledge of target mode. This causes driver and firmware to go out-of-sync. This framework synchronizes both initiator mode personality and target mode personality in making login/logout decision. This patch adds following capabilities in the driver - Send Notification Acknowledgement asynchronously. - Update session/fcport state asynchronously. - Create a session or fcport struct asynchronously. - Send GNL asynchronously. The command will ask FW to provide a list of FC Port entries FW knows about. - Send GPDB asynchronously. The command will ask FW to provide detail data of an FC Port FW knows about or perform ADISC to verify the state of the session. - Send GPNID asynchronously. The command will ask switch to provide WWPN for provided NPort ID. - Send GPSC asynchronously. The command will ask switch to provide registered port speed for provided WWPN. - Send GIDPN asynchronously. The command will ask the switch to provide Nport ID for provided WWPN. - In driver unload path, schedule all session for deletion and wait for deletion to complete before allowing driver unload to proceed. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani [ bvanassche: fixed spelling in patch description ] Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_attr.c | 3 + drivers/scsi/qla2xxx/qla_def.h | 175 +++- drivers/scsi/qla2xxx/qla_fw.h | 31 + drivers/scsi/qla2xxx/qla_gbl.h | 56 +- drivers/scsi/qla2xxx/qla_gs.c | 724 +++++++++++++-- drivers/scsi/qla2xxx/qla_init.c | 1396 ++++++++++++++++++++++------- drivers/scsi/qla2xxx/qla_inline.h | 7 +- drivers/scsi/qla2xxx/qla_iocb.c | 71 +- drivers/scsi/qla2xxx/qla_isr.c | 260 ++++-- drivers/scsi/qla2xxx/qla_mbx.c | 88 -- drivers/scsi/qla2xxx/qla_os.c | 271 ++++-- drivers/scsi/qla2xxx/qla_target.c | 1002 ++++++++++++++------- drivers/scsi/qla2xxx/qla_target.h | 17 +- 13 files changed, 3114 insertions(+), 987 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index f201f4099620..f610103994af 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2163,6 +2163,9 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) clear_bit(vha->vp_idx, ha->vp_idx_map); mutex_unlock(&ha->vport_lock); + dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l, + vha->gnl.ldma); + if (vha->qpair->vp_idx == vha->vp_idx) { if (qla2xxx_delete_qpair(vha, vha->qpair) != QLA_SUCCESS) ql_log(ql_log_warn, vha, 0x7087, diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 1bdcc7e010ff..79b4e88db3b1 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -55,6 +55,8 @@ #include "qla_settings.h" +#define MODE_DUAL (MODE_TARGET | MODE_INITIATOR) + /* * Data bit definitions */ @@ -251,6 +253,14 @@ #define MAX_CMDSZ 16 /* SCSI maximum CDB size. */ #include "qla_fw.h" + +struct name_list_extended { + struct get_name_list_extended *l; + dma_addr_t ldma; + struct list_head fcports; /* protect by sess_list */ + u32 size; + u8 sent; +}; /* * Timeout timer counts in seconds */ @@ -309,6 +319,17 @@ struct els_logo_payload { uint8_t wwpn[WWN_SIZE]; }; +struct ct_arg { + void *iocb; + u16 nport_handle; + dma_addr_t req_dma; + dma_addr_t rsp_dma; + u32 req_size; + u32 rsp_size; + void *req; + void *rsp; +}; + /* * SRB extensions. 
*/ @@ -320,6 +341,7 @@ struct srb_iocb { #define SRB_LOGIN_COND_PLOGI BIT_1 #define SRB_LOGIN_SKIP_PRLI BIT_2 uint16_t data[2]; + u32 iop[2]; } logio; struct { #define ELS_DCMD_TIMEOUT 20 @@ -372,6 +394,16 @@ struct srb_iocb { __le16 comp_status; struct completion comp; } abt; + struct ct_arg ctarg; + struct { + __le16 in_mb[28]; /* fr fw */ + __le16 out_mb[28]; /* to fw */ + void *out, *in; + dma_addr_t out_dma, in_dma; + } mbx; + struct { + struct imm_ntfy_from_isp *ntfy; + } nack; } u; struct timer_list timer; @@ -392,16 +424,24 @@ struct srb_iocb { #define SRB_FXIOCB_BCMD 11 #define SRB_ABT_CMD 12 #define SRB_ELS_DCMD 13 +#define SRB_MB_IOCB 14 +#define SRB_CT_PTHRU_CMD 15 +#define SRB_NACK_PLOGI 16 +#define SRB_NACK_PRLI 17 +#define SRB_NACK_LOGO 18 typedef struct srb { atomic_t ref_count; struct fc_port *fcport; + void *vha; uint32_t handle; uint16_t flags; uint16_t type; char *name; int iocbs; struct qla_qpair *qpair; + u32 gen1; /* scratch */ + u32 gen2; /* scratch */ union { struct srb_iocb iocb_cmd; struct bsg_job *bsg_job; @@ -2101,6 +2141,18 @@ typedef struct { #define FC4_TYPE_OTHER 0x0 #define FC4_TYPE_UNKNOWN 0xff +/* mailbox command 4G & above */ +struct mbx_24xx_entry { + uint8_t entry_type; + uint8_t entry_count; + uint8_t sys_define1; + uint8_t entry_status; + uint32_t handle; + uint16_t mb[28]; +}; + +#define IOCB_SIZE 64 + /* * Fibre channel port type. */ @@ -2113,6 +2165,12 @@ typedef enum { FCT_TARGET } fc_port_type_t; +enum qla_sess_deletion { + QLA_SESS_DELETION_NONE = 0, + QLA_SESS_DELETION_IN_PROGRESS, + QLA_SESS_DELETED, +}; + enum qlt_plogi_link_t { QLT_PLOGI_LINK_SAME_WWN, QLT_PLOGI_LINK_CONFLICT, @@ -2124,6 +2182,48 @@ struct qlt_plogi_ack_t { struct imm_ntfy_from_isp iocb; port_id_t id; int ref_count; + void *fcport; +}; + +struct ct_sns_desc { + struct ct_sns_pkt *ct_sns; + dma_addr_t ct_sns_dma; +}; + +enum discovery_state { + DSC_DELETED, + DSC_GID_PN, + DSC_GNL, + DSC_LOGIN_PEND, + DSC_LOGIN_FAILED, + DSC_GPDB, + DSC_GPSC, + DSC_UPD_FCPORT, + DSC_LOGIN_COMPLETE, + DSC_DELETE_PEND, +}; + +enum login_state { /* FW control Target side */ + DSC_LS_LLIOCB_SENT = 2, + DSC_LS_PLOGI_PEND, + DSC_LS_PLOGI_COMP, + DSC_LS_PRLI_PEND, + DSC_LS_PRLI_COMP, + DSC_LS_PORT_UNAVAIL, + DSC_LS_PRLO_PEND = 9, + DSC_LS_LOGO_PEND, +}; + +enum fcport_mgt_event { + FCME_RELOGIN = 1, + FCME_RSCN, + FCME_GIDPN_DONE, + FCME_PLOGI_DONE, /* Initiator side sent LLIOCB */ + FCME_GNL_DONE, + FCME_GPSC_DONE, + FCME_GPDB_DONE, + FCME_GPNID_DONE, + FCME_DELETE_DONE, }; /* @@ -2143,9 +2243,13 @@ typedef struct fc_port { unsigned int deleted:2; unsigned int local:1; unsigned int logout_on_delete:1; + unsigned int logo_ack_needed:1; unsigned int keep_nport_handle:1; unsigned int send_els_logo:1; + unsigned int login_pause:1; + unsigned int login_succ:1; + struct fc_port *conflict; unsigned char logout_completed; int generation; @@ -2186,8 +2290,30 @@ typedef struct fc_port { unsigned long retry_delay_timestamp; struct qla_tgt_sess *tgt_session; + struct ct_sns_desc ct_desc; + enum discovery_state disc_state; + enum login_state fw_login_state; + u32 login_gen, last_login_gen; + u32 rscn_gen, last_rscn_gen; + u32 chip_reset; + struct list_head gnl_entry; + struct work_struct del_work; + u8 iocb[IOCB_SIZE]; } fc_port_t; +#define QLA_FCPORT_SCAN 1 +#define QLA_FCPORT_FOUND 2 + +struct event_arg { + enum fcport_mgt_event event; + fc_port_t *fcport; + srb_t *sp; + port_id_t id; + u16 data[2], rc; + u8 port_name[WWN_SIZE]; + u32 iop[2]; +}; + #include "qla_mr.h" /* @@ -2265,6 +2391,10 @@ static const 
char * const port_state_str[] = { #define GFT_ID_REQ_SIZE (16 + 4) #define GFT_ID_RSP_SIZE (16 + 32) +#define GID_PN_CMD 0x121 +#define GID_PN_REQ_SIZE (16 + 8) +#define GID_PN_RSP_SIZE (16 + 4) + #define RFT_ID_CMD 0x217 #define RFT_ID_REQ_SIZE (16 + 4 + 32) #define RFT_ID_RSP_SIZE 16 @@ -2590,6 +2720,10 @@ struct ct_sns_req { uint8_t reserved; uint8_t port_name[3]; } gff_id; + + struct { + uint8_t port_name[8]; + } gid_pn; } req; }; @@ -2669,6 +2803,10 @@ struct ct_sns_rsp { struct { uint8_t fc4_features[128]; } gff_id; + struct { + uint8_t reserved; + uint8_t port_id[3]; + } gid_pn; } rsp; }; @@ -2810,11 +2948,11 @@ struct isp_operations { uint16_t (*calc_req_entries) (uint16_t); void (*build_iocbs) (srb_t *, cmd_entry_t *, uint16_t); - void * (*prep_ms_iocb) (struct scsi_qla_host *, uint32_t, uint32_t); - void * (*prep_ms_fdmi_iocb) (struct scsi_qla_host *, uint32_t, + void *(*prep_ms_iocb) (struct scsi_qla_host *, struct ct_arg *); + void *(*prep_ms_fdmi_iocb) (struct scsi_qla_host *, uint32_t, uint32_t); - uint8_t * (*read_nvram) (struct scsi_qla_host *, uint8_t *, + uint8_t *(*read_nvram) (struct scsi_qla_host *, uint8_t *, uint32_t, uint32_t); int (*write_nvram) (struct scsi_qla_host *, uint8_t *, uint32_t, uint32_t); @@ -2876,13 +3014,21 @@ enum qla_work_type { QLA_EVT_AEN, QLA_EVT_IDC_ACK, QLA_EVT_ASYNC_LOGIN, - QLA_EVT_ASYNC_LOGIN_DONE, QLA_EVT_ASYNC_LOGOUT, QLA_EVT_ASYNC_LOGOUT_DONE, QLA_EVT_ASYNC_ADISC, QLA_EVT_ASYNC_ADISC_DONE, QLA_EVT_UEVENT, QLA_EVT_AENFX, + QLA_EVT_GIDPN, + QLA_EVT_GPNID, + QLA_EVT_GPNID_DONE, + QLA_EVT_NEW_SESS, + QLA_EVT_GPDB, + QLA_EVT_GPSC, + QLA_EVT_UPD_FCPORT, + QLA_EVT_GNL, + QLA_EVT_NACK, }; @@ -2918,6 +3064,23 @@ struct qla_work_evt { struct { srb_t *sp; } iosb; + struct { + port_id_t id; + } gpnid; + struct { + port_id_t id; + u8 port_name[8]; + void *pla; + } new_sess; + struct { /*Get PDB, Get Speed, update fcport, gnl, gidpn */ + fc_port_t *fcport; + u8 opt; + } fcport; + struct { + fc_port_t *fcport; + u8 iocb[IOCB_SIZE]; + int type; + } nack; } u; }; @@ -3899,6 +4062,10 @@ typedef struct scsi_qla_host { struct qla8044_reset_template reset_tmplt; struct qla_tgt_counters tgt_counters; uint16_t bbcr; + struct name_list_extended gnl; + /* Count of active session/fcport */ + int fcport_count; + wait_queue_head_t fcport_waitQ; } scsi_qla_host_t; struct qla27xx_image_status { diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index 8a2368b32dec..ee135cf96aee 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -72,6 +72,37 @@ struct port_database_24xx { uint8_t reserved_3[24]; }; +/* + * MB 75h returns a list of DB entries similar to port_database_24xx(64B). + * However, in this case it returns 1st 40 bytes. + */ +struct get_name_list_extended { + __le16 flags; + u8 current_login_state; + u8 last_login_state; + u8 hard_address[3]; + u8 reserved_1; + u8 port_id[3]; + u8 sequence_id; + __le16 port_timer; + __le16 nport_handle; /* N_PORT handle. 
*/ + __le16 receive_data_size; + __le16 reserved_2; + + /* PRLI SVC Param are Big endian */ + u8 prli_svc_param_word_0[2]; /* Bits 15-0 of word 0 */ + u8 prli_svc_param_word_3[2]; /* Bits 15-0 of word 3 */ + u8 port_name[WWN_SIZE]; + u8 node_name[WWN_SIZE]; +}; + +/* MB 75h: This is the short version of the database */ +struct get_name_list { + u8 port_node_name[WWN_SIZE]; /* B7 most sig, B0 least sig */ + __le16 nport_handle; + u8 reserved; +}; + struct vp_database_24xx { uint16_t vp_status; uint8_t options; diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index afa0116a163b..dd95ff620ae3 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -73,6 +73,10 @@ extern void qla2x00_async_logout_done(struct scsi_qla_host *, fc_port_t *, uint16_t *); extern void qla2x00_async_adisc_done(struct scsi_qla_host *, fc_port_t *, uint16_t *); +struct qla_work_evt *qla2x00_alloc_work(struct scsi_qla_host *, + enum qla_work_type); +extern int qla24xx_async_gnl(struct scsi_qla_host *, fc_port_t *); +int qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e); extern void *qla2x00_alloc_iocbs(struct scsi_qla_host *, srb_t *); extern void *qla2x00_alloc_iocbs_ready(struct scsi_qla_host *, srb_t *); extern int qla24xx_update_fcport_fcp_prio(scsi_qla_host_t *, fc_port_t *); @@ -94,6 +98,13 @@ extern uint8_t qla27xx_find_valid_image(struct scsi_qla_host *); extern struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *, int, int); extern int qla2xxx_delete_qpair(struct scsi_qla_host *, struct qla_qpair *); +void qla2x00_fcport_event_handler(scsi_qla_host_t *, struct event_arg *); +int qla24xx_async_gpdb(struct scsi_qla_host *, fc_port_t *, u8); +int qla24xx_async_notify_ack(scsi_qla_host_t *, fc_port_t *, + struct imm_ntfy_from_isp *, int); +int qla24xx_post_newsess_work(struct scsi_qla_host *, port_id_t *, u8 *, + void *); +int qla24xx_fcport_handle_login(struct scsi_qla_host *, fc_port_t *); /* * Global Data in qla_os.c source file. @@ -135,8 +146,6 @@ extern int qla2x00_post_aen_work(struct scsi_qla_host *, enum extern int qla2x00_post_idc_ack_work(struct scsi_qla_host *, uint16_t *); extern int qla2x00_post_async_login_work(struct scsi_qla_host *, fc_port_t *, uint16_t *); -extern int qla2x00_post_async_login_done_work(struct scsi_qla_host *, - fc_port_t *, uint16_t *); extern int qla2x00_post_async_logout_work(struct scsi_qla_host *, fc_port_t *, uint16_t *); extern int qla2x00_post_async_logout_done_work(struct scsi_qla_host *, @@ -179,6 +188,10 @@ extern void qla2x00_disable_board_on_pci_error(struct work_struct *); extern void qla2x00_sp_compl(void *, void *, int); extern void qla2xxx_qpair_sp_free_dma(void *, void *); extern void qla2xxx_qpair_sp_compl(void *, void *, int); +extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *); +void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *, + uint16_t *); +int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *); /* * Global Functions in qla_mid.c source file. 
@@ -301,9 +314,6 @@ qla2x00_get_retry_cnt(scsi_qla_host_t *, uint8_t *, uint8_t *, uint16_t *); extern int qla2x00_init_firmware(scsi_qla_host_t *, uint16_t); -extern int -qla2x00_get_node_name_list(scsi_qla_host_t *, void **, int *); - extern int qla2x00_get_port_database(scsi_qla_host_t *, fc_port_t *, uint8_t); @@ -483,6 +493,9 @@ qla2x00_process_completed_request(struct scsi_qla_host *, struct req_que *, uint32_t); extern irqreturn_t qla2xxx_msix_rsp_q(int irq, void *dev_id); +fc_port_t *qla2x00_find_fcport_by_loopid(scsi_qla_host_t *, uint16_t); +fc_port_t *qla2x00_find_fcport_by_wwpn(scsi_qla_host_t *, u8 *, u8); +fc_port_t *qla2x00_find_fcport_by_nportid(scsi_qla_host_t *, port_id_t *, u8); /* * Global Function Prototypes in qla_sup.c source file. @@ -574,8 +587,8 @@ extern void qla2xxx_dump_post_process(scsi_qla_host_t *, int); /* * Global Function Prototypes in qla_gs.c source file. */ -extern void *qla2x00_prep_ms_iocb(scsi_qla_host_t *, uint32_t, uint32_t); -extern void *qla24xx_prep_ms_iocb(scsi_qla_host_t *, uint32_t, uint32_t); +extern void *qla2x00_prep_ms_iocb(scsi_qla_host_t *, struct ct_arg *); +extern void *qla24xx_prep_ms_iocb(scsi_qla_host_t *, struct ct_arg *); extern int qla2x00_ga_nxt(scsi_qla_host_t *, fc_port_t *); extern int qla2x00_gid_pt(scsi_qla_host_t *, sw_info_t *); extern int qla2x00_gpn_id(scsi_qla_host_t *, sw_info_t *); @@ -591,6 +604,23 @@ extern int qla2x00_fdmi_register(scsi_qla_host_t *); extern int qla2x00_gfpn_id(scsi_qla_host_t *, sw_info_t *); extern int qla2x00_gpsc(scsi_qla_host_t *, sw_info_t *); extern void qla2x00_get_sym_node_name(scsi_qla_host_t *, uint8_t *, size_t); +extern int qla2x00_chk_ms_status(scsi_qla_host_t *, ms_iocb_entry_t *, + struct ct_sns_rsp *, const char *); +extern void qla2x00_async_iocb_timeout(void *data); +extern int qla24xx_async_gidpn(scsi_qla_host_t *, fc_port_t *); +int qla24xx_post_gidpn_work(struct scsi_qla_host *, fc_port_t *); +void qla24xx_handle_gidpn_event(scsi_qla_host_t *, struct event_arg *); + +extern void qla2x00_free_fcport(fc_port_t *); + +extern int qla24xx_post_gpnid_work(struct scsi_qla_host *, port_id_t *); +extern int qla24xx_async_gpnid(scsi_qla_host_t *, port_id_t *); +void qla24xx_async_gpnid_done(scsi_qla_host_t *, srb_t*); +void qla24xx_handle_gpnid_event(scsi_qla_host_t *, struct event_arg *); + +int qla24xx_post_gpsc_work(struct scsi_qla_host *, fc_port_t *); +int qla24xx_async_gpsc(scsi_qla_host_t *, fc_port_t *); +int qla2x00_mgmt_svr_login(scsi_qla_host_t *); /* * Global Function Prototypes in qla_attr.c source file. 
@@ -803,4 +833,16 @@ extern int qla_get_exchoffld_status(scsi_qla_host_t *, uint16_t *, uint16_t *); extern int qla_set_exchoffld_mem_cfg(scsi_qla_host_t *, dma_addr_t); extern void qlt_handle_abts_recv(struct scsi_qla_host *, response_t *); +int qla24xx_async_notify_ack(scsi_qla_host_t *, fc_port_t *, + struct imm_ntfy_from_isp *, int); +void qla24xx_do_nack_work(struct scsi_qla_host *, struct qla_work_evt *); +void qlt_plogi_ack_link(struct scsi_qla_host *, struct qlt_plogi_ack_t *, + struct fc_port *, enum qlt_plogi_link_t); +void qlt_plogi_ack_unref(struct scsi_qla_host *, struct qlt_plogi_ack_t *); +extern void qlt_schedule_sess_for_deletion(struct fc_port *, bool); +extern void qlt_schedule_sess_for_deletion_lock(struct fc_port *); +extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *, + uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **); +void qla24xx_delete_sess_fn(struct work_struct *); + #endif /* _QLA_GBL_H */ diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 94e8a8592f69..8f7054dc742b 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -24,12 +24,12 @@ static int qla2x00_sns_rnn_id(scsi_qla_host_t *); * Returns a pointer to the @ha's ms_iocb. */ void * -qla2x00_prep_ms_iocb(scsi_qla_host_t *vha, uint32_t req_size, uint32_t rsp_size) +qla2x00_prep_ms_iocb(scsi_qla_host_t *vha, struct ct_arg *arg) { struct qla_hw_data *ha = vha->hw; ms_iocb_entry_t *ms_pkt; - ms_pkt = ha->ms_iocb; + ms_pkt = (ms_iocb_entry_t *)arg->iocb; memset(ms_pkt, 0, sizeof(ms_iocb_entry_t)); ms_pkt->entry_type = MS_IOCB_TYPE; @@ -39,15 +39,15 @@ qla2x00_prep_ms_iocb(scsi_qla_host_t *vha, uint32_t req_size, uint32_t rsp_size) ms_pkt->timeout = cpu_to_le16(ha->r_a_tov / 10 * 2); ms_pkt->cmd_dsd_count = cpu_to_le16(1); ms_pkt->total_dsd_count = cpu_to_le16(2); - ms_pkt->rsp_bytecount = cpu_to_le32(rsp_size); - ms_pkt->req_bytecount = cpu_to_le32(req_size); + ms_pkt->rsp_bytecount = cpu_to_le32(arg->rsp_size); + ms_pkt->req_bytecount = cpu_to_le32(arg->req_size); - ms_pkt->dseg_req_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ms_pkt->dseg_req_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); + ms_pkt->dseg_req_address[0] = cpu_to_le32(LSD(arg->req_dma)); + ms_pkt->dseg_req_address[1] = cpu_to_le32(MSD(arg->req_dma)); ms_pkt->dseg_req_length = ms_pkt->req_bytecount; - ms_pkt->dseg_rsp_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ms_pkt->dseg_rsp_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); + ms_pkt->dseg_rsp_address[0] = cpu_to_le32(LSD(arg->rsp_dma)); + ms_pkt->dseg_rsp_address[1] = cpu_to_le32(MSD(arg->rsp_dma)); ms_pkt->dseg_rsp_length = ms_pkt->rsp_bytecount; vha->qla_stats.control_requests++; @@ -64,29 +64,29 @@ qla2x00_prep_ms_iocb(scsi_qla_host_t *vha, uint32_t req_size, uint32_t rsp_size) * Returns a pointer to the @ha's ms_iocb. 
*/ void * -qla24xx_prep_ms_iocb(scsi_qla_host_t *vha, uint32_t req_size, uint32_t rsp_size) +qla24xx_prep_ms_iocb(scsi_qla_host_t *vha, struct ct_arg *arg) { struct qla_hw_data *ha = vha->hw; struct ct_entry_24xx *ct_pkt; - ct_pkt = (struct ct_entry_24xx *)ha->ms_iocb; + ct_pkt = (struct ct_entry_24xx *)arg->iocb; memset(ct_pkt, 0, sizeof(struct ct_entry_24xx)); ct_pkt->entry_type = CT_IOCB_TYPE; ct_pkt->entry_count = 1; - ct_pkt->nport_handle = cpu_to_le16(NPH_SNS); + ct_pkt->nport_handle = cpu_to_le16(arg->nport_handle); ct_pkt->timeout = cpu_to_le16(ha->r_a_tov / 10 * 2); ct_pkt->cmd_dsd_count = cpu_to_le16(1); ct_pkt->rsp_dsd_count = cpu_to_le16(1); - ct_pkt->rsp_byte_count = cpu_to_le32(rsp_size); - ct_pkt->cmd_byte_count = cpu_to_le32(req_size); + ct_pkt->rsp_byte_count = cpu_to_le32(arg->rsp_size); + ct_pkt->cmd_byte_count = cpu_to_le32(arg->req_size); - ct_pkt->dseg_0_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ct_pkt->dseg_0_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); + ct_pkt->dseg_0_address[0] = cpu_to_le32(LSD(arg->req_dma)); + ct_pkt->dseg_0_address[1] = cpu_to_le32(MSD(arg->req_dma)); ct_pkt->dseg_0_len = ct_pkt->cmd_byte_count; - ct_pkt->dseg_1_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ct_pkt->dseg_1_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); + ct_pkt->dseg_1_address[0] = cpu_to_le32(LSD(arg->rsp_dma)); + ct_pkt->dseg_1_address[1] = cpu_to_le32(MSD(arg->rsp_dma)); ct_pkt->dseg_1_len = ct_pkt->rsp_byte_count; ct_pkt->vp_index = vha->vp_idx; @@ -117,7 +117,7 @@ qla2x00_prep_ct_req(struct ct_sns_pkt *p, uint16_t cmd, uint16_t rsp_size) return &p->p.req; } -static int +int qla2x00_chk_ms_status(scsi_qla_host_t *vha, ms_iocb_entry_t *ms_pkt, struct ct_sns_rsp *ct_rsp, const char *routine) { @@ -183,14 +183,21 @@ qla2x00_ga_nxt(scsi_qla_host_t *vha, fc_port_t *fcport) struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; struct qla_hw_data *ha = vha->hw; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_ga_nxt(vha, fcport); + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GA_NXT_REQ_SIZE; + arg.rsp_size = GA_NXT_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Issue GA_NXT */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GA_NXT_REQ_SIZE, - GA_NXT_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GA_NXT_CMD, @@ -269,16 +276,24 @@ qla2x00_gid_pt(scsi_qla_host_t *vha, sw_info_t *list) struct ct_sns_gid_pt_data *gid_data; struct qla_hw_data *ha = vha->hw; uint16_t gid_pt_rsp_size; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_gid_pt(vha, list); gid_data = NULL; gid_pt_rsp_size = qla2x00_gid_pt_rsp_size(vha); + + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GID_PT_REQ_SIZE; + arg.rsp_size = gid_pt_rsp_size; + arg.nport_handle = NPH_SNS; + /* Issue GID_PT */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GID_PT_REQ_SIZE, - gid_pt_rsp_size); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GID_PT_CMD, gid_pt_rsp_size); @@ -344,15 +359,22 @@ qla2x00_gpn_id(scsi_qla_host_t *vha, sw_info_t *list) struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; struct qla_hw_data *ha = vha->hw; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_gpn_id(vha, list); + arg.iocb = ha->ms_iocb; + 
arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GPN_ID_REQ_SIZE; + arg.rsp_size = GPN_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + for (i = 0; i < ha->max_fibre_devices; i++) { /* Issue GPN_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GPN_ID_REQ_SIZE, - GPN_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GPN_ID_CMD, @@ -406,15 +428,22 @@ qla2x00_gnn_id(scsi_qla_host_t *vha, sw_info_t *list) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_gnn_id(vha, list); + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GNN_ID_REQ_SIZE; + arg.rsp_size = GNN_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + for (i = 0; i < ha->max_fibre_devices; i++) { /* Issue GNN_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GNN_ID_REQ_SIZE, - GNN_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GNN_ID_CMD, @@ -473,14 +502,21 @@ qla2x00_rft_id(scsi_qla_host_t *vha) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_rft_id(vha); + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = RFT_ID_REQ_SIZE; + arg.rsp_size = RFT_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Issue RFT_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, RFT_ID_REQ_SIZE, - RFT_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, RFT_ID_CMD, @@ -526,6 +562,7 @@ qla2x00_rff_id(scsi_qla_host_t *vha) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) { ql_dbg(ql_dbg_disc, vha, 0x2046, @@ -533,10 +570,16 @@ qla2x00_rff_id(scsi_qla_host_t *vha) return (QLA_SUCCESS); } + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = RFF_ID_REQ_SIZE; + arg.rsp_size = RFF_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Issue RFF_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, RFF_ID_REQ_SIZE, - RFF_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, RFF_ID_CMD, @@ -584,14 +627,21 @@ qla2x00_rnn_id(scsi_qla_host_t *vha) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) return qla2x00_sns_rnn_id(vha); + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = RNN_ID_REQ_SIZE; + arg.rsp_size = RNN_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Issue RNN_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, RNN_ID_REQ_SIZE, - RNN_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, RNN_ID_CMD, RNN_ID_RSP_SIZE); @@ -651,6 +701,7 @@ qla2x00_rsnn_nn(scsi_qla_host_t *vha) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (IS_QLA2100(ha) || IS_QLA2200(ha)) { ql_dbg(ql_dbg_disc, vha, 
0x2050, @@ -658,10 +709,17 @@ qla2x00_rsnn_nn(scsi_qla_host_t *vha) return (QLA_SUCCESS); } + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = 0; + arg.rsp_size = RSNN_NN_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Issue RSNN_NN */ /* Prepare common MS IOCB */ /* Request size adjusted after CT preparation */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, 0, RSNN_NN_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, RSNN_NN_CMD, @@ -1103,7 +1161,7 @@ qla2x00_sns_rnn_id(scsi_qla_host_t *vha) * * Returns 0 on success. */ -static int +int qla2x00_mgmt_svr_login(scsi_qla_host_t *vha) { int ret, rval; @@ -2425,15 +2483,22 @@ qla2x00_gfpn_id(scsi_qla_host_t *vha, sw_info_t *list) ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (!IS_IIDMA_CAPABLE(ha)) return QLA_FUNCTION_FAILED; + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GFPN_ID_REQ_SIZE; + arg.rsp_size = GFPN_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + for (i = 0; i < ha->max_fibre_devices; i++) { /* Issue GFPN_ID */ /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GFPN_ID_REQ_SIZE, - GFPN_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GFPN_ID_CMD, @@ -2471,36 +2536,6 @@ qla2x00_gfpn_id(scsi_qla_host_t *vha, sw_info_t *list) return (rval); } -static inline void * -qla24xx_prep_ms_fm_iocb(scsi_qla_host_t *vha, uint32_t req_size, - uint32_t rsp_size) -{ - struct ct_entry_24xx *ct_pkt; - struct qla_hw_data *ha = vha->hw; - ct_pkt = (struct ct_entry_24xx *)ha->ms_iocb; - memset(ct_pkt, 0, sizeof(struct ct_entry_24xx)); - - ct_pkt->entry_type = CT_IOCB_TYPE; - ct_pkt->entry_count = 1; - ct_pkt->nport_handle = cpu_to_le16(vha->mgmt_svr_loop_id); - ct_pkt->timeout = cpu_to_le16(ha->r_a_tov / 10 * 2); - ct_pkt->cmd_dsd_count = cpu_to_le16(1); - ct_pkt->rsp_dsd_count = cpu_to_le16(1); - ct_pkt->rsp_byte_count = cpu_to_le32(rsp_size); - ct_pkt->cmd_byte_count = cpu_to_le32(req_size); - - ct_pkt->dseg_0_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ct_pkt->dseg_0_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); - ct_pkt->dseg_0_len = ct_pkt->cmd_byte_count; - - ct_pkt->dseg_1_address[0] = cpu_to_le32(LSD(ha->ct_sns_dma)); - ct_pkt->dseg_1_address[1] = cpu_to_le32(MSD(ha->ct_sns_dma)); - ct_pkt->dseg_1_len = ct_pkt->rsp_byte_count; - ct_pkt->vp_index = vha->vp_idx; - - return ct_pkt; -} - static inline struct ct_sns_req * qla24xx_prep_ct_fm_req(struct ct_sns_pkt *p, uint16_t cmd, @@ -2530,9 +2565,10 @@ qla2x00_gpsc(scsi_qla_host_t *vha, sw_info_t *list) int rval; uint16_t i; struct qla_hw_data *ha = vha->hw; - ms_iocb_entry_t *ms_pkt; + ms_iocb_entry_t *ms_pkt; struct ct_sns_req *ct_req; struct ct_sns_rsp *ct_rsp; + struct ct_arg arg; if (!IS_IIDMA_CAPABLE(ha)) return QLA_FUNCTION_FAILED; @@ -2543,11 +2579,17 @@ qla2x00_gpsc(scsi_qla_host_t *vha, sw_info_t *list) if (rval) return rval; + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GPSC_REQ_SIZE; + arg.rsp_size = GPSC_RSP_SIZE; + arg.nport_handle = vha->mgmt_svr_loop_id; + for (i = 0; i < ha->max_fibre_devices; i++) { /* Issue GFPN_ID */ /* Prepare common MS IOCB */ - ms_pkt = qla24xx_prep_ms_fm_iocb(vha, GPSC_REQ_SIZE, - GPSC_RSP_SIZE); + ms_pkt = qla24xx_prep_ms_iocb(vha, &arg); /* Prepare CT request 
*/ ct_req = qla24xx_prep_ct_fm_req(ha->ct_sns, GPSC_CMD, @@ -2641,6 +2683,7 @@ qla2x00_gff_id(scsi_qla_host_t *vha, sw_info_t *list) struct ct_sns_rsp *ct_rsp; struct qla_hw_data *ha = vha->hw; uint8_t fcp_scsi_features = 0; + struct ct_arg arg; for (i = 0; i < ha->max_fibre_devices; i++) { /* Set default FC4 Type as UNKNOWN so the default is to @@ -2651,9 +2694,15 @@ qla2x00_gff_id(scsi_qla_host_t *vha, sw_info_t *list) if (!IS_FWI2_CAPABLE(ha)) continue; + arg.iocb = ha->ms_iocb; + arg.req_dma = ha->ct_sns_dma; + arg.rsp_dma = ha->ct_sns_dma; + arg.req_size = GFF_ID_REQ_SIZE; + arg.rsp_size = GFF_ID_RSP_SIZE; + arg.nport_handle = NPH_SNS; + /* Prepare common MS IOCB */ - ms_pkt = ha->isp_ops->prep_ms_iocb(vha, GFF_ID_REQ_SIZE, - GFF_ID_RSP_SIZE); + ms_pkt = ha->isp_ops->prep_ms_iocb(vha, &arg); /* Prepare CT request */ ct_req = qla2x00_prep_ct_req(ha->ct_sns, GFF_ID_CMD, @@ -2692,3 +2741,536 @@ qla2x00_gff_id(scsi_qla_host_t *vha, sw_info_t *list) break; } } + +/* GID_PN completion processing. */ +void qla24xx_handle_gidpn_event(scsi_qla_host_t *vha, struct event_arg *ea) +{ + fc_port_t *fcport = ea->fcport; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC login state %d \n", + __func__, fcport->port_name, fcport->fw_login_state); + + if (ea->sp->gen2 != fcport->login_gen) { + /* PLOGI/PRLI/LOGO came in while cmd was out.*/ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC generation changed rscn %d|%d login %d|%d \n", + __func__, fcport->port_name, fcport->last_rscn_gen, + fcport->rscn_gen, fcport->last_login_gen, fcport->login_gen); + return; + } + + if (!ea->rc) { + if (ea->sp->gen1 == fcport->rscn_gen) { + fcport->scan_state = QLA_FCPORT_FOUND; + fcport->flags |= FCF_FABRIC_DEVICE; + + if (fcport->d_id.b24 == ea->id.b24) { + /* cable plugged into the same place */ + switch (vha->host->active_mode) { + case MODE_TARGET: + /* NOOP. 
let the other guy login to us.*/ + break; + case MODE_INITIATOR: + case MODE_DUAL: + default: + if (atomic_read(&fcport->state) == + FCS_ONLINE) + break; + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gnl\n", + __func__, __LINE__, fcport->port_name); + qla24xx_post_gnl_work(vha, fcport); + break; + } + } else { /* fcport->d_id.b24 != ea->id.b24 */ + fcport->d_id.b24 = ea->id.b24; + if (fcport->deleted == QLA_SESS_DELETED) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, fcport->port_name); + qlt_schedule_sess_for_deletion_lock(fcport); + } + } + } else { /* ea->sp->gen1 != fcport->rscn_gen */ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gidpn\n", + __func__, __LINE__, fcport->port_name); + /* rscn came in while cmd was out */ + qla24xx_post_gidpn_work(vha, fcport); + } + } else { /* ea->rc */ + /* cable pulled */ + if (ea->sp->gen1 == fcport->rscn_gen) { + if (ea->sp->gen2 == fcport->login_gen) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", __func__, + __LINE__, fcport->port_name); + qlt_schedule_sess_for_deletion_lock(fcport); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC login\n", __func__, __LINE__, + fcport->port_name); + qla24xx_fcport_handle_login(vha, fcport); + } + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gidpn\n", __func__, __LINE__, + fcport->port_name); + qla24xx_post_gidpn_work(vha, fcport); + } + } +} /* gidpn_event */ + +static void qla2x00_async_gidpn_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = (struct srb *)s; + fc_port_t *fcport = sp->fcport; + u8 *id = fcport->ct_desc.ct_sns->p.rsp.rsp.gid_pn.port_id; + struct event_arg ea; + + fcport->flags &= ~FCF_ASYNC_SENT; + + memset(&ea, 0, sizeof(ea)); + ea.fcport = fcport; + ea.id.b.domain = id[0]; + ea.id.b.area = id[1]; + ea.id.b.al_pa = id[2]; + ea.sp = sp; + ea.rc = res; + ea.event = FCME_GIDPN_DONE; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x, WWPN %8phC ID %3phC \n", + sp->name, res, fcport->port_name, id); + + qla2x00_fcport_event_handler(vha, &ea); + + sp->free(vha, sp); +} + +int qla24xx_async_gidpn(scsi_qla_host_t *vha, fc_port_t *fcport) +{ + int rval = QLA_FUNCTION_FAILED; + struct ct_sns_req *ct_req; + srb_t *sp; + + if (!vha->flags.online) + goto done; + + fcport->flags |= FCF_ASYNC_SENT; + fcport->disc_state = DSC_GID_PN; + fcport->scan_state = QLA_FCPORT_SCAN; + sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); + if (!sp) + goto done; + + sp->type = SRB_CT_PTHRU_CMD; + sp->name = "gidpn"; + sp->gen1 = fcport->rscn_gen; + sp->gen2 = fcport->login_gen; + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + /* CT_IU preamble */ + ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GID_PN_CMD, + GID_PN_RSP_SIZE); + + /* GIDPN req */ + memcpy(ct_req->req.gid_pn.port_name, fcport->port_name, + WWN_SIZE); + + /* req & rsp use the same buffer */ + sp->u.iocb_cmd.u.ctarg.req = fcport->ct_desc.ct_sns; + sp->u.iocb_cmd.u.ctarg.req_dma = fcport->ct_desc.ct_sns_dma; + sp->u.iocb_cmd.u.ctarg.rsp = fcport->ct_desc.ct_sns; + sp->u.iocb_cmd.u.ctarg.rsp_dma = fcport->ct_desc.ct_sns_dma; + sp->u.iocb_cmd.u.ctarg.req_size = GID_PN_REQ_SIZE; + sp->u.iocb_cmd.u.ctarg.rsp_size = GID_PN_RSP_SIZE; + sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; + + sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; + sp->done = qla2x00_async_gidpn_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + 
+ ql_dbg(ql_dbg_disc, vha, 0x206f, + "Async-%s - %8phC hdl=%x loopid=%x portid %02x%02x%02x.\n", + sp->name, fcport->port_name, + sp->handle, fcport->loop_id, fcport->d_id.b.domain, + fcport->d_id.b.area, fcport->d_id.b.al_pa); + return rval; + +done_free_sp: + sp->free(vha, sp); +done: + fcport->flags &= ~FCF_ASYNC_SENT; + return rval; +} + +int qla24xx_post_gidpn_work(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + struct qla_work_evt *e; + + if ((atomic_read(&vha->loop_state) != LOOP_READY) || + test_bit(UNLOADING, &vha->dpc_flags)) + return 0; + + e = qla2x00_alloc_work(vha, QLA_EVT_GIDPN); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.fcport.fcport = fcport; + return qla2x00_post_work(vha, e); +} + +int qla24xx_post_gpsc_work(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + struct qla_work_evt *e; + + e = qla2x00_alloc_work(vha, QLA_EVT_GPSC); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.fcport.fcport = fcport; + return qla2x00_post_work(vha, e); +} + +static void qla24xx_async_gpsc_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct qla_hw_data *ha = vha->hw; + struct srb *sp = (struct srb *)s; + fc_port_t *fcport = sp->fcport; + struct ct_sns_rsp *ct_rsp; + struct event_arg ea; + + ct_rsp = &fcport->ct_desc.ct_sns->p.rsp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x, WWPN %8phC \n", + sp->name, res, fcport->port_name); + + fcport->flags &= ~FCF_ASYNC_SENT; + + if (res == (DID_ERROR << 16)) { + /* entry status error */ + goto done; + } else if (res) { + if ((ct_rsp->header.reason_code == + CT_REASON_INVALID_COMMAND_CODE) || + (ct_rsp->header.reason_code == + CT_REASON_COMMAND_UNSUPPORTED)) { + ql_dbg(ql_dbg_disc, vha, 0x205a, + "GPSC command unsupported, disabling " + "query.\n"); + ha->flags.gpsc_supported = 0; + res = QLA_SUCCESS; + } + } else { + switch (be16_to_cpu(ct_rsp->rsp.gpsc.speed)) { + case BIT_15: + fcport->fp_speed = PORT_SPEED_1GB; + break; + case BIT_14: + fcport->fp_speed = PORT_SPEED_2GB; + break; + case BIT_13: + fcport->fp_speed = PORT_SPEED_4GB; + break; + case BIT_12: + fcport->fp_speed = PORT_SPEED_10GB; + break; + case BIT_11: + fcport->fp_speed = PORT_SPEED_8GB; + break; + case BIT_10: + fcport->fp_speed = PORT_SPEED_16GB; + break; + case BIT_8: + fcport->fp_speed = PORT_SPEED_32GB; + break; + } + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s OUT WWPN %8phC speeds=%04x speed=%04x.\n", + sp->name, + fcport->fabric_port_name, + be16_to_cpu(ct_rsp->rsp.gpsc.speeds), + be16_to_cpu(ct_rsp->rsp.gpsc.speed)); + } +done: + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_GPSC_DONE; + ea.rc = res; + ea.fcport = fcport; + qla2x00_fcport_event_handler(vha, &ea); + + sp->free(vha, sp); +} + +int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) +{ + int rval = QLA_FUNCTION_FAILED; + struct ct_sns_req *ct_req; + srb_t *sp; + + if (!vha->flags.online) + goto done; + + fcport->flags |= FCF_ASYNC_SENT; + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); + if (!sp) + goto done; + + sp->type = SRB_CT_PTHRU_CMD; + sp->name = "gpsc"; + sp->gen1 = fcport->rscn_gen; + sp->gen2 = fcport->login_gen; + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + /* CT_IU preamble */ + ct_req = qla24xx_prep_ct_fm_req(fcport->ct_desc.ct_sns, GPSC_CMD, + GPSC_RSP_SIZE); + + /* GPSC req */ + memcpy(ct_req->req.gpsc.port_name, fcport->port_name, + WWN_SIZE); + + sp->u.iocb_cmd.u.ctarg.req = fcport->ct_desc.ct_sns; + sp->u.iocb_cmd.u.ctarg.req_dma = fcport->ct_desc.ct_sns_dma; + 
sp->u.iocb_cmd.u.ctarg.rsp = fcport->ct_desc.ct_sns; + sp->u.iocb_cmd.u.ctarg.rsp_dma = fcport->ct_desc.ct_sns_dma; + sp->u.iocb_cmd.u.ctarg.req_size = GPSC_REQ_SIZE; + sp->u.iocb_cmd.u.ctarg.rsp_size = GPSC_RSP_SIZE; + sp->u.iocb_cmd.u.ctarg.nport_handle = vha->mgmt_svr_loop_id; + + sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; + sp->done = qla24xx_async_gpsc_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s %8phC hdl=%x loopid=%x portid=%02x%02x%02x.\n", + sp->name, fcport->port_name, sp->handle, + fcport->loop_id, fcport->d_id.b.domain, + fcport->d_id.b.area, fcport->d_id.b.al_pa); + return rval; + +done_free_sp: + sp->free(vha, sp); +done: + fcport->flags &= ~FCF_ASYNC_SENT; + return rval; +} + +int qla24xx_post_gpnid_work(struct scsi_qla_host *vha, port_id_t *id) +{ + struct qla_work_evt *e; + + if (test_bit(UNLOADING, &vha->dpc_flags)) + return 0; + + e = qla2x00_alloc_work(vha, QLA_EVT_GPNID); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.gpnid.id = *id; + return qla2x00_post_work(vha, e); +} + +void qla24xx_async_gpnid_done(scsi_qla_host_t *vha, srb_t *sp) +{ + if (sp->u.iocb_cmd.u.ctarg.req) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + + sp->free(vha, sp); +} + +void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea) +{ + fc_port_t *fcport; + unsigned long flags; + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + fcport = qla2x00_find_fcport_by_wwpn(vha, ea->port_name, 1); + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + if (fcport) { + /* cable moved. just plugged in */ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, fcport->port_name); + + fcport->rscn_gen++; + fcport->d_id = ea->id; + fcport->scan_state = QLA_FCPORT_FOUND; + fcport->flags |= FCF_FABRIC_DEVICE; + + qlt_schedule_sess_for_deletion_lock(fcport); + } else { + /* create new fcport */ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post new sess\n", + __func__, __LINE__, ea->port_name); + + qla24xx_post_newsess_work(vha, &ea->id, ea->port_name, NULL); + } +} + +static void qla2x00_async_gpnid_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = (struct srb *)s; + struct ct_sns_req *ct_req = + (struct ct_sns_req *)sp->u.iocb_cmd.u.ctarg.req; + struct ct_sns_rsp *ct_rsp = + (struct ct_sns_rsp *)sp->u.iocb_cmd.u.ctarg.rsp; + struct event_arg ea; + struct qla_work_evt *e; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x ID %3phC. %8phC\n", + sp->name, res, ct_req->req.port_id.port_id, + ct_rsp->rsp.gpn_id.port_name); + + memset(&ea, 0, sizeof(ea)); + memcpy(ea.port_name, ct_rsp->rsp.gpn_id.port_name, WWN_SIZE); + ea.sp = sp; + ea.id.b.domain = ct_req->req.port_id.port_id[0]; + ea.id.b.area = ct_req->req.port_id.port_id[1]; + ea.id.b.al_pa = ct_req->req.port_id.port_id[2]; + ea.rc = res; + ea.event = FCME_GPNID_DONE; + + qla2x00_fcport_event_handler(vha, &ea); + + e = qla2x00_alloc_work(vha, QLA_EVT_GPNID_DONE); + if (!e) { + /* please ignore kernel warning. otherwise, we have mem leak. 
*/ + if (sp->u.iocb_cmd.u.ctarg.req) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + + sp->free(vha, sp); + return; + } + + e->u.iosb.sp = sp; + qla2x00_post_work(vha, e); +} + +/* Get WWPN with Nport ID. */ +int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) +{ + int rval = QLA_FUNCTION_FAILED; + struct ct_sns_req *ct_req; + srb_t *sp; + struct ct_sns_pkt *ct_sns; + + if (!vha->flags.online) + goto done; + + sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); + if (!sp) + goto done; + + sp->type = SRB_CT_PTHRU_CMD; + sp->name = "gpnid"; + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.req_dma, + GFP_KERNEL); + if (!sp->u.iocb_cmd.u.ctarg.req) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate ct_sns request.\n"); + goto done_free_sp; + } + + sp->u.iocb_cmd.u.ctarg.rsp = dma_alloc_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), &sp->u.iocb_cmd.u.ctarg.rsp_dma, + GFP_KERNEL); + if (!sp->u.iocb_cmd.u.ctarg.rsp) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate ct_sns request.\n"); + goto done_free_sp; + } + + ct_sns = (struct ct_sns_pkt *)sp->u.iocb_cmd.u.ctarg.rsp; + memset(ct_sns, 0, sizeof(*ct_sns)); + + ct_sns = (struct ct_sns_pkt *)sp->u.iocb_cmd.u.ctarg.req; + /* CT_IU preamble */ + ct_req = qla2x00_prep_ct_req(ct_sns, GPN_ID_CMD, GPN_ID_RSP_SIZE); + + /* GPN_ID req */ + ct_req->req.port_id.port_id[0] = id->b.domain; + ct_req->req.port_id.port_id[1] = id->b.area; + ct_req->req.port_id.port_id[2] = id->b.al_pa; + + sp->u.iocb_cmd.u.ctarg.req_size = GPN_ID_REQ_SIZE; + sp->u.iocb_cmd.u.ctarg.rsp_size = GPN_ID_RSP_SIZE; + sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; + + sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout; + sp->done = qla2x00_async_gpnid_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s hdl=%x ID %3phC.\n", sp->name, + sp->handle, ct_req->req.port_id.port_id); + return rval; + +done_free_sp: + if (sp->u.iocb_cmd.u.ctarg.req) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + if (sp->u.iocb_cmd.u.ctarg.rsp) { + dma_free_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + + sp->free(vha, sp); +done: + return rval; +} diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 5978b79d4a61..8b7c0468a0e0 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -30,15 +30,15 @@ static int qla2x00_configure_hba(scsi_qla_host_t *); static int qla2x00_configure_loop(scsi_qla_host_t *); static int qla2x00_configure_local_loop(scsi_qla_host_t *); static int qla2x00_configure_fabric(scsi_qla_host_t *); -static int qla2x00_find_all_fabric_devs(scsi_qla_host_t *, struct list_head *); -static int qla2x00_fabric_dev_login(scsi_qla_host_t *, fc_port_t *, - uint16_t *); - +static int 
qla2x00_find_all_fabric_devs(scsi_qla_host_t *); static int qla2x00_restart_isp(scsi_qla_host_t *); static struct qla_chip_state_84xx *qla84xx_get_chip(struct scsi_qla_host *); static int qla84xx_init_chip(scsi_qla_host_t *); static int qla25xx_init_queues(struct qla_hw_data *); +static int qla24xx_post_gpdb_work(struct scsi_qla_host *, fc_port_t *, u8); +static void qla24xx_handle_plogi_done_event(struct scsi_qla_host *, + struct event_arg *); /* SRB Extensions ---------------------------------------------------------- */ @@ -47,8 +47,8 @@ qla2x00_sp_timeout(unsigned long __data) { srb_t *sp = (srb_t *)__data; struct srb_iocb *iocb; - fc_port_t *fcport = sp->fcport; - struct qla_hw_data *ha = fcport->vha->hw; + scsi_qla_host_t *vha = (scsi_qla_host_t *)sp->vha; + struct qla_hw_data *ha = vha->hw; struct req_que *req; unsigned long flags; @@ -57,7 +57,7 @@ qla2x00_sp_timeout(unsigned long __data) req->outstanding_cmds[sp->handle] = NULL; iocb = &sp->u.iocb_cmd; iocb->timeout(sp); - sp->free(fcport->vha, sp); + sp->free(vha, sp); spin_unlock_irqrestore(&ha->hardware_lock, flags); } @@ -94,29 +94,44 @@ qla2x00_get_async_timeout(struct scsi_qla_host *vha) return tmo; } -static void +void qla2x00_async_iocb_timeout(void *data) { srb_t *sp = (srb_t *)data; fc_port_t *fcport = sp->fcport; + struct srb_iocb *lio = &sp->u.iocb_cmd; + struct event_arg ea; ql_dbg(ql_dbg_disc, fcport->vha, 0x2071, - "Async-%s timeout - hdl=%x portid=%02x%02x%02x.\n", - sp->name, sp->handle, fcport->d_id.b.domain, fcport->d_id.b.area, - fcport->d_id.b.al_pa); + "Async-%s timeout - hdl=%x portid=%06x %8phC.\n", + sp->name, sp->handle, fcport->d_id.b24, fcport->port_name); fcport->flags &= ~FCF_ASYNC_SENT; - if (sp->type == SRB_LOGIN_CMD) { - struct srb_iocb *lio = &sp->u.iocb_cmd; - qla2x00_post_async_logout_work(fcport->vha, fcport, NULL); + + switch (sp->type) { + case SRB_LOGIN_CMD: /* Retry as needed. */ lio->u.logio.data[0] = MBS_COMMAND_ERROR; lio->u.logio.data[1] = lio->u.logio.flags & SRB_LOGIN_RETRIED ? 
QLA_LOGIO_LOGIN_RETRIED : 0; - qla2x00_post_async_login_done_work(fcport->vha, fcport, - lio->u.logio.data); - } else if (sp->type == SRB_LOGOUT_CMD) { + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_PLOGI_DONE; + ea.fcport = sp->fcport; + ea.data[0] = lio->u.logio.data[0]; + ea.data[1] = lio->u.logio.data[1]; + ea.sp = sp; + qla24xx_handle_plogi_done_event(fcport->vha, &ea); + break; + case SRB_LOGOUT_CMD: qlt_logo_completion_handler(fcport, QLA_FUNCTION_TIMEOUT); + break; + case SRB_CT_PTHRU_CMD: + case SRB_MB_IOCB: + case SRB_NACK_PLOGI: + case SRB_NACK_PRLI: + case SRB_NACK_LOGO: + sp->done(sp->vha, sp, QLA_FUNCTION_TIMEOUT); + break; } } @@ -126,10 +141,25 @@ qla2x00_async_login_sp_done(void *data, void *ptr, int res) srb_t *sp = (srb_t *)ptr; struct srb_iocb *lio = &sp->u.iocb_cmd; struct scsi_qla_host *vha = (scsi_qla_host_t *)data; + struct event_arg ea; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC res %d \n", + __func__, sp->fcport->port_name, res); + + sp->fcport->flags &= ~FCF_ASYNC_SENT; + if (!test_bit(UNLOADING, &vha->dpc_flags)) { + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_PLOGI_DONE; + ea.fcport = sp->fcport; + ea.data[0] = lio->u.logio.data[0]; + ea.data[1] = lio->u.logio.data[1]; + ea.iop[0] = lio->u.logio.iop[0]; + ea.iop[1] = lio->u.logio.iop[1]; + ea.sp = sp; + qla2x00_fcport_event_handler(vha, &ea); + } - if (!test_bit(UNLOADING, &vha->dpc_flags)) - qla2x00_post_async_login_done_work(sp->fcport->vha, sp->fcport, - lio->u.logio.data); sp->free(sp->fcport->vha, sp); } @@ -139,13 +169,23 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, { srb_t *sp; struct srb_iocb *lio; - int rval; + int rval = QLA_FUNCTION_FAILED; + + if (!vha->flags.online) + goto done; + + if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || + (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || + (fcport->fw_login_state == DSC_LS_PRLI_PEND)) + goto done; - rval = QLA_FUNCTION_FAILED; sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; + fcport->flags |= FCF_ASYNC_SENT; + fcport->logout_completed = 0; + sp->type = SRB_LOGIN_CMD; sp->name = "login"; qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); @@ -165,8 +205,8 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, } ql_dbg(ql_dbg_disc, vha, 0x2072, - "Async-login - hdl=%x, loopid=%x portid=%02x%02x%02x " - "retries=%d.\n", sp->handle, fcport->loop_id, + "Async-login - %8phC hdl=%x, loopid=%x portid=%02x%02x%02x " + "retries=%d.\n", fcport->port_name, sp->handle, fcport->loop_id, fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, fcport->login_retry); return rval; @@ -174,6 +214,7 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, done_free_sp: sp->free(fcport->vha, sp); done: + fcport->flags &= ~FCF_ASYNC_SENT; return rval; } @@ -184,6 +225,7 @@ qla2x00_async_logout_sp_done(void *data, void *ptr, int res) struct srb_iocb *lio = &sp->u.iocb_cmd; struct scsi_qla_host *vha = (scsi_qla_host_t *)data; + sp->fcport->flags &= ~FCF_ASYNC_SENT; if (!test_bit(UNLOADING, &vha->dpc_flags)) qla2x00_post_async_logout_done_work(sp->fcport->vha, sp->fcport, lio->u.logio.data); @@ -198,6 +240,7 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) int rval; rval = QLA_FUNCTION_FAILED; + fcport->flags |= FCF_ASYNC_SENT; sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -214,14 +257,16 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) goto done_free_sp; ql_dbg(ql_dbg_disc, vha, 0x2070, - "Async-logout - hdl=%x 
loop-id=%x portid=%02x%02x%02x.\n", + "Async-logout - hdl=%x loop-id=%x portid=%02x%02x%02x %8phC.\n", sp->handle, fcport->loop_id, fcport->d_id.b.domain, - fcport->d_id.b.area, fcport->d_id.b.al_pa); + fcport->d_id.b.area, fcport->d_id.b.al_pa, + fcport->port_name); return rval; done_free_sp: sp->free(fcport->vha, sp); done: + fcport->flags &= ~FCF_ASYNC_SENT; return rval; } @@ -247,6 +292,7 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, int rval; rval = QLA_FUNCTION_FAILED; + fcport->flags |= FCF_ASYNC_SENT; sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; @@ -273,9 +319,817 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, done_free_sp: sp->free(fcport->vha, sp); done: + fcport->flags &= ~FCF_ASYNC_SENT; + return rval; +} + +static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, + struct event_arg *ea) +{ + fc_port_t *fcport, *conflict_fcport; + struct get_name_list_extended *e; + u16 i, n, found = 0, loop_id; + port_id_t id; + u64 wwn; + u8 opt = 0; + + fcport = ea->fcport; + + if (ea->rc) { /* rval */ + if (fcport->login_retry == 0) { + fcport->login_retry = vha->hw->login_retry_count; + ql_dbg(ql_dbg_disc, vha, 0xffff, + "GNL failed Port login retry %8phN, retry cnt=%d.\n", + fcport->port_name, fcport->login_retry); + } + return; + } + + if (fcport->last_rscn_gen != fcport->rscn_gen) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC rscn gen changed rscn %d|%d \n", + __func__, fcport->port_name, + fcport->last_rscn_gen, fcport->rscn_gen); + qla24xx_post_gidpn_work(vha, fcport); + return; + } else if (fcport->last_login_gen != fcport->login_gen) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC login gen changed login %d|%d \n", + __func__, fcport->port_name, + fcport->last_login_gen, fcport->login_gen); + return; + } + + n = ea->data[0] / sizeof(struct get_name_list_extended); + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC n %d %02x%02x%02x lid %d \n", + __func__, __LINE__, fcport->port_name, n, + fcport->d_id.b.domain, fcport->d_id.b.area, + fcport->d_id.b.al_pa, fcport->loop_id); + + for (i = 0; i < n; i++) { + e = &vha->gnl.l[i]; + wwn = wwn_to_u64(e->port_name); + + if (memcmp((u8 *)&wwn, fcport->port_name, WWN_SIZE)) + continue; + + found = 1; + id.b.domain = e->port_id[2]; + id.b.area = e->port_id[1]; + id.b.al_pa = e->port_id[0]; + id.b.rsvd_1 = 0; + + loop_id = le16_to_cpu(e->nport_handle); + loop_id = (loop_id & 0x7fff); + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s found %8phC CLS [%d|%d] ID[%02x%02x%02x|%02x%02x%02x] lid[%d|%d]\n", + __func__, fcport->port_name, + e->current_login_state, fcport->fw_login_state, + id.b.domain, id.b.area, id.b.al_pa, + fcport->d_id.b.domain, fcport->d_id.b.area, + fcport->d_id.b.al_pa, loop_id, fcport->loop_id); + + if ((id.b24 != fcport->d_id.b24) || + ((fcport->loop_id != FC_NO_LOOP_ID) && + (fcport->loop_id != loop_id))) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, fcport->port_name); + qlt_schedule_sess_for_deletion(fcport, 1); + return; + } + + fcport->loop_id = loop_id; + + wwn = wwn_to_u64(fcport->port_name); + qlt_find_sess_invalidate_other(vha, wwn, + id, loop_id, &conflict_fcport); + + if (conflict_fcport) { + /* + * Another share fcport share the same loop_id & + * nport id. Conflict fcport needs to finish + * cleanup before this fcport can proceed to login. 
+ */ + conflict_fcport->conflict = fcport; + fcport->login_pause = 1; + } + + switch (e->current_login_state) { + case DSC_LS_PRLI_COMP: + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpdb\n", + __func__, __LINE__, fcport->port_name); + opt = PDO_FORCE_ADISC; + qla24xx_post_gpdb_work(vha, fcport, opt); + break; + + case DSC_LS_PORT_UNAVAIL: + default: + if (fcport->loop_id == FC_NO_LOOP_ID) { + qla2x00_find_new_loop_id(vha, fcport); + fcport->fw_login_state = DSC_LS_PORT_UNAVAIL; + } + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC \n", + __func__, __LINE__, fcport->port_name); + qla24xx_fcport_handle_login(vha, fcport); + break; + } + } + + if (!found) { + /* fw has no record of this port */ + if (fcport->loop_id == FC_NO_LOOP_ID) { + qla2x00_find_new_loop_id(vha, fcport); + fcport->fw_login_state = DSC_LS_PORT_UNAVAIL; + } else { + for (i = 0; i < n; i++) { + e = &vha->gnl.l[i]; + id.b.domain = e->port_id[0]; + id.b.area = e->port_id[1]; + id.b.al_pa = e->port_id[2]; + id.b.rsvd_1 = 0; + loop_id = le16_to_cpu(e->nport_handle); + + if (fcport->d_id.b24 == id.b24) { + conflict_fcport = + qla2x00_find_fcport_by_wwpn(vha, + e->port_name, 0); + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, + conflict_fcport->port_name); + qlt_schedule_sess_for_deletion + (conflict_fcport, 1); + } + + if (fcport->loop_id == loop_id) { + /* FW already picked this loop id for another fcport */ + qla2x00_find_new_loop_id(vha, fcport); + } + } + } + qla24xx_fcport_handle_login(vha, fcport); + } +} /* gnl_event */ + +static void +qla24xx_async_gnl_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = (struct srb *)s; + unsigned long flags; + struct fc_port *fcport = NULL, *tf; + u16 i, n = 0, loop_id; + struct event_arg ea; + struct get_name_list_extended *e; + u64 wwn; + struct list_head h; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x mb[1]=%x mb[2]=%x \n", + sp->name, res, sp->u.iocb_cmd.u.mbx.in_mb[1], + sp->u.iocb_cmd.u.mbx.in_mb[2]); + + memset(&ea, 0, sizeof(ea)); + ea.sp = sp; + ea.rc = res; + ea.event = FCME_GNL_DONE; + + if (sp->u.iocb_cmd.u.mbx.in_mb[1] >= + sizeof(struct get_name_list_extended)) { + n = sp->u.iocb_cmd.u.mbx.in_mb[1] / + sizeof(struct get_name_list_extended); + ea.data[0] = sp->u.iocb_cmd.u.mbx.in_mb[1]; /* amnt xfered */ + } + + for (i = 0; i < n; i++) { + e = &vha->gnl.l[i]; + loop_id = le16_to_cpu(e->nport_handle); + /* mask out reserve bit */ + loop_id = (loop_id & 0x7fff); + set_bit(loop_id, vha->hw->loop_id_map); + wwn = wwn_to_u64(e->port_name); + + ql_dbg(ql_dbg_disc + ql_dbg_verbose, vha, 0xffff, + "%s %8phC %02x:%02x:%02x state %d/%d lid %x \n", + __func__, (void *)&wwn, e->port_id[2], e->port_id[1], + e->port_id[0], e->current_login_state, e->last_login_state, + (loop_id & 0x7fff)); + } + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + vha->gnl.sent = 0; + + INIT_LIST_HEAD(&h); + fcport = tf = NULL; + if (!list_empty(&vha->gnl.fcports)) + list_splice_init(&vha->gnl.fcports, &h); + + list_for_each_entry_safe(fcport, tf, &h, gnl_entry) { + list_del_init(&fcport->gnl_entry); + fcport->flags &= ~FCF_ASYNC_SENT; + ea.fcport = fcport; + + qla2x00_fcport_event_handler(vha, &ea); + } + + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + sp->free(vha, sp); +} + +int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + srb_t *sp; + struct srb_iocb *mbx; + int rval = QLA_FUNCTION_FAILED; + unsigned long flags; + u16 *mb; + + if 
(!vha->flags.online) + goto done; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-gnlist WWPN %8phC \n", fcport->port_name); + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + fcport->flags |= FCF_ASYNC_SENT; + fcport->disc_state = DSC_GNL; + fcport->last_rscn_gen = fcport->rscn_gen; + fcport->last_login_gen = fcport->login_gen; + + list_add_tail(&fcport->gnl_entry, &vha->gnl.fcports); + if (vha->gnl.sent) { + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + rval = QLA_SUCCESS; + goto done; + } + vha->gnl.sent = 1; + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); + if (!sp) + goto done; + sp->type = SRB_MB_IOCB; + sp->name = "gnlist"; + sp->gen1 = fcport->rscn_gen; + sp->gen2 = fcport->login_gen; + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2); + + mb = sp->u.iocb_cmd.u.mbx.out_mb; + mb[0] = MBC_PORT_NODE_NAME_LIST; + mb[1] = BIT_2 | BIT_3; + mb[2] = MSW(vha->gnl.ldma); + mb[3] = LSW(vha->gnl.ldma); + mb[6] = MSW(MSD(vha->gnl.ldma)); + mb[7] = LSW(MSD(vha->gnl.ldma)); + mb[8] = vha->gnl.size; + mb[9] = vha->vp_idx; + + mbx = &sp->u.iocb_cmd; + mbx->timeout = qla2x00_async_iocb_timeout; + + sp->done = qla24xx_async_gnl_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s - OUT WWPN %8phC hndl %x\n", + sp->name, fcport->port_name, sp->handle); + + return rval; + +done_free_sp: + sp->free(fcport->vha, sp); +done: + fcport->flags &= ~FCF_ASYNC_SENT; + return rval; +} + +int qla24xx_post_gnl_work(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + struct qla_work_evt *e; + + e = qla2x00_alloc_work(vha, QLA_EVT_GNL); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.fcport.fcport = fcport; + return qla2x00_post_work(vha, e); +} + +static +void qla24xx_async_gpdb_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = (struct srb *)s; + struct qla_hw_data *ha = vha->hw; + uint64_t zero = 0; + struct port_database_24xx *pd; + fc_port_t *fcport = sp->fcport; + u16 *mb = sp->u.iocb_cmd.u.mbx.in_mb; + int rval = QLA_SUCCESS; + struct event_arg ea; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x, WWPN %8phC mb[1]=%x mb[2]=%x \n", + sp->name, res, fcport->port_name, mb[1], mb[2]); + + fcport->flags &= ~FCF_ASYNC_SENT; + + if (res) { + rval = res; + goto gpd_error_out; + } + + pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in; + + /* Check for logged in state. */ + if (pd->current_login_state != PDS_PRLI_COMPLETE && + pd->last_login_state != PDS_PRLI_COMPLETE) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "Unable to verify login-state (%x/%x) for " + "loop_id %x.\n", pd->current_login_state, + pd->last_login_state, fcport->loop_id); + rval = QLA_FUNCTION_FAILED; + goto gpd_error_out; + } + + if (fcport->loop_id == FC_NO_LOOP_ID || + (memcmp(fcport->port_name, (uint8_t *)&zero, 8) && + memcmp(fcport->port_name, pd->port_name, 8))) { + /* We lost the device mid way. */ + rval = QLA_NOT_LOGGED_IN; + goto gpd_error_out; + } + + /* Names are little-endian. */ + memcpy(fcport->node_name, pd->node_name, WWN_SIZE); + + /* Get port_id of device. */ + fcport->d_id.b.domain = pd->port_id[0]; + fcport->d_id.b.area = pd->port_id[1]; + fcport->d_id.b.al_pa = pd->port_id[2]; + fcport->d_id.b.rsvd_1 = 0; + + /* If not target must be initiator or unknown type. 
*/ + if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0) + fcport->port_type = FCT_INITIATOR; + else + fcport->port_type = FCT_TARGET; + + /* Passback COS information. */ + fcport->supported_classes = (pd->flags & PDF_CLASS_2) ? + FC_COS_CLASS2 : FC_COS_CLASS3; + + if (pd->prli_svc_param_word_3[0] & BIT_7) { + fcport->flags |= FCF_CONF_COMP_SUPPORTED; + fcport->conf_compl_supported = 1; + } + +gpd_error_out: + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_GPDB_DONE; + ea.rc = rval; + ea.fcport = fcport; + ea.sp = sp; + + qla2x00_fcport_event_handler(vha, &ea); + + dma_pool_free(ha->s_dma_pool, sp->u.iocb_cmd.u.mbx.in, + sp->u.iocb_cmd.u.mbx.in_dma); + + sp->free(vha, sp); +} + +static int qla24xx_post_gpdb_work(struct scsi_qla_host *vha, fc_port_t *fcport, + u8 opt) +{ + struct qla_work_evt *e; + + e = qla2x00_alloc_work(vha, QLA_EVT_GPDB); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.fcport.fcport = fcport; + e->u.fcport.opt = opt; + return qla2x00_post_work(vha, e); +} + +int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) +{ + srb_t *sp; + struct srb_iocb *mbx; + int rval = QLA_FUNCTION_FAILED; + u16 *mb; + dma_addr_t pd_dma; + struct port_database_24xx *pd; + struct qla_hw_data *ha = vha->hw; + + if (!vha->flags.online) + goto done; + + fcport->flags |= FCF_ASYNC_SENT; + fcport->disc_state = DSC_GPDB; + + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); + if (!sp) + goto done; + + pd = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma); + if (pd == NULL) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate port database structure.\n"); + goto done_free_sp; + } + memset(pd, 0, max(PORT_DATABASE_SIZE, PORT_DATABASE_24XX_SIZE)); + + sp->type = SRB_MB_IOCB; + sp->name = "gpdb"; + sp->gen1 = fcport->rscn_gen; + sp->gen2 = fcport->login_gen; + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + mb = sp->u.iocb_cmd.u.mbx.out_mb; + mb[0] = MBC_GET_PORT_DATABASE; + mb[1] = fcport->loop_id; + mb[2] = MSW(pd_dma); + mb[3] = LSW(pd_dma); + mb[6] = MSW(MSD(pd_dma)); + mb[7] = LSW(MSD(pd_dma)); + mb[9] = vha->vp_idx; + mb[10] = opt; + + mbx = &sp->u.iocb_cmd; + mbx->timeout = qla2x00_async_iocb_timeout; + mbx->u.mbx.in = (void *)pd; + mbx->u.mbx.in_dma = pd_dma; + + sp->done = qla24xx_async_gpdb_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s %8phC hndl %x opt %x\n", + sp->name, fcport->port_name, sp->handle, opt); + + return rval; + +done_free_sp: + if (pd) + dma_pool_free(ha->s_dma_pool, pd, pd_dma); + + sp->free(vha, sp); +done: + fcport->flags &= ~FCF_ASYNC_SENT; + qla24xx_post_gpdb_work(vha, fcport, opt); return rval; } +static +void qla24xx_handle_gpdb_event(scsi_qla_host_t *vha, struct event_arg *ea) +{ + int rval = ea->rc; + fc_port_t *fcport = ea->fcport; + unsigned long flags; + + fcport->flags &= ~FCF_ASYNC_SENT; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC DS %d LS %d rval %d\n", __func__, fcport->port_name, + fcport->disc_state, fcport->fw_login_state, rval); + + if (ea->sp->gen2 != fcport->login_gen) { + /* target side must have changed it. 
*/ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC generation changed rscn %d|%d login %d|%d \n", + __func__, fcport->port_name, fcport->last_rscn_gen, + fcport->rscn_gen, fcport->last_login_gen, + fcport->login_gen); + return; + } else if (ea->sp->gen1 != fcport->rscn_gen) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC post gidpn\n", + __func__, __LINE__, fcport->port_name); + qla24xx_post_gidpn_work(vha, fcport); + return; + } + + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC post del sess\n", + __func__, __LINE__, fcport->port_name); + qlt_schedule_sess_for_deletion_lock(fcport); + return; + } + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + ea->fcport->login_gen++; + ea->fcport->deleted = 0; + ea->fcport->logout_on_delete = 1; + + if (!ea->fcport->login_succ && !IS_SW_RESV_ADDR(ea->fcport->d_id)) { + vha->fcport_count++; + ea->fcport->login_succ = 1; + + if (!IS_IIDMA_CAPABLE(vha->hw) || + !vha->hw->flags.gpsc_supported) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post upd_fcport fcp_cnt %d\n", + __func__, __LINE__, fcport->port_name, + vha->fcport_count); + + qla24xx_post_upd_fcport_work(vha, fcport); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpsc fcp_cnt %d\n", + __func__, __LINE__, fcport->port_name, + vha->fcport_count); + + qla24xx_post_gpsc_work(vha, fcport); + } + } + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); +} /* gpdb event */ + +int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + if (fcport->login_retry == 0) + return 0; + + if (fcport->scan_state != QLA_FCPORT_FOUND) + return 0; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC DS %d LS %d P %d fl %x confl %p rscn %d|%d login %d|%d retry %d lid %d\n", + __func__, fcport->port_name, fcport->disc_state, + fcport->fw_login_state, fcport->login_pause, fcport->flags, + fcport->conflict, fcport->last_rscn_gen, fcport->rscn_gen, + fcport->last_login_gen, fcport->login_gen, fcport->login_retry, + fcport->loop_id); + + fcport->login_retry--; + + if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || + (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || + (fcport->fw_login_state == DSC_LS_PRLI_PEND)) + return 0; + + /* for pure Target Mode. 
Login will not be initiated */ + if (vha->host->active_mode == MODE_TARGET) + return 0; + + if (fcport->flags & FCF_ASYNC_SENT) { + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + return 0; + } + + switch (fcport->disc_state) { + case DSC_DELETED: + if (fcport->loop_id == FC_NO_LOOP_ID) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gnl\n", + __func__, __LINE__, fcport->port_name); + qla24xx_async_gnl(vha, fcport); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post login\n", + __func__, __LINE__, fcport->port_name); + fcport->disc_state = DSC_LOGIN_PEND; + qla2x00_post_async_login_work(vha, fcport, NULL); + } + break; + + case DSC_GNL: + if (fcport->login_pause) { + fcport->last_rscn_gen = fcport->rscn_gen; + fcport->last_login_gen = fcport->login_gen; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + break; + } + + if (fcport->flags & FCF_FCP2_DEVICE) { + u8 opt = PDO_FORCE_ADISC; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpdb\n", + __func__, __LINE__, fcport->port_name); + + fcport->disc_state = DSC_GPDB; + qla24xx_post_gpdb_work(vha, fcport, opt); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post login \n", + __func__, __LINE__, fcport->port_name); + fcport->disc_state = DSC_LOGIN_PEND; + qla2x00_post_async_login_work(vha, fcport, NULL); + } + + break; + + case DSC_LOGIN_FAILED: + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gidpn \n", + __func__, __LINE__, fcport->port_name); + + qla24xx_post_gidpn_work(vha, fcport); + break; + + case DSC_LOGIN_COMPLETE: + /* recheck login state */ + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpdb \n", + __func__, __LINE__, fcport->port_name); + + qla24xx_post_gpdb_work(vha, fcport, PDO_FORCE_ADISC); + break; + + default: + break; + } + + return 0; +} + +static +void qla24xx_handle_rscn_event(fc_port_t *fcport, struct event_arg *ea) +{ + fcport->rscn_gen++; + + ql_dbg(ql_dbg_disc, fcport->vha, 0xffff, + "%s %8phC DS %d LS %d\n", + __func__, fcport->port_name, fcport->disc_state, + fcport->fw_login_state); + + if (fcport->flags & FCF_ASYNC_SENT) + return; + + switch (fcport->disc_state) { + case DSC_DELETED: + case DSC_LOGIN_COMPLETE: + qla24xx_post_gidpn_work(fcport->vha, fcport); + break; + + default: + break; + } +} + +int qla24xx_post_newsess_work(struct scsi_qla_host *vha, port_id_t *id, + u8 *port_name, void *pla) +{ + struct qla_work_evt *e; + e = qla2x00_alloc_work(vha, QLA_EVT_NEW_SESS); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.new_sess.id = *id; + e->u.new_sess.pla = pla; + memcpy(e->u.new_sess.port_name, port_name, WWN_SIZE); + + return qla2x00_post_work(vha, e); +} + +static +int qla24xx_handle_delete_done_event(scsi_qla_host_t *vha, + struct event_arg *ea) +{ + fc_port_t *fcport = ea->fcport; + + if (test_bit(UNLOADING, &vha->dpc_flags)) + return 0; + + switch (vha->host->active_mode) { + case MODE_INITIATOR: + case MODE_DUAL: + if (fcport->scan_state == QLA_FCPORT_FOUND) + qla24xx_fcport_handle_login(vha, fcport); + break; + + case MODE_TARGET: + default: + /* no-op */ + break; + } + + return 0; +} + +static +void qla24xx_handle_relogin_event(scsi_qla_host_t *vha, + struct event_arg *ea) +{ + fc_port_t *fcport = ea->fcport; + + if (fcport->scan_state != QLA_FCPORT_FOUND) { + fcport->login_retry++; + return; + } + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phC DS %d LS %d P %d del %d cnfl %p rscn %d|%d login %d|%d fl %x\n", + __func__, fcport->port_name, fcport->disc_state, + fcport->fw_login_state, fcport->login_pause, + fcport->deleted, fcport->conflict, + 
fcport->last_rscn_gen, fcport->rscn_gen, + fcport->last_login_gen, fcport->login_gen, + fcport->flags); + + if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || + (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || + (fcport->fw_login_state == DSC_LS_PRLI_PEND)) + return; + + if (fcport->flags & FCF_ASYNC_SENT) { + fcport->login_retry++; + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + return; + } + + if (fcport->disc_state == DSC_DELETE_PEND) { + fcport->login_retry++; + return; + } + + if (fcport->last_rscn_gen != fcport->rscn_gen) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC post gidpn\n", + __func__, __LINE__, fcport->port_name); + + qla24xx_async_gidpn(vha, fcport); + return; + } + + qla24xx_fcport_handle_login(vha, fcport); +} + +void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, + struct event_arg *ea) +{ + fc_port_t *fcport; + int rc; + + switch (ea->event) { + case FCME_RELOGIN: + if (test_bit(UNLOADING, &vha->dpc_flags)) + return; + + qla24xx_handle_relogin_event(vha, ea); + break; + case FCME_RSCN: + if (test_bit(UNLOADING, &vha->dpc_flags)) + return; + + fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1); + if (!fcport) { + /* cable moved */ + rc = qla24xx_post_gpnid_work(vha, &ea->id); + if (rc) { + ql_log(ql_log_warn, vha, 0xffff, + "RSCN GPNID work failed %02x%02x%02x\n", + ea->id.b.domain, ea->id.b.area, + ea->id.b.al_pa); + } + } else { + ea->fcport = fcport; + qla24xx_handle_rscn_event(fcport, ea); + } + break; + case FCME_GIDPN_DONE: + qla24xx_handle_gidpn_event(vha, ea); + break; + case FCME_GNL_DONE: + qla24xx_handle_gnl_done_event(vha, ea); + break; + case FCME_GPSC_DONE: + qla24xx_post_upd_fcport_work(vha, ea->fcport); + break; + case FCME_PLOGI_DONE: /* Initiator side sent LLIOCB */ + qla24xx_handle_plogi_done_event(vha, ea); + break; + case FCME_GPDB_DONE: + qla24xx_handle_gpdb_event(vha, ea); + break; + case FCME_GPNID_DONE: + qla24xx_handle_gpnid_event(vha, ea); + break; + case FCME_DELETE_DONE: + qla24xx_handle_delete_done_event(vha, ea); + break; + default: + BUG_ON(1); + break; + } +} + static void qla2x00_tmf_iocb_timeout(void *data) { @@ -441,59 +1295,65 @@ qla24xx_async_abort_command(srb_t *sp) return qla24xx_async_abort_cmd(sp); } -void -qla2x00_async_login_done(struct scsi_qla_host *vha, fc_port_t *fcport, - uint16_t *data) +static void +qla24xx_handle_plogi_done_event(struct scsi_qla_host *vha, struct event_arg *ea) { - int rval; + port_id_t cid; /* conflict Nport id */ - switch (data[0]) { + switch (ea->data[0]) { case MBS_COMMAND_COMPLETE: /* * Driver must validate login state - If PRLI not complete, * force a relogin attempt via implicit LOGO, PLOGI, and PRLI * requests. 
*/ - rval = qla2x00_get_port_database(vha, fcport, 0); - if (rval == QLA_NOT_LOGGED_IN) { - fcport->flags &= ~FCF_ASYNC_SENT; - fcport->flags |= FCF_LOGIN_NEEDED; - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - break; - } - - if (rval != QLA_SUCCESS) { - qla2x00_post_async_logout_work(vha, fcport, NULL); - qla2x00_post_async_login_work(vha, fcport, NULL); - break; - } - if (fcport->flags & FCF_FCP2_DEVICE) { - qla2x00_post_async_adisc_work(vha, fcport, data); - break; - } - qla2x00_update_fcport(vha, fcport); + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpdb\n", + __func__, __LINE__, ea->fcport->port_name); + ea->fcport->chip_reset = vha->hw->chip_reset; + ea->fcport->logout_on_delete = 1; + qla24xx_post_gpdb_work(vha, ea->fcport, 0); break; case MBS_COMMAND_ERROR: - fcport->flags &= ~FCF_ASYNC_SENT; - if (data[1] & QLA_LOGIO_LOGIN_RETRIED) + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC cmd error %x\n", + __func__, __LINE__, ea->fcport->port_name, ea->data[1]); + + ea->fcport->flags &= ~FCF_ASYNC_SENT; + ea->fcport->disc_state = DSC_LOGIN_FAILED; + if (ea->data[1] & QLA_LOGIO_LOGIN_RETRIED) set_bit(RELOGIN_NEEDED, &vha->dpc_flags); else - qla2x00_mark_device_lost(vha, fcport, 1, 0); - break; - case MBS_PORT_ID_USED: - fcport->loop_id = data[1]; - qla2x00_post_async_logout_work(vha, fcport, NULL); - qla2x00_post_async_login_work(vha, fcport, NULL); + qla2x00_mark_device_lost(vha, ea->fcport, 1, 0); break; case MBS_LOOP_ID_USED: - fcport->loop_id++; - rval = qla2x00_find_new_loop_id(vha, fcport); - if (rval != QLA_SUCCESS) { - fcport->flags &= ~FCF_ASYNC_SENT; - qla2x00_mark_device_lost(vha, fcport, 1, 0); - break; + /* data[1] = IO PARAM 1 = nport ID */ + cid.b.domain = (ea->iop[1] >> 16) & 0xff; + cid.b.area = (ea->iop[1] >> 8) & 0xff; + cid.b.al_pa = ea->iop[1] & 0xff; + cid.b.rsvd_1 = 0; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC LoopID 0x%x in use post gnl\n", + __func__, __LINE__, ea->fcport->port_name, + ea->fcport->loop_id); + + if (IS_SW_RESV_ADDR(cid)) { + set_bit(ea->fcport->loop_id, vha->hw->loop_id_map); + ea->fcport->loop_id = FC_NO_LOOP_ID; + } else { + qla2x00_clear_loop_id(ea->fcport); } - qla2x00_post_async_login_work(vha, fcport, NULL); + qla24xx_post_gnl_work(vha, ea->fcport); + break; + case MBS_PORT_ID_USED: + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC NPortId %02x%02x%02x inuse post gidpn\n", + __func__, __LINE__, ea->fcport->port_name, + ea->fcport->d_id.b.domain, ea->fcport->d_id.b.area, + ea->fcport->d_id.b.al_pa); + + qla2x00_clear_loop_id(ea->fcport); + qla24xx_post_gidpn_work(vha, ea->fcport); break; } return; @@ -503,10 +1363,9 @@ void qla2x00_async_logout_done(struct scsi_qla_host *vha, fc_port_t *fcport, uint16_t *data) { - /* Don't re-login in target mode */ - if (!fcport->tgt_session) - qla2x00_mark_device_lost(vha, fcport, 1, 0); + qla2x00_mark_device_lost(vha, fcport, 1, 0); qlt_logo_completion_handler(fcport, data[0]); + fcport->login_gen++; return; } @@ -709,7 +1568,8 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) } } - if (qla_ini_mode_enabled(vha)) + if (qla_ini_mode_enabled(vha) || + qla_dual_mode_enabled(vha)) rval = qla2x00_init_rings(vha); ha->flags.chip_reset_done = 1; @@ -2968,8 +3828,14 @@ qla2x00_rport_del(void *data) rport = fcport->drport ? fcport->drport: fcport->rport; fcport->drport = NULL; spin_unlock_irqrestore(fcport->vha->host->host_lock, flags); - if (rport) + if (rport) { + ql_dbg(ql_dbg_disc, fcport->vha, 0xffff, + "%s %8phN. 
rport %p roles %x \n", + __func__, fcport->port_name, rport, + rport->roles); + fc_remote_port_delete(rport); + } } /** @@ -2995,9 +3861,42 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED); fcport->supported_classes = FC_COS_UNSPECIFIED; + fcport->ct_desc.ct_sns = dma_alloc_coherent(&vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), &fcport->ct_desc.ct_sns_dma, + GFP_ATOMIC); + fcport->disc_state = DSC_DELETED; + fcport->fw_login_state = DSC_LS_PORT_UNAVAIL; + fcport->deleted = QLA_SESS_DELETED; + fcport->login_retry = vha->hw->login_retry_count; + fcport->login_retry = 5; + fcport->logout_on_delete = 1; + + if (!fcport->ct_desc.ct_sns) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate ct_sns request.\n"); + kfree(fcport); + fcport = NULL; + } + INIT_WORK(&fcport->del_work, qla24xx_delete_sess_fn); + INIT_LIST_HEAD(&fcport->gnl_entry); + INIT_LIST_HEAD(&fcport->list); + return fcport; } +void +qla2x00_free_fcport(fc_port_t *fcport) +{ + if (fcport->ct_desc.ct_sns) { + dma_free_coherent(&fcport->vha->hw->pdev->dev, + sizeof(struct ct_sns_pkt), fcport->ct_desc.ct_sns, + fcport->ct_desc.ct_sns_dma); + + fcport->ct_desc.ct_sns = NULL; + } + kfree(fcport); +} + /* * qla2x00_configure_loop * Updates Fibre Channel Device Database with what is actually on loop. @@ -3334,6 +4233,7 @@ qla2x00_iidma_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) } } +/* qla2x00_reg_remote_port is reserved for Initiator Mode only.*/ static void qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) { @@ -3364,6 +4264,12 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) rport_ids.roles |= FC_RPORT_ROLE_FCP_INITIATOR; if (fcport->port_type == FCT_TARGET) rport_ids.roles |= FC_RPORT_ROLE_FCP_TARGET; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %8phN. rport %p is %s mode \n", + __func__, fcport->port_name, rport, + (fcport->port_type == FCT_TARGET) ? 
"tgt" : "ini"); + fc_remote_port_rolechg(rport, rport_ids.roles); } @@ -3387,20 +4293,45 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) { fcport->vha = vha; + if (IS_SW_RESV_ADDR(fcport->d_id)) + return; + + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %8phC \n", + __func__, fcport->port_name); + if (IS_QLAFX00(vha->hw)) { qla2x00_set_fcport_state(fcport, FCS_ONLINE); goto reg_port; } fcport->login_retry = 0; fcport->flags &= ~(FCF_LOGIN_NEEDED | FCF_ASYNC_SENT); + fcport->disc_state = DSC_LOGIN_COMPLETE; + fcport->deleted = 0; + fcport->logout_on_delete = 1; qla2x00_set_fcport_state(fcport, FCS_ONLINE); qla2x00_iidma_fcport(vha, fcport); qla24xx_update_fcport_fcp_prio(vha, fcport); reg_port: - if (qla_ini_mode_enabled(vha)) + switch (vha->host->active_mode) { + case MODE_INITIATOR: + qla2x00_reg_remote_port(vha, fcport); + break; + case MODE_TARGET: + if (!vha->vha_tgt.qla_tgt->tgt_stop && + !vha->vha_tgt.qla_tgt->tgt_stopped) + qlt_fc_port_added(vha, fcport); + break; + case MODE_DUAL: qla2x00_reg_remote_port(vha, fcport); + if (!vha->vha_tgt.qla_tgt->tgt_stop && + !vha->vha_tgt.qla_tgt->tgt_stopped) + qlt_fc_port_added(vha, fcport); + break; + default: + break; + } } /* @@ -3418,13 +4349,11 @@ static int qla2x00_configure_fabric(scsi_qla_host_t *vha) { int rval; - fc_port_t *fcport, *fcptemp; - uint16_t next_loopid; + fc_port_t *fcport; uint16_t mb[MAILBOX_REGISTER_COUNT]; uint16_t loop_id; LIST_HEAD(new_fcports); struct qla_hw_data *ha = vha->hw; - struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); int discovery_gen; /* If FL port exists, then SNS is present */ @@ -3443,6 +4372,8 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) vha->device_flags |= SWITCH_FOUND; do { + qla2x00_mgmt_svr_login(vha); + /* FDMI support. */ if (ql2xfdmienable && test_and_clear_bit(REGISTER_FDMI_NEEDED, &vha->dpc_flags)) @@ -3489,9 +4420,6 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) } } -#define QLA_FCPORT_SCAN 1 -#define QLA_FCPORT_FOUND 2 - list_for_each_entry(fcport, &vha->vp_fcports, list) { fcport->scan_state = QLA_FCPORT_SCAN; } @@ -3504,174 +4432,14 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) * will be newer than discovery_gen. */ qlt_do_generation_tick(vha, &discovery_gen); - rval = qla2x00_find_all_fabric_devs(vha, &new_fcports); + rval = qla2x00_find_all_fabric_devs(vha); if (rval != QLA_SUCCESS) break; - - /* - * Logout all previous fabric devices marked lost, except - * FCP2 devices. - */ - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) - break; - - if ((fcport->flags & FCF_FABRIC_DEVICE) == 0) - continue; - - if (fcport->scan_state == QLA_FCPORT_SCAN) { - if (qla_ini_mode_enabled(base_vha) && - atomic_read(&fcport->state) == FCS_ONLINE) { - qla2x00_mark_device_lost(vha, fcport, - ql2xplogiabsentdevice, 0); - if (fcport->loop_id != FC_NO_LOOP_ID && - (fcport->flags & FCF_FCP2_DEVICE) == 0 && - fcport->port_type != FCT_INITIATOR && - fcport->port_type != FCT_BROADCAST) { - ha->isp_ops->fabric_logout(vha, - fcport->loop_id, - fcport->d_id.b.domain, - fcport->d_id.b.area, - fcport->d_id.b.al_pa); - qla2x00_clear_loop_id(fcport); - } - } else if (!qla_ini_mode_enabled(base_vha)) { - /* - * In target mode, explicitly kill - * sessions and log out of devices - * that are gone, so that we don't - * end up with an initiator using the - * wrong ACL (if the fabric recycles - * an FC address and we have a stale - * session around) and so that we don't - * report initiators that are no longer - * on the fabric. 
- */ - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf077, - "port gone, logging out/killing session: " - "%8phC state 0x%x flags 0x%x fc4_type 0x%x " - "scan_state %d\n", - fcport->port_name, - atomic_read(&fcport->state), - fcport->flags, fcport->fc4_type, - fcport->scan_state); - qlt_fc_port_deleted(vha, fcport, - discovery_gen); - } - } - } - - /* Starting free loop ID. */ - next_loopid = ha->min_external_loopid; - - /* - * Scan through our port list and login entries that need to be - * logged in. - */ - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (atomic_read(&vha->loop_down_timer) || - test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) - break; - - if ((fcport->flags & FCF_FABRIC_DEVICE) == 0 || - (fcport->flags & FCF_LOGIN_NEEDED) == 0) - continue; - - /* - * If we're not an initiator, skip looking for devices - * and logging in. There's no reason for us to do it, - * and it seems to actively cause problems in target - * mode if we race with the initiator logging into us - * (we might get the "port ID used" status back from - * our login command and log out the initiator, which - * seems to cause havoc). - */ - if (!qla_ini_mode_enabled(base_vha)) { - if (fcport->scan_state == QLA_FCPORT_FOUND) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf078, - "port %8phC state 0x%x flags 0x%x fc4_type 0x%x " - "scan_state %d (initiator mode disabled; skipping " - "login)\n", fcport->port_name, - atomic_read(&fcport->state), - fcport->flags, fcport->fc4_type, - fcport->scan_state); - } - continue; - } - - if (fcport->loop_id == FC_NO_LOOP_ID) { - fcport->loop_id = next_loopid; - rval = qla2x00_find_new_loop_id( - base_vha, fcport); - if (rval != QLA_SUCCESS) { - /* Ran out of IDs to use */ - break; - } - } - /* Login and update database */ - qla2x00_fabric_dev_login(vha, fcport, &next_loopid); - } - - /* Exit if out of loop IDs. */ - if (rval != QLA_SUCCESS) { - break; - } - - /* - * Login and add the new devices to our port list. - */ - list_for_each_entry_safe(fcport, fcptemp, &new_fcports, list) { - if (atomic_read(&vha->loop_down_timer) || - test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) - break; - - /* - * If we're not an initiator, skip looking for devices - * and logging in. There's no reason for us to do it, - * and it seems to actively cause problems in target - * mode if we race with the initiator logging into us - * (we might get the "port ID used" status back from - * our login command and log out the initiator, which - * seems to cause havoc). - */ - if (qla_ini_mode_enabled(base_vha)) { - /* Find a new loop ID to use. */ - fcport->loop_id = next_loopid; - rval = qla2x00_find_new_loop_id(base_vha, - fcport); - if (rval != QLA_SUCCESS) { - /* Ran out of IDs to use */ - break; - } - - /* Login and update database */ - qla2x00_fabric_dev_login(vha, fcport, - &next_loopid); - } else { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf079, - "new port %8phC state 0x%x flags 0x%x fc4_type " - "0x%x scan_state %d (initiator mode disabled; " - "skipping login)\n", - fcport->port_name, - atomic_read(&fcport->state), - fcport->flags, fcport->fc4_type, - fcport->scan_state); - } - - list_move_tail(&fcport->list, &vha->vp_fcports); - } } while (0); - /* Free all new device structures not processed. 
*/ - list_for_each_entry_safe(fcport, fcptemp, &new_fcports, list) { - list_del(&fcport->list); - kfree(fcport); - } - - if (rval) { + if (rval) ql_dbg(ql_dbg_disc, vha, 0x2068, "Configure fabric error exit rval=%d.\n", rval); - } return (rval); } @@ -3690,12 +4458,11 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) * Kernel context. */ static int -qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, - struct list_head *new_fcports) +qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha) { int rval; uint16_t loop_id; - fc_port_t *fcport, *new_fcport, *fcptemp; + fc_port_t *fcport, *new_fcport; int found; sw_info_t *swl; @@ -3704,6 +4471,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, port_id_t wrap = {}, nxt_d_id; struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); + unsigned long flags; rval = QLA_SUCCESS; @@ -3724,9 +4492,8 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, swl = NULL; } else if (qla2x00_gnn_id(vha, swl) != QLA_SUCCESS) { swl = NULL; - } else if (ql2xiidmaenable && - qla2x00_gfpn_id(vha, swl) == QLA_SUCCESS) { - qla2x00_gpsc(vha, swl); + } else if (qla2x00_gfpn_id(vha, swl) != QLA_SUCCESS) { + swl = NULL; } /* If other queries succeeded probe for FC-4 type */ @@ -3788,11 +4555,6 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, ql_log(ql_log_warn, vha, 0x2064, "SNS scan failed -- assuming " "zero-entry result.\n"); - list_for_each_entry_safe(fcport, fcptemp, - new_fcports, list) { - list_del(&fcport->list); - kfree(fcport); - } rval = QLA_SUCCESS; break; } @@ -3835,6 +4597,8 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, new_fcport->fc4_type != FC4_TYPE_UNKNOWN)) continue; + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + /* Locate matching device in database. */ found = 0; list_for_each_entry(fcport, &vha->vp_fcports, list) { @@ -3857,7 +4621,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, */ if (fcport->d_id.b24 == new_fcport->d_id.b24 && (atomic_read(&fcport->state) == FCS_ONLINE || - !qla_ini_mode_enabled(base_vha))) { + (vha->host->active_mode == MODE_TARGET))) { break; } @@ -3877,7 +4641,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, * Log it out if still logged in and mark it for * relogin later. */ - if (!qla_ini_mode_enabled(base_vha)) { + if (qla_tgt_mode_enabled(base_vha)) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf080, "port changed FC ID, %8phC" " old %x:%x:%x (loop_id 0x%04x)-> new %x:%x:%x\n", @@ -3895,25 +4659,19 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, fcport->d_id.b24 = new_fcport->d_id.b24; fcport->flags |= FCF_LOGIN_NEEDED; - if (fcport->loop_id != FC_NO_LOOP_ID && - (fcport->flags & FCF_FCP2_DEVICE) == 0 && - (fcport->flags & FCF_ASYNC_SENT) == 0 && - fcport->port_type != FCT_INITIATOR && - fcport->port_type != FCT_BROADCAST) { - ha->isp_ops->fabric_logout(vha, fcport->loop_id, - fcport->d_id.b.domain, fcport->d_id.b.area, - fcport->d_id.b.al_pa); - qla2x00_clear_loop_id(fcport); - } - break; } - if (found) + if (found) { + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); continue; + } /* If device was not in our fcports list, then add it. */ new_fcport->scan_state = QLA_FCPORT_FOUND; - list_add_tail(&new_fcport->list, new_fcports); + list_add_tail(&new_fcport->list, &vha->vp_fcports); + + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + /* Allocate a new replacement fcport. 
*/ nxt_d_id.b24 = new_fcport->d_id.b24; @@ -3927,8 +4685,44 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, new_fcport->d_id.b24 = nxt_d_id.b24; } - kfree(new_fcport); + qla2x00_free_fcport(new_fcport); + + /* + * Logout all previous fabric dev marked lost, except FCP2 devices. + */ + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) + break; + + if ((fcport->flags & FCF_FABRIC_DEVICE) == 0 || + (fcport->flags & FCF_LOGIN_NEEDED) == 0) + continue; + + if (fcport->scan_state == QLA_FCPORT_SCAN) { + if ((qla_dual_mode_enabled(vha) || + qla_ini_mode_enabled(vha)) && + atomic_read(&fcport->state) == FCS_ONLINE) { + qla2x00_mark_device_lost(vha, fcport, + ql2xplogiabsentdevice, 0); + if (fcport->loop_id != FC_NO_LOOP_ID && + (fcport->flags & FCF_FCP2_DEVICE) == 0 && + fcport->port_type != FCT_INITIATOR && + fcport->port_type != FCT_BROADCAST) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, + fcport->port_name); + + qlt_schedule_sess_for_deletion_lock + (fcport); + continue; + } + } + } + if (fcport->scan_state == QLA_FCPORT_FOUND) + qla24xx_fcport_handle_login(vha, fcport); + } return (rval); } @@ -3980,64 +4774,6 @@ qla2x00_find_new_loop_id(scsi_qla_host_t *vha, fc_port_t *dev) return (rval); } -/* - * qla2x00_fabric_dev_login - * Login fabric target device and update FC port database. - * - * Input: - * ha: adapter state pointer. - * fcport: port structure list pointer. - * next_loopid: contains value of a new loop ID that can be used - * by the next login attempt. - * - * Returns: - * qla2x00 local function return status code. - * - * Context: - * Kernel context. - */ -static int -qla2x00_fabric_dev_login(scsi_qla_host_t *vha, fc_port_t *fcport, - uint16_t *next_loopid) -{ - int rval; - uint8_t opts; - struct qla_hw_data *ha = vha->hw; - - rval = QLA_SUCCESS; - - if (IS_ALOGIO_CAPABLE(ha)) { - if (fcport->flags & FCF_ASYNC_SENT) - return rval; - fcport->flags |= FCF_ASYNC_SENT; - rval = qla2x00_post_async_login_work(vha, fcport, NULL); - if (!rval) - return rval; - } - - fcport->flags &= ~FCF_ASYNC_SENT; - rval = qla2x00_fabric_login(vha, fcport, next_loopid); - if (rval == QLA_SUCCESS) { - /* Send an ADISC to FCP2 devices.*/ - opts = 0; - if (fcport->flags & FCF_FCP2_DEVICE) - opts |= BIT_1; - rval = qla2x00_get_port_database(vha, fcport, opts); - if (rval != QLA_SUCCESS) { - ha->isp_ops->fabric_logout(vha, fcport->loop_id, - fcport->d_id.b.domain, fcport->d_id.b.area, - fcport->d_id.b.al_pa); - qla2x00_mark_device_lost(vha, fcport, 1, 0); - } else { - qla2x00_update_fcport(vha, fcport); - } - } else { - /* Retry Login. */ - qla2x00_mark_device_lost(vha, fcport, 1, 0); - } - - return (rval); -} /* * qla2x00_fabric_login @@ -4329,13 +5065,6 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha) spin_unlock_irqrestore(&ha->vport_slock, flags); qla2x00_rport_del(fcport); - /* - * Release the target mode FC NEXUS in - * qla_target.c, if target mod is enabled. - */ - qlt_fc_port_deleted(vha, fcport, - base_vha->total_fcport_update_gen); - spin_lock_irqsave(&ha->vport_slock, flags); } } @@ -4718,6 +5447,8 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) if (!(IS_P3P_TYPE(ha))) ha->isp_ops->reset_chip(vha); + ha->chip_reset++; + atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); if (atomic_read(&vha->loop_state) != LOOP_DOWN) { atomic_set(&vha->loop_state, LOOP_DOWN); @@ -4772,8 +5503,6 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) /* Requeue all commands in outstanding command list. 
*/ qla2x00_abort_all_cmds(vha, DID_RESET << 16); } - - ha->chip_reset++; /* memory barrier */ wmb(); } @@ -4969,7 +5698,6 @@ qla2x00_restart_isp(scsi_qla_host_t *vha) if (!status) { /* Issue a marker after FW becomes ready. */ qla2x00_marker(vha, req, rsp, 0, 0, MK_SYNC_ALL); - set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); } @@ -5197,7 +5925,7 @@ qla24xx_nvram_config(scsi_qla_host_t *vha) rval = 1; } - if (!qla_ini_mode_enabled(vha)) { + if (qla_tgt_mode_enabled(vha)) { /* Don't enable full login after initial LIP */ nv->firmware_options_1 &= cpu_to_le32(~BIT_13); /* Don't enable LIP full login for initiator */ diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 44e404583c86..647828b9e622 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -166,8 +166,8 @@ qla2x00_set_fcport_state(fc_port_t *fcport, int state) /* Don't print state transitions during initial allocation of fcport */ if (old_state && old_state != state) { ql_dbg(ql_dbg_disc, fcport->vha, 0x207d, - "FCPort state transitioned from %s to %s - " - "portid=%02x%02x%02x.\n", + "FCPort %8phC state transitioned from %s to %s - " + "portid=%02x%02x%02x.\n", fcport->port_name, port_state_str[old_state], port_state_str[state], fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa); @@ -263,6 +263,7 @@ qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag) memset(sp, 0, sizeof(*sp)); sp->fcport = fcport; sp->iocbs = 1; + sp->vha = vha; done: if (!sp) QLA_VHA_MARK_NOT_BUSY(vha); @@ -285,7 +286,7 @@ qla2x00_init_timer(srb_t *sp, unsigned long tmo) sp->u.iocb_cmd.timer.function = qla2x00_sp_timeout; add_timer(&sp->u.iocb_cmd.timer); sp->free = qla2x00_sp_free; - if ((IS_QLAFX00(sp->fcport->vha->hw)) && + if ((IS_QLAFX00(((scsi_qla_host_t *)sp->vha)->hw)) && (sp->type == SRB_FXIOCB_DCMD)) init_completion(&sp->u.iocb_cmd.u.fxiocb.fxiocb_comp); if (sp->type == SRB_ELS_DCMD) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 834e22197842..33d3d90c089b 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2247,7 +2247,7 @@ qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->entry_type = LOGINOUT_PORT_IOCB_TYPE; logio->control_flags = cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO); - if (!sp->fcport->tgt_session || + if (!sp->fcport->se_sess || !sp->fcport->keep_nport_handle) logio->control_flags |= cpu_to_le16(LCF_FREE_NPORT); logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); @@ -3079,19 +3079,69 @@ qla24xx_abort_iocb(srb_t *sp, struct abort_entry_24xx *abt_iocb) wmb(); } +static void +qla2x00_mb_iocb(srb_t *sp, struct mbx_24xx_entry *mbx) +{ + int i, sz; + + mbx->entry_type = MBX_IOCB_TYPE; + mbx->handle = sp->handle; + sz = min(ARRAY_SIZE(mbx->mb), ARRAY_SIZE(sp->u.iocb_cmd.u.mbx.out_mb)); + + for (i = 0; i < sz; i++) + mbx->mb[i] = cpu_to_le16(sp->u.iocb_cmd.u.mbx.out_mb[i]); +} + +static void +qla2x00_ctpthru_cmd_iocb(srb_t *sp, struct ct_entry_24xx *ct_pkt) +{ + sp->u.iocb_cmd.u.ctarg.iocb = ct_pkt; + qla24xx_prep_ms_iocb(sp->vha, &sp->u.iocb_cmd.u.ctarg); + ct_pkt->handle = sp->handle; +} + +static void qla2x00_send_notify_ack_iocb(srb_t *sp, + struct nack_to_isp *nack) +{ + struct imm_ntfy_from_isp *ntfy = sp->u.iocb_cmd.u.nack.ntfy; + + nack->entry_type = NOTIFY_ACK_TYPE; + nack->entry_count = 1; + nack->ox_id = ntfy->ox_id; + + nack->u.isp24.handle = sp->handle; + nack->u.isp24.nport_handle = ntfy->u.isp24.nport_handle; + if 
(le16_to_cpu(ntfy->u.isp24.status) == IMM_NTFY_ELS) { + nack->u.isp24.flags = ntfy->u.isp24.flags & + cpu_to_le32(NOTIFY24XX_FLAGS_PUREX_IOCB); + } + nack->u.isp24.srr_rx_id = ntfy->u.isp24.srr_rx_id; + nack->u.isp24.status = ntfy->u.isp24.status; + nack->u.isp24.status_subcode = ntfy->u.isp24.status_subcode; + nack->u.isp24.fw_handle = ntfy->u.isp24.fw_handle; + nack->u.isp24.exchange_address = ntfy->u.isp24.exchange_address; + nack->u.isp24.srr_rel_offs = ntfy->u.isp24.srr_rel_offs; + nack->u.isp24.srr_ui = ntfy->u.isp24.srr_ui; + nack->u.isp24.srr_flags = 0; + nack->u.isp24.srr_reject_code = 0; + nack->u.isp24.srr_reject_code_expl = 0; + nack->u.isp24.vp_index = ntfy->u.isp24.vp_index; +} + int qla2x00_start_sp(srb_t *sp) { int rval; - struct qla_hw_data *ha = sp->fcport->vha->hw; + scsi_qla_host_t *vha = (scsi_qla_host_t *)sp->vha; + struct qla_hw_data *ha = vha->hw; void *pkt; unsigned long flags; rval = QLA_FUNCTION_FAILED; spin_lock_irqsave(&ha->hardware_lock, flags); - pkt = qla2x00_alloc_iocbs(sp->fcport->vha, sp); + pkt = qla2x00_alloc_iocbs(vha, sp); if (!pkt) { - ql_log(ql_log_warn, sp->fcport->vha, 0x700c, + ql_log(ql_log_warn, vha, 0x700c, "qla2x00_alloc_iocbs failed.\n"); goto done; } @@ -3139,12 +3189,23 @@ qla2x00_start_sp(srb_t *sp) case SRB_ELS_DCMD: qla24xx_els_logo_iocb(sp, pkt); break; + case SRB_CT_PTHRU_CMD: + qla2x00_ctpthru_cmd_iocb(sp, pkt); + break; + case SRB_MB_IOCB: + qla2x00_mb_iocb(sp, pkt); + break; + case SRB_NACK_PLOGI: + case SRB_NACK_PRLI: + case SRB_NACK_LOGO: + qla2x00_send_notify_ack_iocb(sp, pkt); + break; default: break; } wmb(); - qla2x00_start_iocbs(sp->fcport->vha, ha->req_q_map[0]); + qla2x00_start_iocbs(vha, ha->req_q_map[0]); done: spin_unlock_irqrestore(&ha->hardware_lock, flags); return rval; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 802b50cc89d0..bb747d7ebdab 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -561,14 +561,50 @@ qla2x00_is_a_vp_did(scsi_qla_host_t *vha, uint32_t rscn_entry) return ret; } -static inline fc_port_t * +fc_port_t * qla2x00_find_fcport_by_loopid(scsi_qla_host_t *vha, uint16_t loop_id) { - fc_port_t *fcport; + fc_port_t *f, *tf; + + f = tf = NULL; + list_for_each_entry_safe(f, tf, &vha->vp_fcports, list) + if (f->loop_id == loop_id) + return f; + return NULL; +} + +fc_port_t * +qla2x00_find_fcport_by_wwpn(scsi_qla_host_t *vha, u8 *wwpn, u8 incl_deleted) +{ + fc_port_t *f, *tf; + + f = tf = NULL; + list_for_each_entry_safe(f, tf, &vha->vp_fcports, list) { + if (memcmp(f->port_name, wwpn, WWN_SIZE) == 0) { + if (incl_deleted) + return f; + else if (f->deleted == 0) + return f; + } + } + return NULL; +} - list_for_each_entry(fcport, &vha->vp_fcports, list) - if (fcport->loop_id == loop_id) - return fcport; +fc_port_t * +qla2x00_find_fcport_by_nportid(scsi_qla_host_t *vha, port_id_t *id, + u8 incl_deleted) +{ + fc_port_t *f, *tf; + + f = tf = NULL; + list_for_each_entry_safe(f, tf, &vha->vp_fcports, list) { + if (f->d_id.b24 == id->b24) { + if (incl_deleted) + return f; + else if (f->deleted == 0) + return f; + } + } return NULL; } @@ -934,7 +970,11 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) ql_dbg(ql_dbg_async, vha, 0x508a, "Marking port lost loopid=%04x portid=%06x.\n", fcport->loop_id, fcport->d_id.b24); - qla2x00_mark_device_lost(fcport->vha, fcport, 1, 1); + if (qla_ini_mode_enabled(vha)) { + qla2x00_mark_device_lost(fcport->vha, fcport, 1, 1); + fcport->logout_on_delete = 0; + 
qlt_schedule_sess_for_deletion_lock(fcport); + } break; global_port_update: @@ -1024,27 +1064,17 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) if (qla2x00_is_a_vp_did(vha, rscn_entry)) break; - /* - * Search for the rport related to this RSCN entry and mark it - * as lost. - */ - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (atomic_read(&fcport->state) != FCS_ONLINE) - continue; - if (fcport->d_id.b24 == rscn_entry) { - qla2x00_mark_device_lost(vha, fcport, 0, 0); - break; - } - } - atomic_set(&vha->loop_down_timer, 0); vha->flags.management_server_logged_in = 0; + { + struct event_arg ea; - set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); - set_bit(RSCN_UPDATE, &vha->dpc_flags); - qla2x00_post_aen_work(vha, FCH_EVT_RSCN, rscn_entry); + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_RSCN; + ea.id.b24 = rscn_entry; + qla2x00_fcport_event_handler(vha, &ea); + } break; - /* case MBA_RIO_RESPONSE: */ case MBA_ZIO_RESPONSE: ql_dbg(ql_dbg_async, vha, 0x5015, @@ -1235,7 +1265,8 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func, index = LSW(pkt->handle); if (index >= req->num_outstanding_cmds) { ql_log(ql_log_warn, vha, 0x5031, - "Invalid command index (%x).\n", index); + "Invalid command index (%x) type %8ph.\n", + index, iocb); if (IS_P3P_TYPE(ha)) set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags); else @@ -1346,6 +1377,49 @@ qla2x00_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, sp->done(vha, sp, 0); } +static void +qla24xx_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, + struct mbx_24xx_entry *pkt) +{ + const char func[] = "MBX-IOCB2"; + srb_t *sp; + struct srb_iocb *si; + u16 sz, i; + int res; + + sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); + if (!sp) + return; + + si = &sp->u.iocb_cmd; + sz = min(ARRAY_SIZE(pkt->mb), ARRAY_SIZE(sp->u.iocb_cmd.u.mbx.in_mb)); + + for (i = 0; i < sz; i++) + si->u.mbx.in_mb[i] = le16_to_cpu(pkt->mb[i]); + + res = (si->u.mbx.in_mb[0] & MBS_MASK); + + sp->done(vha, sp, res); +} + +static void +qla24xxx_nack_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, + struct nack_to_isp *pkt) +{ + const char func[] = "nack"; + srb_t *sp; + int res = 0; + + sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); + if (!sp) + return; + + if (pkt->u.isp2x.status != cpu_to_le16(NOTIFY_ACK_SUCCESS)) + res = QLA_FUNCTION_FAILED; + + sp->done(vha, sp, res); +} + static void qla2x00_ct_entry(scsi_qla_host_t *vha, struct req_que *req, sts_entry_t *pkt, int iocb_type) @@ -1356,50 +1430,63 @@ qla2x00_ct_entry(scsi_qla_host_t *vha, struct req_que *req, struct bsg_job *bsg_job; struct fc_bsg_reply *bsg_reply; uint16_t comp_status; - int res; + int res = 0; sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (!sp) return; - bsg_job = sp->u.bsg_job; - bsg_reply = bsg_job->reply; - - type = "ct pass-through"; - - comp_status = le16_to_cpu(pkt->comp_status); - - /* return FC_CTELS_STATUS_OK and leave the decoding of the ELS/CT - * fc payload to the caller - */ - bsg_reply->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK; - bsg_job->reply_len = sizeof(struct fc_bsg_reply); - - if (comp_status != CS_COMPLETE) { - if (comp_status == CS_DATA_UNDERRUN) { - res = DID_OK << 16; - bsg_reply->reply_payload_rcv_len = - le16_to_cpu(((sts_entry_t *)pkt)->rsp_info_len); - - ql_log(ql_log_warn, vha, 0x5048, - "CT pass-through-%s error " - "comp_status-status=0x%x total_byte = 0x%x.\n", - type, comp_status, - bsg_reply->reply_payload_rcv_len); - } else { - ql_log(ql_log_warn, vha, 0x5049, - "CT 
pass-through-%s error " - "comp_status-status=0x%x.\n", type, comp_status); - res = DID_ERROR << 16; - bsg_reply->reply_payload_rcv_len = 0; - } - ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x5035, - (uint8_t *)pkt, sizeof(*pkt)); - } else { - res = DID_OK << 16; - bsg_reply->reply_payload_rcv_len = - bsg_job->reply_payload.payload_len; - bsg_job->reply_len = 0; + switch (sp->type) { + case SRB_CT_CMD: + bsg_job = sp->u.bsg_job; + bsg_reply = bsg_job->reply; + + type = "ct pass-through"; + + comp_status = le16_to_cpu(pkt->comp_status); + + /* + * return FC_CTELS_STATUS_OK and leave the decoding of the ELS/CT + * fc payload to the caller + */ + bsg_reply->reply_data.ctels_reply.status = FC_CTELS_STATUS_OK; + bsg_job->reply_len = sizeof(struct fc_bsg_reply); + + if (comp_status != CS_COMPLETE) { + if (comp_status == CS_DATA_UNDERRUN) { + res = DID_OK << 16; + bsg_reply->reply_payload_rcv_len = + le16_to_cpu(((sts_entry_t *)pkt)->rsp_info_len); + + ql_log(ql_log_warn, vha, 0x5048, + "CT pass-through-%s error comp_status=0x%x total_byte=0x%x.\n", + type, comp_status, + bsg_reply->reply_payload_rcv_len); + } else { + ql_log(ql_log_warn, vha, 0x5049, + "CT pass-through-%s error comp_status=0x%x.\n", + type, comp_status); + res = DID_ERROR << 16; + bsg_reply->reply_payload_rcv_len = 0; + } + ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x5035, + (uint8_t *)pkt, sizeof(*pkt)); + } else { + res = DID_OK << 16; + bsg_reply->reply_payload_rcv_len = + bsg_job->reply_payload.payload_len; + bsg_job->reply_len = 0; + } + break; + case SRB_CT_PTHRU_CMD: + /* + * borrowing sts_entry_24xx.comp_status. + * same location as ct_entry_24xx.comp_status + */ + res = qla2x00_chk_ms_status(vha, (ms_iocb_entry_t *)pkt, + (struct ct_sns_rsp *)sp->u.iocb_cmd.u.ctarg.rsp, + sp->name); + break; } sp->done(vha, sp, res); @@ -1440,6 +1527,15 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, "Completing %s: (%p) type=%d.\n", type, sp, sp->type); sp->done(vha, sp, 0); return; + case SRB_CT_PTHRU_CMD: + /* borrowing sts_entry_24xx.comp_status. 
+ same location as ct_entry_24xx.comp_status + */ + res = qla2x00_chk_ms_status(vha, (ms_iocb_entry_t *)pkt, + (struct ct_sns_rsp *)sp->u.iocb_cmd.u.ctarg.rsp, + sp->name); + sp->done(vha, sp, res); + return; default: ql_dbg(ql_dbg_user, vha, 0x503e, "Unrecognized SRB: (%p) type=%d.\n", sp, sp->type); @@ -1566,6 +1662,8 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, iop[0] = le32_to_cpu(logio->io_parameter[0]); iop[1] = le32_to_cpu(logio->io_parameter[1]); + lio->u.logio.iop[0] = iop[0]; + lio->u.logio.iop[1] = iop[1]; switch (iop[0]) { case LSC_SCODE_PORTID_USED: data[0] = MBS_PORT_ID_USED; @@ -2074,6 +2172,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) int res = 0; uint16_t state_flags = 0; uint16_t retry_delay = 0; + uint8_t no_logout = 0; sts = (sts_entry_t *) pkt; sts24 = (struct sts_entry_24xx *) pkt; @@ -2334,6 +2433,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) break; case CS_PORT_LOGGED_OUT: + no_logout = 1; case CS_PORT_CONFIG_CHG: case CS_PORT_BUSY: case CS_INCOMPLETE: @@ -2356,14 +2456,21 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) break; } - ql_dbg(ql_dbg_io, fcport->vha, 0x3021, - "Port to be marked lost on fcport=%02x%02x%02x, current " - "port state= %s.\n", fcport->d_id.b.domain, - fcport->d_id.b.area, fcport->d_id.b.al_pa, - port_state_str[atomic_read(&fcport->state)]); + if (atomic_read(&fcport->state) == FCS_ONLINE) { + ql_dbg(ql_dbg_disc, fcport->vha, 0x3021, + "Port to be marked lost on fcport=%02x%02x%02x, current " + "port state= %s comp_status %x.\n", fcport->d_id.b.domain, + fcport->d_id.b.area, fcport->d_id.b.al_pa, + port_state_str[atomic_read(&fcport->state)], + comp_status); + + if (no_logout) + fcport->logout_on_delete = 0; - if (atomic_read(&fcport->state) == FCS_ONLINE) qla2x00_mark_device_lost(fcport->vha, fcport, 1, 1); + qlt_schedule_sess_for_deletion_lock(fcport); + } + break; case CS_ABORTED: @@ -2627,10 +2734,16 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, } case ABTS_RESP_24XX: case CTIO_TYPE7: - case NOTIFY_ACK_TYPE: case CTIO_CRC2: qlt_response_pkt_all_vps(vha, (response_t *)pkt); break; + case NOTIFY_ACK_TYPE: + if (pkt->handle == QLA_TGT_SKIP_HANDLE) + qlt_response_pkt_all_vps(vha, (response_t *)pkt); + else + qla24xxx_nack_iocb_entry(vha, rsp->req, + (struct nack_to_isp *)pkt); + break; case MARKER_TYPE: /* Do nothing in this case, this check is to prevent it * from falling into default case @@ -2640,6 +2753,10 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, qla24xx_abort_iocb_entry(vha, rsp->req, (struct abort_entry_24xx *)pkt); break; + case MBX_IOCB_TYPE: + qla24xx_mbx_iocb_entry(vha, rsp->req, + (struct mbx_24xx_entry *)pkt); + break; default: /* Type Not Supported. 
*/ ql_dbg(ql_dbg_async, vha, 0x5042, @@ -2656,8 +2773,9 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, if (IS_P3P_TYPE(ha)) { struct device_reg_82xx __iomem *reg = &ha->iobase->isp82; WRT_REG_DWORD(®->rsp_q_out[0], rsp->ring_index); - } else + } else { WRT_REG_DWORD(rsp->rsp_q_out, rsp->ring_index); + } } static void diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 67f64db390b0..64f04aa2a503 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -1637,94 +1637,6 @@ qla2x00_init_firmware(scsi_qla_host_t *vha, uint16_t size) return rval; } -/* - * qla2x00_get_node_name_list - * Issue get node name list mailbox command, kmalloc() - * and return the resulting list. Caller must kfree() it! - * - * Input: - * ha = adapter state pointer. - * out_data = resulting list - * out_len = length of the resulting list - * - * Returns: - * qla2x00 local function return status code. - * - * Context: - * Kernel context. - */ -int -qla2x00_get_node_name_list(scsi_qla_host_t *vha, void **out_data, int *out_len) -{ - struct qla_hw_data *ha = vha->hw; - struct qla_port_24xx_data *list = NULL; - void *pmap; - mbx_cmd_t mc; - dma_addr_t pmap_dma; - ulong dma_size; - int rval, left; - - left = 1; - while (left > 0) { - dma_size = left * sizeof(*list); - pmap = dma_alloc_coherent(&ha->pdev->dev, dma_size, - &pmap_dma, GFP_KERNEL); - if (!pmap) { - ql_log(ql_log_warn, vha, 0x113f, - "%s(%ld): DMA Alloc failed of %ld\n", - __func__, vha->host_no, dma_size); - rval = QLA_MEMORY_ALLOC_FAILED; - goto out; - } - - mc.mb[0] = MBC_PORT_NODE_NAME_LIST; - mc.mb[1] = BIT_1 | BIT_3; - mc.mb[2] = MSW(pmap_dma); - mc.mb[3] = LSW(pmap_dma); - mc.mb[6] = MSW(MSD(pmap_dma)); - mc.mb[7] = LSW(MSD(pmap_dma)); - mc.mb[8] = dma_size; - mc.out_mb = MBX_0|MBX_1|MBX_2|MBX_3|MBX_6|MBX_7|MBX_8; - mc.in_mb = MBX_0|MBX_1; - mc.tov = 30; - mc.flags = MBX_DMA_IN; - - rval = qla2x00_mailbox_command(vha, &mc); - if (rval != QLA_SUCCESS) { - if ((mc.mb[0] == MBS_COMMAND_ERROR) && - (mc.mb[1] == 0xA)) { - left += le16_to_cpu(mc.mb[2]) / - sizeof(struct qla_port_24xx_data); - goto restart; - } - goto out_free; - } - - left = 0; - - list = kmemdup(pmap, dma_size, GFP_KERNEL); - if (!list) { - ql_log(ql_log_warn, vha, 0x1140, - "%s(%ld): failed to allocate node names list " - "structure.\n", __func__, vha->host_no); - rval = QLA_MEMORY_ALLOC_FAILED; - goto out_free; - } - -restart: - dma_free_coherent(&ha->pdev->dev, dma_size, pmap, pmap_dma); - } - - *out_data = list; - *out_len = dma_size; - -out: - return rval; - -out_free: - dma_free_coherent(&ha->pdev->dev, dma_size, pmap, pmap_dma); - return rval; -} /* * qla2x00_get_port_database diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 62ca1ddb0cc7..9c4699623410 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1043,6 +1043,34 @@ qla2x00_wait_for_hba_online(scsi_qla_host_t *vha) return (return_status); } +static inline int test_fcport_count(scsi_qla_host_t *vha) +{ + struct qla_hw_data *ha = vha->hw; + unsigned long flags; + int res; + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + ql_dbg(ql_dbg_init, vha, 0xffff, + "tgt %p, fcport_count=%d\n", + vha, vha->fcport_count); + res = (vha->fcport_count == 0); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + return res; +} + +/* + * qla2x00_wait_for_sess_deletion can only be called from remove_one. 
+ * it has dependency on UNLOADING flag to stop device discovery + */ +static void +qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha) +{ + qla2x00_mark_all_devices_lost(vha, 0); + + wait_event(vha->fcport_waitQ, test_fcport_count(vha)); +} + /* * qla2x00_wait_for_hba_ready * Wait till the HBA is ready before doing driver unload @@ -2904,6 +2932,18 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) goto probe_init_failed; + base_vha->gnl.size = (ha->max_loop_id + 1) * + sizeof(struct get_name_list_extended); + base_vha->gnl.l = dma_alloc_coherent(&ha->pdev->dev, + base_vha->gnl.size, &base_vha->gnl.ldma, GFP_KERNEL); + INIT_LIST_HEAD(&base_vha->gnl.fcports); + + if (base_vha->gnl.l == NULL) { + ql_log(ql_log_fatal, base_vha, 0xffff, + "Alloc failed for name list.\n"); + goto probe_init_failed; + } + /* Alloc arrays of request and response ring ptrs */ if (!qla2x00_alloc_queues(ha, req, rsp)) { ql_log(ql_log_fatal, base_vha, 0x003d, @@ -3123,7 +3163,8 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ql_dbg(ql_dbg_init, base_vha, 0x00f2, "Init done and hba is online.\n"); - if (qla_ini_mode_enabled(base_vha)) + if (qla_ini_mode_enabled(base_vha) || + qla_dual_mode_enabled(base_vha)) scsi_scan_host(host); else ql_dbg(ql_dbg_init, base_vha, 0x0122, @@ -3372,21 +3413,26 @@ qla2x00_remove_one(struct pci_dev *pdev) * resources. */ if (!atomic_read(&pdev->enable_cnt)) { + dma_free_coherent(&ha->pdev->dev, base_vha->gnl.size, + base_vha->gnl.l, base_vha->gnl.ldma); + scsi_host_put(base_vha->host); kfree(ha); pci_set_drvdata(pdev, NULL); return; } - qla2x00_wait_for_hba_ready(base_vha); - /* if UNLOAD flag is already set, then continue unload, + /* + * if UNLOAD flag is already set, then continue unload, * where it was set first. */ if (test_bit(UNLOADING, &base_vha->dpc_flags)) return; set_bit(UNLOADING, &base_vha->dpc_flags); + dma_free_coherent(&ha->pdev->dev, + base_vha->gnl.size, base_vha->gnl.l, base_vha->gnl.ldma); if (IS_QLAFX00(ha)) qlafx00_driver_shutdown(base_vha, 20); @@ -3535,10 +3581,14 @@ qla2x00_schedule_rport_del(struct scsi_qla_host *vha, fc_port_t *fcport, qla2xxx_wake_dpc(base_vha); } else { int now; - if (rport) + if (rport) { + ql_dbg(ql_dbg_disc, fcport->vha, 0xffff, + "%s %8phN. 
rport %p roles %x \n", + __func__, fcport->port_name, rport, + rport->roles); fc_remote_port_delete(rport); + } qlt_do_generation_tick(vha, &now); - qlt_fc_port_deleted(vha, fcport, now); } } @@ -3581,7 +3631,7 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha, fc_port_t *fcport, fcport->login_retry = vha->hw->login_retry_count; ql_dbg(ql_dbg_disc, vha, 0x2067, - "Port login retry %8phN, id = 0x%04x retry cnt=%d.\n", + "Port login retry %8phN, lid 0x%04x retry cnt=%d.\n", fcport->port_name, fcport->loop_id, fcport->login_retry); } } @@ -3604,7 +3654,13 @@ qla2x00_mark_all_devices_lost(scsi_qla_host_t *vha, int defer) { fc_port_t *fcport; + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Mark all dev lost\n"); + list_for_each_entry(fcport, &vha->vp_fcports, list) { + fcport->scan_state = 0; + qlt_schedule_sess_for_deletion_lock(fcport); + if (vha->vp_idx != 0 && vha->vp_idx != fcport->vha->vp_idx) continue; @@ -4219,6 +4275,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, spin_lock_init(&vha->work_lock); spin_lock_init(&vha->cmd_list_lock); + init_waitqueue_head(&vha->fcport_waitQ); sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no); ql_dbg(ql_dbg_init, vha, 0x0041, @@ -4232,7 +4289,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, return vha; } -static struct qla_work_evt * +struct qla_work_evt * qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type) { struct qla_work_evt *e; @@ -4254,7 +4311,7 @@ qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type) return e; } -static int +int qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e) { unsigned long flags; @@ -4315,7 +4372,6 @@ int qla2x00_post_async_##name##_work( \ } qla2x00_post_async_work(login, QLA_EVT_ASYNC_LOGIN); -qla2x00_post_async_work(login_done, QLA_EVT_ASYNC_LOGIN_DONE); qla2x00_post_async_work(logout, QLA_EVT_ASYNC_LOGOUT); qla2x00_post_async_work(logout_done, QLA_EVT_ASYNC_LOGOUT_DONE); qla2x00_post_async_work(adisc, QLA_EVT_ASYNC_ADISC); @@ -4368,6 +4424,67 @@ qlafx00_post_aenfx_work(struct scsi_qla_host *vha, uint32_t evtcode, return qla2x00_post_work(vha, e); } +int qla24xx_post_upd_fcport_work(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + struct qla_work_evt *e; + + e = qla2x00_alloc_work(vha, QLA_EVT_UPD_FCPORT); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.fcport.fcport = fcport; + return qla2x00_post_work(vha, e); +} + +static +void qla24xx_create_new_sess(struct scsi_qla_host *vha, struct qla_work_evt *e) +{ + unsigned long flags; + fc_port_t *fcport = NULL; + struct qlt_plogi_ack_t *pla = + (struct qlt_plogi_ack_t *)e->u.new_sess.pla; + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + fcport = qla2x00_find_fcport_by_wwpn(vha, e->u.new_sess.port_name, 1); + if (fcport) { + fcport->d_id = e->u.new_sess.id; + if (pla) { + fcport->fw_login_state = DSC_LS_PLOGI_PEND; + qlt_plogi_ack_link(vha, pla, fcport, QLT_PLOGI_LINK_SAME_WWN); + /* we took an extra ref_count to prevent PLOGI ACK when + * fcport/sess has not been created. 
+ */ + pla->ref_count--; + } + } else { + fcport = qla2x00_alloc_fcport(vha, GFP_KERNEL); + if (fcport) { + fcport->d_id = e->u.new_sess.id; + fcport->scan_state = QLA_FCPORT_FOUND; + fcport->flags |= FCF_FABRIC_DEVICE; + fcport->fw_login_state = DSC_LS_PLOGI_PEND; + + memcpy(fcport->port_name, e->u.new_sess.port_name, + WWN_SIZE); + list_add_tail(&fcport->list, &vha->vp_fcports); + + if (pla) { + qlt_plogi_ack_link(vha, pla, fcport, + QLT_PLOGI_LINK_SAME_WWN); + pla->ref_count--; + } + } + } + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + if (fcport) { + if (pla) + qlt_plogi_ack_unref(vha, pla); + else + qla24xx_async_gnl(vha, fcport); + } +} + void qla2x00_do_work(struct scsi_qla_host *vha) { @@ -4394,10 +4511,6 @@ qla2x00_do_work(struct scsi_qla_host *vha) qla2x00_async_login(vha, e->u.logio.fcport, e->u.logio.data); break; - case QLA_EVT_ASYNC_LOGIN_DONE: - qla2x00_async_login_done(vha, e->u.logio.fcport, - e->u.logio.data); - break; case QLA_EVT_ASYNC_LOGOUT: qla2x00_async_logout(vha, e->u.logio.fcport); break; @@ -4419,6 +4532,34 @@ qla2x00_do_work(struct scsi_qla_host *vha) case QLA_EVT_AENFX: qlafx00_process_aen(vha, e); break; + case QLA_EVT_GIDPN: + qla24xx_async_gidpn(vha, e->u.fcport.fcport); + break; + case QLA_EVT_GPNID: + qla24xx_async_gpnid(vha, &e->u.gpnid.id); + break; + case QLA_EVT_GPNID_DONE: + qla24xx_async_gpnid_done(vha, e->u.iosb.sp); + break; + case QLA_EVT_NEW_SESS: + qla24xx_create_new_sess(vha, e); + break; + case QLA_EVT_GPDB: + qla24xx_async_gpdb(vha, e->u.fcport.fcport, + e->u.fcport.opt); + break; + case QLA_EVT_GPSC: + qla24xx_async_gpsc(vha, e->u.fcport.fcport); + break; + case QLA_EVT_UPD_FCPORT: + qla2x00_update_fcport(vha, e->u.fcport.fcport); + break; + case QLA_EVT_GNL: + qla24xx_async_gnl(vha, e->u.fcport.fcport); + break; + case QLA_EVT_NACK: + qla24xx_do_nack_work(vha, e); + break; } if (e->flags & QLA_EVT_FLAG_FREE) kfree(e); @@ -4435,9 +4576,7 @@ void qla2x00_relogin(struct scsi_qla_host *vha) { fc_port_t *fcport; int status; - uint16_t next_loopid = 0; - struct qla_hw_data *ha = vha->hw; - uint16_t data[2]; + struct event_arg ea; list_for_each_entry(fcport, &vha->vp_fcports, list) { /* @@ -4448,77 +4587,38 @@ void qla2x00_relogin(struct scsi_qla_host *vha) fcport->login_retry && !(fcport->flags & FCF_ASYNC_SENT)) { fcport->login_retry--; if (fcport->flags & FCF_FABRIC_DEVICE) { - if (fcport->flags & FCF_FCP2_DEVICE) - ha->isp_ops->fabric_logout(vha, - fcport->loop_id, - fcport->d_id.b.domain, - fcport->d_id.b.area, - fcport->d_id.b.al_pa); - - if (fcport->loop_id == FC_NO_LOOP_ID) { - fcport->loop_id = next_loopid = - ha->min_external_loopid; - status = qla2x00_find_new_loop_id( - vha, fcport); - if (status != QLA_SUCCESS) { - /* Ran out of IDs to use */ - break; - } - } - - if (IS_ALOGIO_CAPABLE(ha)) { - fcport->flags |= FCF_ASYNC_SENT; - data[0] = 0; - data[1] = QLA_LOGIO_LOGIN_RETRIED; - status = qla2x00_post_async_login_work( - vha, fcport, data); - if (status == QLA_SUCCESS) - continue; - /* Attempt a retry. 
*/ - status = 1; - } else { - status = qla2x00_fabric_login(vha, - fcport, &next_loopid); - if (status == QLA_SUCCESS) { - int status2; - uint8_t opts; - - opts = 0; - if (fcport->flags & - FCF_FCP2_DEVICE) - opts |= BIT_1; - status2 = - qla2x00_get_port_database( - vha, fcport, opts); - if (status2 != QLA_SUCCESS) - status = 1; - } - } - } else + ql_dbg(ql_dbg_disc, fcport->vha, 0xffff, + "%s %8phC DS %d LS %d\n", __func__, + fcport->port_name, fcport->disc_state, + fcport->fw_login_state); + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_RELOGIN; + ea.fcport = fcport; + qla2x00_fcport_event_handler(vha, &ea); + } else { status = qla2x00_local_device_login(vha, fcport); + if (status == QLA_SUCCESS) { + fcport->old_loop_id = fcport->loop_id; + ql_dbg(ql_dbg_disc, vha, 0x2003, + "Port login OK: logged in ID 0x%x.\n", + fcport->loop_id); + qla2x00_update_fcport(vha, fcport); + } else if (status == 1) { + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + /* retry the login again */ + ql_dbg(ql_dbg_disc, vha, 0x2007, + "Retrying %d login again loop_id 0x%x.\n", + fcport->login_retry, + fcport->loop_id); + } else { + fcport->login_retry = 0; + } - if (status == QLA_SUCCESS) { - fcport->old_loop_id = fcport->loop_id; - - ql_dbg(ql_dbg_disc, vha, 0x2003, - "Port login OK: logged in ID 0x%x.\n", - fcport->loop_id); - - qla2x00_update_fcport(vha, fcport); - - } else if (status == 1) { - set_bit(RELOGIN_NEEDED, &vha->dpc_flags); - /* retry the login again */ - ql_dbg(ql_dbg_disc, vha, 0x2007, - "Retrying %d login again loop_id 0x%x.\n", - fcport->login_retry, fcport->loop_id); - } else { - fcport->login_retry = 0; + if (fcport->login_retry == 0 && + status != QLA_SUCCESS) + qla2x00_clear_loop_id(fcport); } - - if (fcport->login_retry == 0 && status != QLA_SUCCESS) - qla2x00_clear_loop_id(fcport); } if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) break; @@ -5182,7 +5282,8 @@ qla2x00_disable_board_on_pci_error(struct work_struct *work) struct pci_dev *pdev = ha->pdev; scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); - /* if UNLOAD flag is already set, then continue unload, + /* + * if UNLOAD flag is already set, then continue unload, * where it was set first. 
*/ if (test_bit(UNLOADING, &base_vha->dpc_flags)) @@ -5191,6 +5292,8 @@ qla2x00_disable_board_on_pci_error(struct work_struct *work) ql_log(ql_log_warn, base_vha, 0x015b, "Disabling adapter.\n"); + qla2x00_wait_for_sess_deletion(base_vha); + set_bit(UNLOADING, &base_vha->dpc_flags); qla2x00_delete_all_vps(ha, base_vha); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 40b61a327786..e4d7d32c82e7 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -118,6 +118,9 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha, uint16_t srr_flags, uint16_t srr_reject_code, uint8_t srr_explan); static void qlt_send_term_imm_notif(struct scsi_qla_host *vha, struct imm_ntfy_from_isp *imm, int ha_locked); +static struct fc_port *qlt_create_sess(struct scsi_qla_host *vha, + fc_port_t *fcport, bool local); +void qlt_unreg_sess(struct fc_port *sess); /* * Global Variables */ @@ -378,6 +381,247 @@ void qlt_response_pkt_all_vps(struct scsi_qla_host *vha, response_t *pkt) /* * All qlt_plogi_ack_t operations are protected by hardware_lock */ +static int qla24xx_post_nack_work(struct scsi_qla_host *vha, fc_port_t *fcport, + struct imm_ntfy_from_isp *ntfy, int type) +{ + struct qla_work_evt *e; + e = qla2x00_alloc_work(vha, QLA_EVT_NACK); + if (!e) + return QLA_FUNCTION_FAILED; + + e->u.nack.fcport = fcport; + e->u.nack.type = type; + memcpy(e->u.nack.iocb, ntfy, sizeof(struct imm_ntfy_from_isp)); + return qla2x00_post_work(vha, e); +} + +static +void qla2x00_async_nack_sp_done(void *v, void *s, int res) +{ + struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = (struct srb *)s; + unsigned long flags; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async done-%s res %x %8phC type %d\n", + sp->name, res, sp->fcport->port_name, sp->type); + + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + sp->fcport->flags &= ~FCF_ASYNC_SENT; + sp->fcport->chip_reset = vha->hw->chip_reset; + + switch (sp->type) { + case SRB_NACK_PLOGI: + sp->fcport->login_gen++; + sp->fcport->fw_login_state = DSC_LS_PLOGI_COMP; + sp->fcport->logout_on_delete = 1; + break; + + case SRB_NACK_PRLI: + sp->fcport->fw_login_state = DSC_LS_PRLI_COMP; + sp->fcport->deleted = 0; + + if (!sp->fcport->login_succ && + !IS_SW_RESV_ADDR(sp->fcport->d_id)) { + sp->fcport->login_succ = 1; + + vha->fcport_count++; + + if (!IS_IIDMA_CAPABLE(vha->hw) || + !vha->hw->flags.gpsc_supported) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post upd_fcport fcp_cnt %d\n", + __func__, __LINE__, + sp->fcport->port_name, + vha->fcport_count); + + qla24xx_post_upd_fcport_work(vha, sp->fcport); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpsc fcp_cnt %d\n", + __func__, __LINE__, + sp->fcport->port_name, + vha->fcport_count); + + qla24xx_post_gpsc_work(vha, sp->fcport); + } + } + break; + + case SRB_NACK_LOGO: + sp->fcport->login_gen++; + sp->fcport->fw_login_state = DSC_LS_PORT_UNAVAIL; + qlt_logo_completion_handler(sp->fcport, MBS_COMMAND_COMPLETE); + break; + } + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + sp->free(vha, sp); +} + +int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, + struct imm_ntfy_from_isp *ntfy, int type) +{ + int rval = QLA_FUNCTION_FAILED; + srb_t *sp; + char *c = NULL; + + fcport->flags |= FCF_ASYNC_SENT; + switch (type) { + case SRB_NACK_PLOGI: + fcport->fw_login_state = DSC_LS_PLOGI_PEND; + c = "PLOGI"; + break; + case SRB_NACK_PRLI: + fcport->fw_login_state = DSC_LS_PRLI_PEND; + c = "PRLI"; + 
break; + case SRB_NACK_LOGO: + fcport->fw_login_state = DSC_LS_LOGO_PEND; + c = "LOGO"; + break; + } + + sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); + if (!sp) + goto done; + + sp->type = type; + sp->name = "nack"; + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha)+2); + + sp->u.iocb_cmd.u.nack.ntfy = ntfy; + + sp->done = qla2x00_async_nack_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) + goto done_free_sp; + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "Async-%s %8phC hndl %x %s\n", + sp->name, fcport->port_name, sp->handle, c); + + return rval; + +done_free_sp: + sp->free(vha, sp); +done: + fcport->flags &= ~FCF_ASYNC_SENT; + return rval; +} + +void qla24xx_do_nack_work(struct scsi_qla_host *vha, struct qla_work_evt *e) +{ + fc_port_t *t; + unsigned long flags; + + switch (e->u.nack.type) { + case SRB_NACK_PRLI: + mutex_lock(&vha->vha_tgt.tgt_mutex); + t = qlt_create_sess(vha, e->u.nack.fcport, 0); + mutex_unlock(&vha->vha_tgt.tgt_mutex); + if (t) { + ql_log(ql_log_info, vha, 0xffff, + "%s create sess success %p", __func__, t); + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + /* create sess has an extra kref */ + vha->hw->tgt.tgt_ops->put_sess(e->u.nack.fcport); + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + } + break; + } + qla24xx_async_notify_ack(vha, e->u.nack.fcport, + (struct imm_ntfy_from_isp*)e->u.nack.iocb, e->u.nack.type); +} + +void qla24xx_delete_sess_fn(struct work_struct *work) +{ + fc_port_t *fcport = container_of(work, struct fc_port, del_work); + struct qla_hw_data *ha = fcport->vha->hw; + unsigned long flags; + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + + if (fcport->se_sess) { + ha->tgt.tgt_ops->shutdown_sess(fcport); + ha->tgt.tgt_ops->put_sess(fcport); + } else { + qlt_unreg_sess(fcport); + } + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); +} + +/* + * Called from qla2x00_reg_remote_port() + */ +void qlt_fc_port_added(struct scsi_qla_host *vha, fc_port_t *fcport) +{ + struct qla_hw_data *ha = vha->hw; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + struct fc_port *sess = fcport; + unsigned long flags; + + if (!vha->hw->tgt.tgt_ops) + return; + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + if (tgt->tgt_stop) { + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + return; + } + + if (fcport->disc_state == DSC_DELETE_PEND) { + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + return; + } + + if (!sess->se_sess) { + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + mutex_lock(&vha->vha_tgt.tgt_mutex); + sess = qlt_create_sess(vha, fcport, false); + mutex_unlock(&vha->vha_tgt.tgt_mutex); + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + } else { + if (fcport->fw_login_state == DSC_LS_PRLI_COMP) { + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + return; + } + + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s: kref_get fail sess %8phC \n", + __func__, sess->port_name); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + return; + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04c, + "qla_target(%u): %ssession for port %8phC " + "(loop ID %d) reappeared\n", vha->vp_idx, + sess->local ? 
"local " : "", sess->port_name, sess->loop_id); + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf007, + "Reappeared sess %p\n", sess); + + ha->tgt.tgt_ops->update_sess(sess, fcport->d_id, + fcport->loop_id, + (fcport->flags & FCF_CONF_COMP_SUPPORTED)); + } + + if (sess && sess->local) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04d, + "qla_target(%u): local session for " + "port %8phC (loop ID %d) became global\n", vha->vp_idx, + fcport->port_name, sess->loop_id); + sess->local = 0; + } + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); +} /* * This is a zero-base ref-counting solution, since hardware_lock @@ -413,30 +657,51 @@ qlt_plogi_ack_find_add(struct scsi_qla_host *vha, port_id_t *id, return pla; } -static void qlt_plogi_ack_unref(struct scsi_qla_host *vha, +void qlt_plogi_ack_unref(struct scsi_qla_host *vha, struct qlt_plogi_ack_t *pla) { struct imm_ntfy_from_isp *iocb = &pla->iocb; + port_id_t port_id; + uint16_t loop_id; + fc_port_t *fcport = pla->fcport; + BUG_ON(!pla->ref_count); pla->ref_count--; if (pla->ref_count) return; - ql_dbg(ql_dbg_async, vha, 0x5089, + ql_dbg(ql_dbg_disc, vha, 0x5089, "Sending PLOGI ACK to wwn %8phC s_id %02x:%02x:%02x loop_id %#04x" " exch %#x ox_id %#x\n", iocb->u.isp24.port_name, iocb->u.isp24.port_id[2], iocb->u.isp24.port_id[1], iocb->u.isp24.port_id[0], le16_to_cpu(iocb->u.isp24.nport_handle), iocb->u.isp24.exchange_address, iocb->ox_id); - qlt_send_notify_ack(vha, iocb, 0, 0, 0, 0, 0, 0); + + port_id.b.domain = iocb->u.isp24.port_id[2]; + port_id.b.area = iocb->u.isp24.port_id[1]; + port_id.b.al_pa = iocb->u.isp24.port_id[0]; + port_id.b.rsvd_1 = 0; + + loop_id = le16_to_cpu(iocb->u.isp24.nport_handle); + + fcport->loop_id = loop_id; + fcport->d_id = port_id; + qla24xx_post_nack_work(vha, fcport, iocb, SRB_NACK_PLOGI); + + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->plogi_link[QLT_PLOGI_LINK_SAME_WWN] == pla) + fcport->plogi_link[QLT_PLOGI_LINK_SAME_WWN] = NULL; + if (fcport->plogi_link[QLT_PLOGI_LINK_CONFLICT] == pla) + fcport->plogi_link[QLT_PLOGI_LINK_CONFLICT] = NULL; + } list_del(&pla->list); kmem_cache_free(qla_tgt_plogi_cachep, pla); } -static void +void qlt_plogi_ack_link(struct scsi_qla_host *vha, struct qlt_plogi_ack_t *pla, struct fc_port *sess, enum qlt_plogi_link_t link) { @@ -444,15 +709,19 @@ qlt_plogi_ack_link(struct scsi_qla_host *vha, struct qlt_plogi_ack_t *pla, /* Inc ref_count first because link might already be pointing at pla */ pla->ref_count++; + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf097, + "Linking sess %p [%d] wwn %8phC with PLOGI ACK to wwn %8phC" + " s_id %02x:%02x:%02x, ref=%d pla %p link %d\n", + sess, link, sess->port_name, + iocb->u.isp24.port_name, iocb->u.isp24.port_id[2], + iocb->u.isp24.port_id[1], iocb->u.isp24.port_id[0], + pla->ref_count, pla, link); + if (sess->plogi_link[link]) qlt_plogi_ack_unref(vha, sess->plogi_link[link]); - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf097, - "Linking sess %p [%d] wwn %8phC with PLOGI ACK to wwn %8phC" - " s_id %02x:%02x:%02x, ref=%d\n", sess, link, sess->port_name, - iocb->u.isp24.port_name, iocb->u.isp24.port_id[2], - iocb->u.isp24.port_id[1], iocb->u.isp24.port_id[0], - pla->ref_count); + if (link == QLT_PLOGI_LINK_SAME_WWN) + pla->fcport = sess; sess->plogi_link[link] = pla; } @@ -512,7 +781,7 @@ static void qlt_free_session_done(struct work_struct *work) struct qla_hw_data *ha = vha->hw; unsigned long flags; bool logout_started = false; - fc_port_t fcport; + struct event_arg ea; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf084, "%s: se_sess %p / sess %p from 
port %8phC loop_id %#04x" @@ -522,8 +791,8 @@ static void qlt_free_session_done(struct work_struct *work) sess->logout_on_delete, sess->keep_nport_handle, sess->send_els_logo); - BUG_ON(!tgt); + if (!IS_SW_RESV_ADDR(sess->d_id)) { if (sess->send_els_logo) { qlt_port_logo_t logo; logo.id = sess->d_id; @@ -533,14 +802,7 @@ static void qlt_free_session_done(struct work_struct *work) if (sess->logout_on_delete) { int rc; - - memset(&fcport, 0, sizeof(fcport)); - fcport.loop_id = sess->loop_id; - fcport.d_id = sess->d_id; - memcpy(fcport.port_name, sess->port_name, WWN_SIZE); - fcport.vha = vha; - - rc = qla2x00_post_async_logout_work(vha, &fcport, NULL); + rc = qla2x00_post_async_logout_work(vha, sess, NULL); if (rc != QLA_SUCCESS) ql_log(ql_log_warn, vha, 0xf085, "Schedule logo failed sess %p rc %d\n", @@ -548,6 +810,7 @@ static void qlt_free_session_done(struct work_struct *work) else logout_started = true; } + } /* * Release the target session for FC Nexus from fabric module code. @@ -568,12 +831,38 @@ static void qlt_free_session_done(struct work_struct *work) msleep(100); } - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf087, + ql_dbg(ql_dbg_disc, vha, 0xf087, "%s: sess %p logout completed\n", __func__, sess); } - spin_lock_irqsave(&ha->hardware_lock, flags); + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + if (sess->se_sess) { + sess->se_sess = NULL; + if (tgt && !IS_SW_RESV_ADDR(sess->d_id)) + tgt->sess_count--; + } + + sess->disc_state = DSC_DELETED; + sess->fw_login_state = DSC_LS_PORT_UNAVAIL; + sess->deleted = QLA_SESS_DELETED; + sess->login_retry = vha->hw->login_retry_count; + + if (sess->login_succ && !IS_SW_RESV_ADDR(sess->d_id)) { + vha->fcport_count--; + sess->login_succ = 0; + } + + if (sess->chip_reset != sess->vha->hw->chip_reset) + qla2x00_clear_loop_id(sess); + + if (sess->conflict) { + sess->conflict->login_pause = 0; + sess->conflict = NULL; + if (!test_bit(UNLOADING, &vha->dpc_flags)) + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); + } + { struct qlt_plogi_ack_t *own = sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN]; @@ -591,6 +880,7 @@ static void qlt_free_session_done(struct work_struct *work) own ? own->ref_count : -1, iocb->u.isp24.port_name, con->ref_count); qlt_plogi_ack_unref(vha, con); + sess->plogi_link[QLT_PLOGI_LINK_CONFLICT] = NULL; } else { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf09a, "se_sess %p / sess %p port %8phC is gone, %s (ref=%d)\n", @@ -600,25 +890,30 @@ static void qlt_free_session_done(struct work_struct *work) own ? 
own->ref_count : -1); } - if (own) + if (own) { + sess->fw_login_state = DSC_LS_PLOGI_PEND; qlt_plogi_ack_unref(vha, own); + sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN] = NULL; + } } - spin_unlock_irqrestore(&ha->hardware_lock, flags); - - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - sess->se_sess = NULL; spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf001, - "Unregistration of sess %p finished\n", sess); + "Unregistration of sess %p %8phC finished fcp_cnt %d\n", + sess, sess->port_name, vha->fcport_count); - /* - * We need to protect against race, when tgt is freed before or - * inside wake_up() - */ - tgt->sess_count--; - if (tgt->sess_count == 0) + if (tgt && (tgt->sess_count == 0)) wake_up_all(&tgt->waitQ); + + if (vha->fcport_count == 0) + wake_up_all(&vha->fcport_waitQ); + + if (!tgt || !tgt->tgt_stop) { + memset(&ea, 0, sizeof(ea)); + ea.event = FCME_DELETE_DONE; + ea.fcport = sess; + qla2x00_fcport_event_handler(vha, &ea); + } } /* ha->tgt.sess_lock supposed to be held on entry */ @@ -636,6 +931,9 @@ void qlt_unreg_sess(struct fc_port *sess) qla2x00_mark_device_lost(vha, sess, 1, 1); sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; + sess->disc_state = DSC_DELETE_PEND; + sess->last_rscn_gen = sess->rscn_gen; + sess->last_login_gen = sess->login_gen; INIT_WORK(&sess->free_work, qlt_free_session_done); schedule_work(&sess->free_work); @@ -679,48 +977,57 @@ static int qlt_reset(struct scsi_qla_host *vha, void *iocb, int mcmd) return qlt_issue_task_mgmt(sess, 0, mcmd, iocb, QLA24XX_MGMT_SEND_NACK); } +static void qla24xx_chk_fcp_state(struct fc_port *sess) +{ + if (sess->chip_reset != sess->vha->hw->chip_reset) { + sess->logout_on_delete = 0; + sess->logo_ack_needed = 0; + sess->fw_login_state = DSC_LS_PORT_UNAVAIL; + sess->scan_state = 0; + } +} + /* ha->tgt.sess_lock supposed to be held on entry */ -static void qlt_schedule_sess_for_deletion(struct fc_port *sess, +void qlt_schedule_sess_for_deletion(struct fc_port *sess, bool immediate) { struct qla_tgt *tgt = sess->tgt; - uint32_t dev_loss_tmo = tgt->ha->port_down_retry_count + 5; - if (sess->deleted) { - /* Upgrade to unconditional deletion in case it was temporary */ - if (immediate && sess->deleted == QLA_SESS_DELETION_PENDING) - list_del(&sess->del_list_entry); - else + if (sess->disc_state == DSC_DELETE_PEND) + return; + + if (sess->disc_state == DSC_DELETED) { + if (tgt && tgt->tgt_stop && (tgt->sess_count == 0)) + wake_up_all(&tgt->waitQ); + if (sess->vha->fcport_count == 0) + wake_up_all(&sess->vha->fcport_waitQ); + + if (!sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN] && + !sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]) return; } - ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, - "Scheduling sess %p for deletion\n", sess); + sess->disc_state = DSC_DELETE_PEND; - if (immediate) { - dev_loss_tmo = 0; - sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; - list_add(&sess->del_list_entry, &tgt->del_sess_list); - } else { - sess->deleted = QLA_SESS_DELETION_PENDING; - list_add_tail(&sess->del_list_entry, &tgt->del_sess_list); - } + if (sess->deleted == QLA_SESS_DELETED) + sess->logout_on_delete = 0; + + sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; + qla24xx_chk_fcp_state(sess); - sess->expires = jiffies + dev_loss_tmo * HZ; + ql_dbg(ql_dbg_tgt, sess->vha, 0xe001, + "Scheduling sess %p for deletion\n", sess); - ql_dbg(ql_dbg_tgt, sess->vha, 0xe048, - "qla_target(%d): session for port %8phC (loop ID %d s_id %02x:%02x:%02x)" - " scheduled for deletion in %u secs (expires: %lu) immed: %d, logout: %d, gen: %#x\n", - 
sess->vha->vp_idx, sess->port_name, sess->loop_id, - sess->d_id.b.domain, sess->d_id.b.area, sess->d_id.b.al_pa, - dev_loss_tmo, sess->expires, immediate, sess->logout_on_delete, - sess->generation); + schedule_work(&sess->del_work); +} - if (immediate) - mod_delayed_work(system_wq, &tgt->sess_del_work, 0); - else - schedule_delayed_work(&tgt->sess_del_work, - sess->expires - jiffies); +void qlt_schedule_sess_for_deletion_lock(struct fc_port *sess) +{ + unsigned long flags; + struct qla_hw_data *ha = sess->vha->hw; + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + qlt_schedule_sess_for_deletion(sess, 1); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } /* ha->tgt.sess_lock supposed to be held on entry */ @@ -731,7 +1038,7 @@ static void qlt_clear_tgt_db(struct qla_tgt *tgt) list_for_each_entry(sess, &vha->vp_fcports, list) { if (sess->se_sess) - qlt_schedule_sess_for_deletion(sess, true); + qlt_schedule_sess_for_deletion(sess, 1); } /* At this point tgt could be already dead */ @@ -786,49 +1093,6 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id, return res; } -/* ha->tgt.sess_lock supposed to be held on entry */ -static void qlt_undelete_sess(struct fc_port *sess) -{ - BUG_ON(sess->deleted != QLA_SESS_DELETION_PENDING); - - list_del_init(&sess->del_list_entry); - sess->deleted = 0; -} - -static void qlt_del_sess_work_fn(struct delayed_work *work) -{ - struct qla_tgt *tgt = container_of(work, struct qla_tgt, - sess_del_work); - struct scsi_qla_host *vha = tgt->vha; - struct qla_hw_data *ha = vha->hw; - struct fc_port *sess; - unsigned long flags, elapsed; - - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - while (!list_empty(&tgt->del_sess_list)) { - sess = list_entry(tgt->del_sess_list.next, typeof(*sess), - del_list_entry); - elapsed = jiffies; - if (time_after_eq(elapsed, sess->expires)) { - /* No turning back */ - list_del_init(&sess->del_list_entry); - sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; - - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf004, - "Timeout: sess %p about to be deleted\n", - sess); - if (sess->se_sess) - ha->tgt.tgt_ops->shutdown_sess(sess); - ha->tgt.tgt_ops->put_sess(sess); - } else { - schedule_delayed_work(&tgt->sess_del_work, - sess->expires - elapsed); - break; - } - } - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); -} - /* * Adds an extra ref to allow to drop hw lock after adding sess to the list. * Caller must put it. 
@@ -839,93 +1103,65 @@ static struct fc_port *qlt_create_sess( bool local) { struct qla_hw_data *ha = vha->hw; - struct fc_port *sess; + struct fc_port *sess = fcport; unsigned long flags; - /* Check to avoid double sessions */ - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_for_each_entry(sess, &vha->vp_fcports, list) { - if (!memcmp(sess->port_name, fcport->port_name, WWN_SIZE)) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf005, - "Double sess %p found (s_id %x:%x:%x, " - "loop_id %d), updating to d_id %x:%x:%x, " - "loop_id %d", sess, sess->d_id.b.domain, - sess->d_id.b.al_pa, sess->d_id.b.area, - sess->loop_id, fcport->d_id.b.domain, - fcport->d_id.b.al_pa, fcport->d_id.b.area, - fcport->loop_id); - - /* Cannot undelete at this point */ - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { - spin_unlock_irqrestore(&ha->tgt.sess_lock, - flags); - return NULL; - } - - if (sess->deleted) - qlt_undelete_sess(sess); - - if (!sess->se_sess) { - if (ha->tgt.tgt_ops->check_initiator_node_acl(vha, - &sess->port_name[0], sess) < 0) { - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - return NULL; - } - } - - kref_get(&sess->sess_kref); - ha->tgt.tgt_ops->update_sess(sess, fcport->d_id, fcport->loop_id, - (fcport->flags & FCF_CONF_COMP_SUPPORTED)); - - if (sess->local && !local) - sess->local = 0; - - qlt_do_generation_tick(vha, &sess->generation); - - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + if (vha->vha_tgt.qla_tgt->tgt_stop) + return NULL; - return sess; + if (fcport->se_sess) { + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s: kref_get_unless_zero failed for %8phC\n", + __func__, sess->port_name); + return NULL; } - } - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - - sess = kzalloc(sizeof(*sess), GFP_KERNEL); - if (!sess) { - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04a, - "qla_target(%u): session allocation failed, all commands " - "from port %8phC will be refused", vha->vp_idx, - fcport->port_name); - - return NULL; + return fcport; } sess->tgt = vha->vha_tgt.qla_tgt; - sess->vha = vha; - sess->d_id = fcport->d_id; - sess->loop_id = fcport->loop_id; sess->local = local; - kref_init(&sess->sess_kref); - INIT_LIST_HEAD(&sess->del_list_entry); - /* Under normal circumstances we want to logout from firmware when + /* + * Under normal circumstances we want to logout from firmware when * session eventually ends and release corresponding nport handle. * In the exception cases (e.g. when new PLOGI is waiting) corresponding - * code will adjust these flags as necessary. */ + * code will adjust these flags as necessary. + */ sess->logout_on_delete = 1; sess->keep_nport_handle = 0; + sess->logout_completed = 0; - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf006, - "Adding sess %p to tgt %p via ->check_initiator_node_acl()\n", - sess, vha->vha_tgt.qla_tgt); + if (ha->tgt.tgt_ops->check_initiator_node_acl(vha, + &fcport->port_name[0], sess) < 0) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "(%d) %8phC check_initiator_node_acl failed\n", + vha->vp_idx, fcport->port_name); + return NULL; + } else { + kref_init(&fcport->sess_kref); + /* + * Take an extra reference to ->sess_kref here to handle + * fc_port access across ->tgt.sess_lock reaquire. 
+ */ + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s: kref_get_unless_zero failed for %8phC\n", + __func__, sess->port_name); + return NULL; + } - sess->conf_compl_supported = (fcport->flags & FCF_CONF_COMP_SUPPORTED); - BUILD_BUG_ON(sizeof(sess->port_name) != sizeof(fcport->port_name)); - memcpy(sess->port_name, fcport->port_name, sizeof(sess->port_name)); + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + if (!IS_SW_RESV_ADDR(sess->d_id)) + vha->vha_tgt.qla_tgt->sess_count++; - spin_lock_irqsave(&ha->tgt.sess_lock, flags); - list_add_tail(&sess->list, &vha->vp_fcports); - vha->vha_tgt.qla_tgt->sess_count++; - qlt_do_generation_tick(vha, &sess->generation); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + qlt_do_generation_tick(vha, &sess->generation); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf006, + "Adding sess %p se_sess %p to tgt %p sess_count %d\n", + sess, sess->se_sess, vha->vha_tgt.qla_tgt, + vha->vha_tgt.qla_tgt->sess_count); ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04b, "qla_target(%d): %ssession for wwn %8phC (loop_id %d, " @@ -934,23 +1170,6 @@ static struct fc_port *qlt_create_sess( fcport->loop_id, sess->d_id.b.domain, sess->d_id.b.area, sess->d_id.b.al_pa, sess->conf_compl_supported ? "" : "not "); - /* - * Determine if this fc_port->port_name is allowed to access - * target mode using explict NodeACLs+MappedLUNs, or using - * TPG demo mode. If this is successful a target mode FC nexus - * is created. - */ - if (ha->tgt.tgt_ops->check_initiator_node_acl(vha, - &fcport->port_name[0], sess) < 0) { - return NULL; - } else { - /* - * Take an extra reference to ->sess_kref here to handle fc_port - * access across ->tgt.sess_lock reaquire. - */ - kref_get(&sess->sess_kref); - } - return sess; } @@ -1007,12 +1226,12 @@ static inline int test_tgt_sess_count(struct qla_tgt *tgt) * We need to protect against race, when tgt is freed before or * inside wake_up() */ - spin_lock_irqsave(&ha->hardware_lock, flags); + spin_lock_irqsave(&ha->tgt.sess_lock, flags); ql_dbg(ql_dbg_tgt, tgt->vha, 0xe002, "tgt %p, sess_count=%d\n", tgt, tgt->sess_count); res = (tgt->sess_count == 0); - spin_unlock_irqrestore(&ha->hardware_lock, flags); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return res; } @@ -1060,8 +1279,6 @@ int qlt_stop_phase1(struct qla_tgt *tgt) mutex_unlock(&vha->vha_tgt.tgt_mutex); mutex_unlock(&qla_tgt_mutex); - flush_delayed_work(&tgt->sess_del_work); - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf009, "Waiting for sess works (tgt %p)", tgt); spin_lock_irqsave(&tgt->sess_work_lock, flags); @@ -1205,6 +1422,7 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha, nack = (struct nack_to_isp *)pkt; nack->ox_id = ntfy->ox_id; + nack->u.isp24.handle = QLA_TGT_SKIP_HANDLE; nack->u.isp24.nport_handle = ntfy->u.isp24.nport_handle; if (le16_to_cpu(ntfy->u.isp24.status) == IMM_NTFY_ELS) { nack->u.isp24.flags = ntfy->u.isp24.flags & @@ -1552,7 +1770,7 @@ static void qlt_24xx_handle_abts(struct scsi_qla_host *vha, spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + if (sess->deleted) { qlt_24xx_send_abts_resp(vha, abts, FCP_TMF_REJECTED, false); return; } @@ -1650,10 +1868,19 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) return; } - if (mcmd->flags == QLA24XX_MGMT_SEND_NACK) - qlt_send_notify_ack(vha, &mcmd->orig_iocb.imm_ntfy, - 0, 0, 0, 0, 0, 0); - else { + if (mcmd->flags == QLA24XX_MGMT_SEND_NACK) { + if 
(mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == + ELS_LOGO) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "TM response logo %phC status %#x state %#x", + mcmd->sess->port_name, mcmd->fc_tm_rsp, + mcmd->flags); + qlt_schedule_sess_for_deletion_lock(mcmd->sess); + } else { + qlt_send_notify_ack(vha, &mcmd->orig_iocb.imm_ntfy, + 0, 0, 0, 0, 0, 0); + } + } else { if (mcmd->orig_iocb.atio.u.raw.entry_type == ABTS_RECV_24XX) qlt_24xx_send_abts_resp(vha, &mcmd->orig_iocb.abts, mcmd->fc_tm_rsp, false); @@ -2470,7 +2697,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, int res; spin_lock_irqsave(&ha->hardware_lock, flags); - if (cmd->sess && cmd->sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + if (cmd->sess && cmd->sess->deleted) { cmd->state = QLA_TGT_STATE_PROCESSED; if (cmd->sess->logout_completed) /* no need to terminate. FW already freed exchange. */ @@ -2645,7 +2872,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&ha->hardware_lock, flags); if (!vha->flags.online || (cmd->reset_count != ha->chip_reset) || - (cmd->sess && cmd->sess->deleted == QLA_SESS_DELETION_IN_PROGRESS)) { + (cmd->sess && cmd->sess->deleted)) { /* * Either the port is not online or this request was from * previous life, just abort the processing. @@ -3345,7 +3572,11 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, */ cmd->sess->logout_on_delete = 0; cmd->sess->send_els_logo = 1; - qlt_schedule_sess_for_deletion(cmd->sess, true); + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, cmd->sess->port_name); + + qlt_schedule_sess_for_deletion_lock(cmd->sess); } break; } @@ -3649,6 +3880,7 @@ static void qlt_create_sess_from_atio(struct work_struct *work) kfree(op); return; } + /* * __qlt_do_work() will call qlt_put_sess() to release * the extra reference taken above by qlt_make_local_sess() @@ -3656,13 +3888,11 @@ static void qlt_create_sess_from_atio(struct work_struct *work) __qlt_do_work(cmd); kfree(op); return; - out_term: spin_lock_irqsave(&ha->hardware_lock, flags); qlt_send_term_exchange(vha, NULL, &op->atio, 1, 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); kfree(op); - } /* ha->hardware_lock supposed to be held on entry */ @@ -3702,7 +3932,7 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, /* Another WWN used to have our s_id. Our PLOGI scheduled its * session deletion, but it's still in sess_del_work wq */ - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + if (sess->deleted) { ql_dbg(ql_dbg_io, vha, 0x3061, "New command while old session %p is being deleted\n", sess); @@ -3712,7 +3942,13 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, /* * Do kref_get() before returning + dropping qla_hw_data->hardware_lock. */ - kref_get(&sess->sess_kref); + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_tgt, vha, 0xffff, + "%s: kref_get fail, %8phC oxid %x \n", + __func__, sess->port_name, + be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id)); + return -EFAULT; + } cmd = qlt_get_tag(vha, sess, atio); if (!cmd) { @@ -3729,9 +3965,9 @@ static int qlt_handle_cmd_for_atio(struct scsi_qla_host *vha, cmd->se_cmd.cpuid = ha->msix_count ? 
ha->tgt.rspq_vector_cpuid : WORK_CPU_UNBOUND; - spin_lock(&vha->cmd_list_lock); + spin_lock_irqsave(&vha->cmd_list_lock, flags); list_add_tail(&cmd->cmd_list, &vha->qla_cmd_list); - spin_unlock(&vha->cmd_list_lock); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); INIT_WORK(&cmd->work, qlt_do_work); if (ha->msix_count) { @@ -3826,7 +4062,7 @@ static int qlt_handle_task_mgmt(struct scsi_qla_host *vha, void *iocb) sizeof(struct atio_from_isp)); } - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) + if (sess->deleted) return -EFAULT; return qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0); @@ -3901,22 +4137,20 @@ static int qlt_abort_task(struct scsi_qla_host *vha, void qlt_logo_completion_handler(fc_port_t *fcport, int rc) { - if (fcport->tgt_session) { - if (rc != MBS_COMMAND_COMPLETE) { - ql_dbg(ql_dbg_tgt_mgt, fcport->vha, 0xf093, - "%s: se_sess %p / sess %p from" - " port %8phC loop_id %#04x s_id %02x:%02x:%02x" - " LOGO failed: %#x\n", - __func__, - fcport->se_sess, - fcport->tgt_session, - fcport->port_name, fcport->loop_id, - fcport->d_id.b.domain, fcport->d_id.b.area, - fcport->d_id.b.al_pa, rc); - } - - fcport->logout_completed = 1; + if (rc != MBS_COMMAND_COMPLETE) { + ql_dbg(ql_dbg_tgt_mgt, fcport->vha, 0xf093, + "%s: se_sess %p / sess %p from" + " port %8phC loop_id %#04x s_id %02x:%02x:%02x" + " LOGO failed: %#x\n", + __func__, + fcport->se_sess, + fcport, + fcport->port_name, fcport->loop_id, + fcport->d_id.b.domain, fcport->d_id.b.area, + fcport->d_id.b.al_pa, rc); } + + fcport->logout_completed = 1; } /* @@ -3926,13 +4160,12 @@ void qlt_logo_completion_handler(fc_port_t *fcport, int rc) * deletion. Returns existing session with matching wwn if present. * Null otherwise. */ -static struct fc_port * -qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, +struct fc_port * +qlt_find_sess_invalidate_other(scsi_qla_host_t *vha, uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **conflict_sess) { struct fc_port *sess = NULL, *other_sess; uint64_t other_wwn; - scsi_qla_host_t *vha = tgt->vha; *conflict_sess = NULL; @@ -3949,7 +4182,7 @@ qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, /* find other sess with nport_id collision */ if (port_id.b24 == other_sess->d_id.b24) { if (loop_id != other_sess->loop_id) { - ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000c, + ql_dbg(ql_dbg_tgt_tmr, vha, 0x1000c, "Invalidating sess %p loop_id %d wwn %llx.\n", other_sess, other_sess->loop_id, other_wwn); @@ -3965,6 +4198,11 @@ qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, * Another wwn used to have our s_id/loop_id * kill the session, but don't free the loop_id */ + ql_dbg(ql_dbg_tgt_tmr, vha, 0xffff, + "Invalidating sess %p loop_id %d wwn %llx.\n", + other_sess, other_sess->loop_id, other_wwn); + + other_sess->keep_nport_handle = 1; *conflict_sess = other_sess; qlt_schedule_sess_for_deletion(other_sess, @@ -3974,8 +4212,9 @@ qlt_find_sess_invalidate_other(struct qla_tgt *tgt, uint64_t wwn, } /* find other sess with nport handle collision */ - if (loop_id == other_sess->loop_id) { - ql_dbg(ql_dbg_tgt_tmr, tgt->vha, 0x1000d, + if ((loop_id == other_sess->loop_id) && + (loop_id != FC_NO_LOOP_ID)) { + ql_dbg(ql_dbg_tgt_tmr, vha, 0x1000d, "Invalidating sess %p loop_id %d wwn %llx.\n", other_sess, other_sess->loop_id, other_wwn); @@ -4046,9 +4285,12 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, loop_id = le16_to_cpu(iocb->u.isp24.nport_handle); - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf026, - "qla_target(%d): Port ID: 0x%3phC ELS 
opcode: 0x%02x\n", - vha->vp_idx, iocb->u.isp24.port_id, iocb->u.isp24.status_subcode); + ql_dbg(ql_dbg_disc, vha, 0xf026, + "qla_target(%d): Port ID: %02x:%02x:%02x ELS opcode: 0x%02x lid %d %8phC\n", + vha->vp_idx, iocb->u.isp24.port_id[2], + iocb->u.isp24.port_id[1], iocb->u.isp24.port_id[0], + iocb->u.isp24.status_subcode, loop_id, + iocb->u.isp24.port_name); /* res = 1 means ack at the end of thread * res = 0 means ack async/later. @@ -4061,12 +4303,12 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, if (wwn) { spin_lock_irqsave(&tgt->ha->tgt.sess_lock, flags); - sess = qlt_find_sess_invalidate_other(tgt, wwn, - port_id, loop_id, &conflict_sess); + sess = qlt_find_sess_invalidate_other(vha, wwn, + port_id, loop_id, &conflict_sess); spin_unlock_irqrestore(&tgt->ha->tgt.sess_lock, flags); } - if (IS_SW_RESV_ADDR(port_id) || (!sess && !conflict_sess)) { + if (IS_SW_RESV_ADDR(port_id)) { res = 1; break; } @@ -4074,42 +4316,66 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, pla = qlt_plogi_ack_find_add(vha, &port_id, iocb); if (!pla) { qlt_send_term_imm_notif(vha, iocb, 1); - - res = 0; break; } res = 0; - if (conflict_sess) + if (conflict_sess) { + conflict_sess->login_gen++; qlt_plogi_ack_link(vha, pla, conflict_sess, - QLT_PLOGI_LINK_CONFLICT); + QLT_PLOGI_LINK_CONFLICT); + } - if (!sess) + if (!sess) { + pla->ref_count++; + qla24xx_post_newsess_work(vha, &port_id, + iocb->u.isp24.port_name, pla); + res = 0; break; + } qlt_plogi_ack_link(vha, pla, sess, QLT_PLOGI_LINK_SAME_WWN); - /* - * Under normal circumstances we want to release nport handle - * during LOGO process to avoid nport handle leaks inside FW. - * The exception is when LOGO is done while another PLOGI with - * the same nport handle is waiting as might be the case here. - * Note: there is always a possibily of a race where session - * deletion has already started for other reasons (e.g. ACL - * removal) and now PLOGI arrives: - * 1. if PLOGI arrived in FW after nport handle has been freed, - * FW must have assigned this PLOGI a new/same handle and we - * can proceed ACK'ing it as usual when session deletion - * completes. - * 2. if PLOGI arrived in FW before LOGO with LCF_FREE_NPORT - * bit reached it, the handle has now been released. We'll - * get an error when we ACK this PLOGI. Nothing will be sent - * back to initiator. Initiator should eventually retry - * PLOGI and situation will correct itself. - */ - sess->keep_nport_handle = ((sess->loop_id == loop_id) && - (sess->d_id.b24 == port_id.b24)); - qlt_schedule_sess_for_deletion(sess, true); + sess->fw_login_state = DSC_LS_PLOGI_PEND; + sess->d_id = port_id; + sess->login_gen++; + + switch (sess->disc_state) { + case DSC_DELETED: + qlt_plogi_ack_unref(vha, pla); + break; + + default: + /* + * Under normal circumstances we want to release nport handle + * during LOGO process to avoid nport handle leaks inside FW. + * The exception is when LOGO is done while another PLOGI with + * the same nport handle is waiting as might be the case here. + * Note: there is always a possibily of a race where session + * deletion has already started for other reasons (e.g. ACL + * removal) and now PLOGI arrives: + * 1. if PLOGI arrived in FW after nport handle has been freed, + * FW must have assigned this PLOGI a new/same handle and we + * can proceed ACK'ing it as usual when session deletion + * completes. + * 2. if PLOGI arrived in FW before LOGO with LCF_FREE_NPORT + * bit reached it, the handle has now been released. 
We'll + * get an error when we ACK this PLOGI. Nothing will be sent + * back to initiator. Initiator should eventually retry + * PLOGI and situation will correct itself. + */ + sess->keep_nport_handle = ((sess->loop_id == loop_id) && + (sess->d_id.b24 == port_id.b24)); + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post del sess\n", + __func__, __LINE__, sess->port_name); + + + qlt_schedule_sess_for_deletion_lock(sess); + break; + } + break; case ELS_PRLI: @@ -4117,8 +4383,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, if (wwn) { spin_lock_irqsave(&tgt->ha->tgt.sess_lock, flags); - sess = qlt_find_sess_invalidate_other(tgt, wwn, port_id, - loop_id, &conflict_sess); + sess = qlt_find_sess_invalidate_other(vha, wwn, port_id, + loop_id, &conflict_sess); spin_unlock_irqrestore(&tgt->ha->tgt.sess_lock, flags); } @@ -4132,7 +4398,7 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, } if (sess != NULL) { - if (sess->deleted) { + if (sess->fw_login_state == DSC_LS_PLOGI_PEND) { /* * Impatient initiator sent PRLI before last * PLOGI could finish. Will force him to re-try, @@ -4157,10 +4423,15 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, sess->local = 0; sess->loop_id = loop_id; sess->d_id = port_id; + sess->fw_login_state = DSC_LS_PRLI_PEND; if (wd3_lo & BIT_7) sess->conf_compl_supported = 1; + if ((wd3_lo & BIT_4) == 0) + sess->port_type = FCT_INITIATOR; + else + sess->port_type = FCT_TARGET; } res = 1; /* send notify ack */ @@ -4170,15 +4441,50 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); qla2xxx_wake_dpc(vha); } else { - /* todo: else - create sess here. */ - res = 1; /* send notify ack */ - } + if (sess) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post nack\n", + __func__, __LINE__, sess->port_name); + qla24xx_post_nack_work(vha, sess, iocb, + SRB_NACK_PRLI); + res = 0; + } + } break; case ELS_LOGO: case ELS_PRLO: + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + sess = qla2x00_find_fcport_by_loopid(vha, loop_id); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + if (sess) { + sess->login_gen++; + sess->fw_login_state = DSC_LS_LOGO_PEND; + sess->logout_on_delete = 0; + sess->logo_ack_needed = 1; + memcpy(sess->iocb, iocb, IOCB_SIZE); + } + res = qlt_reset(vha, iocb, QLA_TGT_NEXUS_LOSS_SESS); + + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s: logo %llx res %d sess %p ", + __func__, wwn, res, sess); + if (res == 0) { + /* cmd went up to ULP. look for qlt_xmit_tm_rsp() + for LOGO_ACK */ + BUG_ON(!sess); + res = 0; + } else { + /* cmd did not go upstair. 
*/ + if (sess) { + qlt_schedule_sess_for_deletion_lock(sess); + res = 0; + } + /* else logo will be ack */ + } break; case ELS_PDISC: case ELS_ADISC: @@ -4189,6 +4495,16 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, 0, 0, 0, 0, 0, 0); tgt->link_reinit_iocb_pending = 0; } + + sess = qla2x00_find_fcport_by_wwpn(vha, + iocb->u.isp24.port_name, 1); + if (sess) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "sess %p lid %d|%d DS %d LS %d\n", + sess, sess->loop_id, loop_id, + sess->disc_state, sess->fw_login_state); + } + res = 1; /* send notify ack */ break; } @@ -4966,7 +5282,6 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03f, "Async MB 2: Port Logged Out\n"); break; - default: break; } @@ -4977,8 +5292,10 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha, uint16_t loop_id) { - fc_port_t *fcport; + fc_port_t *fcport, *tfcp, *del; int rc; + unsigned long flags; + u8 newfcport = 0; fcport = kzalloc(sizeof(*fcport), GFP_KERNEL); if (!fcport) { @@ -5000,6 +5317,59 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha, return NULL; } + del = NULL; + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + tfcp = qla2x00_find_fcport_by_wwpn(vha, fcport->port_name, 1); + + if (tfcp) { + tfcp->d_id = fcport->d_id; + tfcp->port_type = fcport->port_type; + tfcp->supported_classes = fcport->supported_classes; + tfcp->flags |= fcport->flags; + + del = fcport; + fcport = tfcp; + } else { + if (vha->hw->current_topology == ISP_CFG_F) + fcport->flags |= FCF_FABRIC_DEVICE; + + list_add_tail(&fcport->list, &vha->vp_fcports); + if (!IS_SW_RESV_ADDR(fcport->d_id)) + vha->fcport_count++; + fcport->login_gen++; + fcport->disc_state = DSC_LOGIN_COMPLETE; + fcport->login_succ = 1; + newfcport = 1; + } + + fcport->deleted = 0; + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + + switch (vha->host->active_mode) { + case MODE_INITIATOR: + case MODE_DUAL: + if (newfcport) { + if (!IS_IIDMA_CAPABLE(vha->hw) || !vha->hw->flags.gpsc_supported) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post upd_fcport fcp_cnt %d\n", + __func__, __LINE__, fcport->port_name, vha->fcport_count); + qla24xx_post_upd_fcport_work(vha, fcport); + } else { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post gpsc fcp_cnt %d\n", + __func__, __LINE__, fcport->port_name, vha->fcport_count); + qla24xx_post_gpsc_work(vha, fcport); + } + } + break; + + case MODE_TARGET: + default: + break; + } + if (del) + qla2x00_free_fcport(del); + return fcport; } @@ -5012,6 +5382,17 @@ static struct fc_port *qlt_make_local_sess(struct scsi_qla_host *vha, int rc, global_resets; uint16_t loop_id = 0; + if ((s_id[0] == 0xFF) && (s_id[1] == 0xFC)) { + /* + * This is Domain Controller, so it should be + * OK to drop SCSI commands from it. + */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf042, + "Unable to find initiator with S_ID %x:%x:%x", + s_id[0], s_id[1], s_id[2]); + return NULL; + } + mutex_lock(&vha->vha_tgt.tgt_mutex); retry: @@ -5022,21 +5403,11 @@ static struct fc_port *qlt_make_local_sess(struct scsi_qla_host *vha, if (rc != 0) { mutex_unlock(&vha->vha_tgt.tgt_mutex); - if ((s_id[0] == 0xFF) && - (s_id[1] == 0xFC)) { - /* - * This is Domain Controller, so it should be - * OK to drop SCSI commands from it. 
- */ - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf042, - "Unable to find initiator with S_ID %x:%x:%x", - s_id[0], s_id[1], s_id[2]); - } else - ql_log(ql_log_info, vha, 0xf071, - "qla_target(%d): Unable to find " - "initiator with S_ID %x:%x:%x", - vha->vp_idx, s_id[0], s_id[1], - s_id[2]); + ql_log(ql_log_info, vha, 0xf071, + "qla_target(%d): Unable to find " + "initiator with S_ID %x:%x:%x", + vha->vp_idx, s_id[0], s_id[1], + s_id[2]); if (rc == -ENOENT) { qlt_port_logo_t logo; @@ -5104,12 +5475,18 @@ static void qlt_abort_work(struct qla_tgt *tgt, if (!sess) goto out_term2; } else { - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + if (sess->deleted) { sess = NULL; goto out_term2; } - kref_get(&sess->sess_kref); + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_tgt_tmr, vha, 0xffff, + "%s: kref_get fail %8phC \n", + __func__, sess->port_name); + sess = NULL; + goto out_term2; + } } spin_lock_irqsave(&ha->hardware_lock, flags); @@ -5133,7 +5510,8 @@ static void qlt_abort_work(struct qla_tgt *tgt, qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false); spin_unlock_irqrestore(&ha->hardware_lock, flags); - ha->tgt.tgt_ops->put_sess(sess); + if (sess) + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); } @@ -5168,12 +5546,18 @@ static void qlt_tmr_work(struct qla_tgt *tgt, if (!sess) goto out_term; } else { - if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { + if (sess->deleted) { sess = NULL; goto out_term; } - kref_get(&sess->sess_kref); + if (!kref_get_unless_zero(&sess->sess_kref)) { + ql_dbg(ql_dbg_tgt_tmr, vha, 0xffff, + "%s: kref_get fail %8phC\n", + __func__, sess->port_name); + sess = NULL; + goto out_term; + } } iocb = a; @@ -5191,8 +5575,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, out_term: qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0); - if (sess) - ha->tgt.tgt_ops->put_sess(sess); + ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } @@ -5270,8 +5653,6 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) tgt->vha = base_vha; init_waitqueue_head(&tgt->waitQ); INIT_LIST_HEAD(&tgt->del_sess_list); - INIT_DELAYED_WORK(&tgt->sess_del_work, - (void (*)(struct work_struct *))qlt_del_sess_work_fn); spin_lock_init(&tgt->sess_work_lock); INIT_WORK(&tgt->sess_work, qlt_sess_work_fn); INIT_LIST_HEAD(&tgt->sess_works_list); @@ -5472,6 +5853,7 @@ static void qlt_clear_mode(struct scsi_qla_host *vha) break; case QLA2XXX_INI_MODE_ENABLED: vha->host->active_mode &= ~MODE_TARGET; + vha->host->active_mode |= MODE_INITIATOR; break; default: break; @@ -5594,13 +5976,12 @@ qlt_rff_id(struct scsi_qla_host *vha, struct ct_sns_req *ct_req) * FC-4 Feature bit 0 indicates target functionality to the name server. 
*/ if (qla_tgt_mode_enabled(vha)) { - if (qla_ini_mode_enabled(vha)) - ct_req->req.rff_id.fc4_feature = BIT_0 | BIT_1; - else - ct_req->req.rff_id.fc4_feature = BIT_0; + ct_req->req.rff_id.fc4_feature = BIT_0; } else if (qla_ini_mode_enabled(vha)) { ct_req->req.rff_id.fc4_feature = BIT_1; - } + } else if (qla_dual_mode_enabled(vha)) + ct_req->req.rff_id.fc4_feature = BIT_0 | BIT_1; + } /* @@ -5732,7 +6113,7 @@ qlt_24xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_24xx *nv) nv->firmware_options_1 |= cpu_to_le32(BIT_4); /* Disable ini mode, if requested */ - if (!qla_ini_mode_enabled(vha)) + if (qla_tgt_mode_enabled(vha)) nv->firmware_options_1 |= cpu_to_le32(BIT_5); /* Disable Full Login after LIP */ @@ -5834,7 +6215,7 @@ qlt_81xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_81xx *nv) nv->firmware_options_1 |= cpu_to_le32(BIT_4); /* Disable ini mode, if requested */ - if (!qla_ini_mode_enabled(vha)) + if (qla_tgt_mode_enabled(vha)) nv->firmware_options_1 |= cpu_to_le32(BIT_5); /* Disable Full Login after LIP */ nv->firmware_options_1 &= cpu_to_le32(~BIT_13); @@ -5940,8 +6321,9 @@ qlt_modify_vp_config(struct scsi_qla_host *vha, { if (qla_tgt_mode_enabled(vha)) vpmod->options_idx1 &= ~BIT_5; - /* Disable ini mode, if requested */ - if (!qla_ini_mode_enabled(vha)) + + /* Disable ini mode, if requested. */ + if (qla_tgt_mode_enabled(vha)) vpmod->options_idx1 &= ~BIT_4; } diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 9c35ba15c687..5e56192a4877 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -799,7 +799,6 @@ struct qla_tgt { /* Protected by hardware_lock */ struct list_head del_sess_list; - struct delayed_work sess_del_work; spinlock_t sess_work_lock; struct list_head sess_works_list; @@ -823,13 +822,6 @@ struct qla_tgt_sess_op { bool aborted; }; -enum qla_sess_deletion { - QLA_SESS_DELETION_NONE = 0, - QLA_SESS_DELETION_PENDING = 1, /* hopefully we can get rid of - * this one */ - QLA_SESS_DELETION_IN_PROGRESS = 2, -}; - enum trace_flags { TRC_NEW_CMD = BIT_0, TRC_DO_WORK = BIT_1, @@ -987,12 +979,17 @@ extern int ql2x_ini_mode; static inline bool qla_tgt_mode_enabled(struct scsi_qla_host *ha) { - return ha->host->active_mode & MODE_TARGET; + return ha->host->active_mode == MODE_TARGET; } static inline bool qla_ini_mode_enabled(struct scsi_qla_host *ha) { - return ha->host->active_mode & MODE_INITIATOR; + return ha->host->active_mode == MODE_INITIATOR; +} + +static inline bool qla_dual_mode_enabled(struct scsi_qla_host *ha) +{ + return (ha->host->active_mode == MODE_DUAL); } static inline void qla_reverse_ini_mode(struct scsi_qla_host *ha) -- GitLab From ead038556f646788e22ad7f0398556d10981ca5f Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:28:01 -0800 Subject: [PATCH 0186/1084] qla2xxx: Add Dual mode support in the driver Add switch to allow both Initiator Mode & Target mode to operate at the same time. 
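A note on the resource split this patch introduces: in dual mode the firmware
exchange pool is shared between initiator and target work, and the new
ql_dm_tgt_ex_pct module parameter selects the target-mode share. The following
standalone sketch mirrors the arithmetic added to qlt_24xx_config_nvram_stage1()
below; the exchange count used in main() is an assumed example value, not
something read from hardware.

#include <stdio.h>
#include <stdint.h>

/* Compute the 16-bit target-mode exchange count from the firmware's
 * original exchange count and the requested percentage. */
static uint16_t dual_mode_exchange_count(uint32_t orig_fw_xcb_count, int pct)
{
	uint32_t tmp;

	if (pct > 100)
		pct = 50;		/* out of range: fall back to an even split */
	else if (pct == 100)
		pct = 95;		/* leave some exchanges for the firmware itself */

	tmp = orig_fw_xcb_count * pct;
	tmp = tmp / 100;
	if (tmp > 0xffff)
		tmp = 0xffff;		/* NVRAM exchange_count is a 16-bit field */

	return (uint16_t)tmp;
}

int main(void)
{
	/* 4096 exchanges at the default 50% split -> 2048 for target mode. */
	printf("%u\n", dual_mode_exchange_count(4096, 50));
	return 0;
}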
Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_init.c | 6 +- drivers/scsi/qla2xxx/qla_isr.c | 19 ++++- drivers/scsi/qla2xxx/qla_target.c | 110 ++++++++++++++++++++++++----- drivers/scsi/qla2xxx/qla_target.h | 3 + drivers/scsi/qla2xxx/tcm_qla2xxx.c | 2 +- 6 files changed, 119 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 79b4e88db3b1..f67a1c82502c 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3389,6 +3389,7 @@ struct qla_hw_data { #define FLOGI_SP_SUPPORT BIT_13 uint8_t port_no; /* Physical port of adapter */ + uint8_t exch_starvation; /* Timeout timers. */ uint8_t loop_down_abort_time; /* port down timer */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 8b7c0468a0e0..4c0a2d8aa964 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1568,8 +1568,7 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) } } - if (qla_ini_mode_enabled(vha) || - qla_dual_mode_enabled(vha)) + if (qla_ini_mode_enabled(vha) || qla_dual_mode_enabled(vha)) rval = qla2x00_init_rings(vha); ha->flags.chip_reset_done = 1; @@ -3994,7 +3993,8 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) * Process any ATIO queue entries that came in * while we weren't online. */ - if (qla_tgt_mode_enabled(vha)) { + if (qla_tgt_mode_enabled(vha) || + qla_dual_mode_enabled(vha)) { if (IS_QLA27XX(ha) || IS_QLA83XX(ha)) { spin_lock_irqsave(&ha->tgt.atio_lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index bb747d7ebdab..a0a3412030cb 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1025,7 +1025,8 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) qla2x00_mark_all_devices_lost(vha, 1); - if (vha->vp_idx == 0 && !qla_ini_mode_enabled(vha)) + if (vha->vp_idx == 0 && + (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha))) set_bit(SCR_PENDING, &vha->dpc_flags); set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); @@ -1637,6 +1638,7 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, fcport->d_id.b.area, fcport->d_id.b.al_pa, le32_to_cpu(logio->io_parameter[0])); + vha->hw->exch_starvation = 0; data[0] = MBS_COMMAND_COMPLETE; if (sp->type != SRB_LOGIN_CMD) goto logio_done; @@ -1672,6 +1674,21 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, case LSC_SCODE_NPORT_USED: data[0] = MBS_LOOP_ID_USED; break; + case LSC_SCODE_NOXCB: + vha->hw->exch_starvation++; + if (vha->hw->exch_starvation > 5) { + ql_log(ql_log_warn, vha, 0xffff, + "Exchange starvation. Resetting RISC\n"); + + vha->hw->exch_starvation = 0; + + if (IS_P3P_TYPE(vha->hw)) + set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags); + else + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } + /* drop through */ default: data[0] = MBS_COMMAND_ERROR; break; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index e4d7d32c82e7..c8f312f8e0ff 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -55,8 +55,17 @@ MODULE_PARM_DESC(qlini_mode, "disabled on enabling target mode and then on disabling target mode " "enabled back; " "\"disabled\" - initiator mode will never be enabled; " + "\"dual\" - Initiator Modes will be enabled. 
Target Mode can be activated " + "when ready " "\"enabled\" (default) - initiator mode will always stay enabled."); +static int ql_dm_tgt_ex_pct = 50; +module_param(ql_dm_tgt_ex_pct, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(ql_dm_tgt_ex_pct, + "For Dual Mode (qlini_mode=dual), this parameter determines " + "the percentage of exchanges/cmds FW will allocate resources " + "for Target mode."); + int ql2x_ini_mode = QLA2XXX_INI_MODE_EXCLUSIVE; static int temp_sam_status = SAM_STAT_BUSY; @@ -1295,7 +1304,8 @@ int qlt_stop_phase1(struct qla_tgt *tgt) wait_event(tgt->waitQ, test_tgt_sess_count(tgt)); /* Big hammer */ - if (!ha->flags.host_shutting_down && qla_tgt_mode_enabled(vha)) + if (!ha->flags.host_shutting_down && + (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha))) qlt_disable_vha(vha); /* Wait for sessions to clear out (just in case) */ @@ -5266,6 +5276,32 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); break; + case MBA_REJECTED_FCP_CMD: + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "qla_target(%d): Async event LS_REJECT occurred " + "(m[0]=%x, m[1]=%x, m[2]=%x, m[3]=%x)", vha->vp_idx, + le16_to_cpu(mailbox[0]), le16_to_cpu(mailbox[1]), + le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); + + if (le16_to_cpu(mailbox[3]) == 1) { + /* exchange starvation. */ + vha->hw->exch_starvation++; + if (vha->hw->exch_starvation > 5) { + ql_log(ql_log_warn, vha, 0xffff, + "Exchange starvation-. Resetting RISC\n"); + + vha->hw->exch_starvation = 0; + if (IS_P3P_TYPE(vha->hw)) + set_bit(FCOE_CTX_RESET_NEEDED, + &vha->dpc_flags); + else + set_bit(ISP_ABORT_NEEDED, + &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } + } + break; + case MBA_PORT_UPDATE: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03d, "qla_target(%d): Port update async event %#x " @@ -5275,10 +5311,11 @@ void qlt_async_event(uint16_t code, struct scsi_qla_host *vha, le16_to_cpu(mailbox[2]), le16_to_cpu(mailbox[3])); login_code = le16_to_cpu(mailbox[2]); - if (login_code == 0x4) + if (login_code == 0x4) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03e, "Async MB 2: Got PLOGI Complete\n"); - else if (login_code == 0x7) + vha->hw->exch_starvation = 0; + } else if (login_code == 0x7) ql_dbg(ql_dbg_tgt_mgt, vha, 0xf03f, "Async MB 2: Port Logged Out\n"); break; @@ -5829,7 +5866,10 @@ static void qlt_set_mode(struct scsi_qla_host *vha) vha->host->active_mode = MODE_TARGET; break; case QLA2XXX_INI_MODE_ENABLED: - vha->host->active_mode |= MODE_TARGET; + vha->host->active_mode = MODE_UNKNOWN; + break; + case QLA2XXX_INI_MODE_DUAL: + vha->host->active_mode = MODE_DUAL; break; default: break; @@ -5852,8 +5892,8 @@ static void qlt_clear_mode(struct scsi_qla_host *vha) vha->host->active_mode = MODE_INITIATOR; break; case QLA2XXX_INI_MODE_ENABLED: - vha->host->active_mode &= ~MODE_TARGET; - vha->host->active_mode |= MODE_INITIATOR; + case QLA2XXX_INI_MODE_DUAL: + vha->host->active_mode = MODE_INITIATOR; break; default: break; @@ -5948,9 +5988,6 @@ static void qlt_disable_vha(struct scsi_qla_host *vha) void qlt_vport_create(struct scsi_qla_host *vha, struct qla_hw_data *ha) { - if (!qla_tgt_mode_enabled(vha)) - return; - vha->vha_tgt.qla_tgt = NULL; mutex_init(&vha->vha_tgt.tgt_mutex); @@ -5981,7 +6018,6 @@ qlt_rff_id(struct scsi_qla_host *vha, struct ct_sns_req *ct_req) ct_req->req.rff_id.fc4_feature = BIT_1; } else if (qla_dual_mode_enabled(vha)) ct_req->req.rff_id.fc4_feature = BIT_0 | BIT_1; - } /* @@ -6000,7 +6036,7 @@ qlt_init_atio_q_entries(struct scsi_qla_host *vha) uint16_t cnt; struct atio_from_isp 
*pkt = (struct atio_from_isp *)ha->tgt.atio_ring; - if (!qla_tgt_mode_enabled(vha)) + if (qla_ini_mode_enabled(vha)) return; for (cnt = 0; cnt < ha->tgt.atio_q_length; cnt++) { @@ -6093,8 +6129,10 @@ void qlt_24xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_24xx *nv) { struct qla_hw_data *ha = vha->hw; + u32 tmp; + u16 t; - if (qla_tgt_mode_enabled(vha)) { + if (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha)) { if (!ha->tgt.saved_set) { /* We save only once */ ha->tgt.saved_exchange_count = nv->exchange_count; @@ -6107,7 +6145,24 @@ qlt_24xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_24xx *nv) ha->tgt.saved_set = 1; } - nv->exchange_count = cpu_to_le16(0xFFFF); + if (qla_tgt_mode_enabled(vha)) { + nv->exchange_count = cpu_to_le16(0xFFFF); + } else { /* dual */ + if (ql_dm_tgt_ex_pct > 100) { + ql_dm_tgt_ex_pct = 50; + } else if (ql_dm_tgt_ex_pct == 100) { + /* leave some for FW */ + ql_dm_tgt_ex_pct = 95; + } + + tmp = ha->orig_fw_xcb_count * ql_dm_tgt_ex_pct; + tmp = tmp/100; + if (tmp > 0xffff) + tmp = 0xffff; + + t = tmp & 0xffff; + nv->exchange_count = cpu_to_le16(t); + } /* Enable target mode */ nv->firmware_options_1 |= cpu_to_le32(BIT_4); @@ -6192,11 +6247,13 @@ void qlt_81xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_81xx *nv) { struct qla_hw_data *ha = vha->hw; + u32 tmp; + u16 t; if (!QLA_TGT_MODE_ENABLED()) return; - if (qla_tgt_mode_enabled(vha)) { + if (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha)) { if (!ha->tgt.saved_set) { /* We save only once */ ha->tgt.saved_exchange_count = nv->exchange_count; @@ -6209,7 +6266,23 @@ qlt_81xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_81xx *nv) ha->tgt.saved_set = 1; } - nv->exchange_count = cpu_to_le16(0xFFFF); + if (qla_tgt_mode_enabled(vha)) { + nv->exchange_count = cpu_to_le16(0xFFFF); + } else { /* dual */ + if (ql_dm_tgt_ex_pct > 100) { + ql_dm_tgt_ex_pct = 50; + } else if (ql_dm_tgt_ex_pct == 100) { + /* leave some for FW */ + ql_dm_tgt_ex_pct = 95; + } + + tmp = ha->orig_fw_xcb_count * ql_dm_tgt_ex_pct; + tmp = tmp/100; + if (tmp > 0xffff) + tmp = 0xffff; + t = tmp & 0xffff; + nv->exchange_count = cpu_to_le16(t); + } /* Enable target mode */ nv->firmware_options_1 |= cpu_to_le32(BIT_4); @@ -6319,10 +6392,11 @@ void qlt_modify_vp_config(struct scsi_qla_host *vha, struct vp_config_entry_24xx *vpmod) { - if (qla_tgt_mode_enabled(vha)) + /* enable target mode. Bit5 = 1 => disable */ + if (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha)) vpmod->options_idx1 &= ~BIT_5; - /* Disable ini mode, if requested. */ + /* Disable ini mode, if requested. 
bit4 = 1 => disable */ if (qla_tgt_mode_enabled(vha)) vpmod->options_idx1 &= ~BIT_4; } @@ -6477,6 +6551,8 @@ static int __init qlt_parse_ini_mode(void) ql2x_ini_mode = QLA2XXX_INI_MODE_DISABLED; else if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_ENABLED) == 0) ql2x_ini_mode = QLA2XXX_INI_MODE_ENABLED; + else if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_DUAL) == 0) + ql2x_ini_mode = QLA2XXX_INI_MODE_DUAL; else return false; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 5e56192a4877..ac86b379a269 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -45,10 +45,12 @@ #define QLA2XXX_INI_MODE_STR_EXCLUSIVE "exclusive" #define QLA2XXX_INI_MODE_STR_DISABLED "disabled" #define QLA2XXX_INI_MODE_STR_ENABLED "enabled" +#define QLA2XXX_INI_MODE_STR_DUAL "dual" #define QLA2XXX_INI_MODE_EXCLUSIVE 0 #define QLA2XXX_INI_MODE_DISABLED 1 #define QLA2XXX_INI_MODE_ENABLED 2 +#define QLA2XXX_INI_MODE_DUAL 3 #define QLA2XXX_COMMAND_COUNT_INIT 250 #define QLA2XXX_IMMED_NOTIFY_COUNT_INIT 250 @@ -975,6 +977,7 @@ extern void qlt_update_vp_map(struct scsi_qla_host *, int); * is not set. Right now, ha value is ignored. */ #define QLA_TGT_MODE_ENABLED() (ql2x_ini_mode != QLA2XXX_INI_MODE_ENABLED) + extern int ql2x_ini_mode; static inline bool qla_tgt_mode_enabled(struct scsi_qla_host *ha) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index e37d7ee95473..8b878a2d7304 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1744,7 +1744,7 @@ static int tcm_qla2xxx_lport_register_npiv_cb(struct scsi_qla_host *base_vha, (struct tcm_qla2xxx_lport *)base_vha->vha_tgt.target_lport_ptr; struct fc_vport_identifiers vport_id; - if (!qla_tgt_mode_enabled(base_vha)) { + if (qla_ini_mode_enabled(base_vha)) { pr_err("qla2xxx base_vha not enabled for target mode\n"); return -EPERM; } -- GitLab From 0ca55938497412f9ffdbcb5cb7c4f8d6d28356ee Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Thu, 19 Jan 2017 22:28:02 -0800 Subject: [PATCH 0187/1084] qla2xxx: Remove unused reverse_ini_mode With support for dual mode in the driver, this mode becomes dead code. Remove reverse_ini_mode from code. Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 1 - drivers/scsi/qla2xxx/qla_target.c | 10 ---------- drivers/scsi/qla2xxx/qla_target.h | 8 -------- 3 files changed, 19 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index f67a1c82502c..4ff30d136924 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3237,7 +3237,6 @@ struct qlt_hw_data { /* Protected by hw lock */ uint32_t enable_class_2:1; uint32_t enable_explicit_conf:1; - uint32_t ini_mode_force_reverse:1; uint32_t node_name_set:1; dma_addr_t atio_dma; /* Physical address. 
*/ diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index c8f312f8e0ff..6571869c676a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5858,8 +5858,6 @@ EXPORT_SYMBOL(qlt_lport_deregister); /* Must be called under HW lock */ static void qlt_set_mode(struct scsi_qla_host *vha) { - struct qla_hw_data *ha = vha->hw; - switch (ql2x_ini_mode) { case QLA2XXX_INI_MODE_DISABLED: case QLA2XXX_INI_MODE_EXCLUSIVE: @@ -5874,16 +5872,11 @@ static void qlt_set_mode(struct scsi_qla_host *vha) default: break; } - - if (ha->tgt.ini_mode_force_reverse) - qla_reverse_ini_mode(vha); } /* Must be called under HW lock */ static void qlt_clear_mode(struct scsi_qla_host *vha) { - struct qla_hw_data *ha = vha->hw; - switch (ql2x_ini_mode) { case QLA2XXX_INI_MODE_DISABLED: vha->host->active_mode = MODE_UNKNOWN; @@ -5898,9 +5891,6 @@ static void qlt_clear_mode(struct scsi_qla_host *vha) default: break; } - - if (ha->tgt.ini_mode_force_reverse) - qla_reverse_ini_mode(vha); } /* diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index ac86b379a269..a7f90dcaae37 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -995,14 +995,6 @@ static inline bool qla_dual_mode_enabled(struct scsi_qla_host *ha) return (ha->host->active_mode == MODE_DUAL); } -static inline void qla_reverse_ini_mode(struct scsi_qla_host *ha) -{ - if (ha->host->active_mode & MODE_INITIATOR) - ha->host->active_mode &= ~MODE_INITIATOR; - else - ha->host->active_mode |= MODE_INITIATOR; -} - static inline uint32_t sid_to_key(const uint8_t *s_id) { uint32_t key; -- GitLab From 41dc529a4602ac737020f423f84686a81de38e6d Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 19 Jan 2017 22:28:03 -0800 Subject: [PATCH 0188/1084] qla2xxx: Improve RSCN handling in driver Current code blindly does State Change Registration when the link is up. Move SCR behind fabric scan, so that arbitrated loop scan would not get erroneous error message. Some of the other improvements are as follows - Add session deletion for TPRLO and send acknowledgment for TPRLO. - Enable FW option to move ABTS, RIDA & PUREX from RSPQ to ATIOQ. - Save NPort ID early in link init. - Move ABTS & RIDA to ATIOQ helps in keeping command ordering and link up sequence ordering. - Save Nport ID and update VP map so that SCSI CMD/ATIO won't be dropped. - fcport alloc does the initializes memory to zero. Remove memset to zero since It might corrupt link list. - Turn off Registration for State Change MB in loop mode. 
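The RSCN rework below keys off the address format carried in the event: a port
RSCN names a single N_Port, an area or domain RSCN names a range of ports, and
a fabric RSCN affects everything. The standalone sketch below restates the
masking used in the reworked qla2x00_fcport_event_handler(); the port IDs in
main() are made-up examples.

#include <stdio.h>
#include <stdint.h>

enum rscn_addr_format {
	RSCN_PORT_ADDR,
	RSCN_AREA_ADDR,
	RSCN_DOM_ADDR,
	RSCN_FAB_ADDR,
};

/* Return nonzero when an RSCN for address 'rscn_b24' with the given format
 * covers the 24-bit port id 'port_b24'. */
static int rscn_covers_port(uint32_t rscn_b24, enum rscn_addr_format fmt,
			    uint32_t port_b24)
{
	uint32_t mask;

	switch (fmt) {
	case RSCN_PORT_ADDR:
		mask = 0xffffff;	/* single port: exact match */
		break;
	case RSCN_AREA_ADDR:
		mask = 0xffff00;	/* domain + area affected */
		break;
	case RSCN_DOM_ADDR:
		mask = 0xff0000;	/* whole domain affected */
		break;
	case RSCN_FAB_ADDR:
	default:
		return 1;		/* fabric-wide: every port is affected */
	}

	return (rscn_b24 & mask) == (port_b24 & mask);
}

int main(void)
{
	/* Area RSCN for 0x010200 covers port 0x010203 but not 0x010300. */
	printf("%d %d\n",
	       rscn_covers_port(0x010200, RSCN_AREA_ADDR, 0x010203),
	       rscn_covers_port(0x010200, RSCN_AREA_ADDR, 0x010300));
	return 0;
}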
Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 11 +- drivers/scsi/qla2xxx/qla_fw.h | 75 ++++++++-- drivers/scsi/qla2xxx/qla_gbl.h | 2 + drivers/scsi/qla2xxx/qla_gs.c | 4 +- drivers/scsi/qla2xxx/qla_init.c | 150 +++++++++++++++---- drivers/scsi/qla2xxx/qla_isr.c | 6 +- drivers/scsi/qla2xxx/qla_mbx.c | 144 +++++++++++------- drivers/scsi/qla2xxx/qla_os.c | 48 +++--- drivers/scsi/qla2xxx/qla_target.c | 238 +++++++++++++++++++++++++++--- 9 files changed, 533 insertions(+), 145 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 4ff30d136924..3881790e80da 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2226,6 +2226,13 @@ enum fcport_mgt_event { FCME_DELETE_DONE, }; +enum rscn_addr_format { + RSCN_PORT_ADDR, + RSCN_AREA_ADDR, + RSCN_DOM_ADDR, + RSCN_FAB_ADDR, +}; + /* * Fibre channel port structure. */ @@ -3956,7 +3963,7 @@ typedef struct scsi_qla_host { #define FCOE_CTX_RESET_NEEDED 18 /* Initiate FCoE context reset */ #define MPI_RESET_NEEDED 19 /* Initiate MPI FW reset */ #define ISP_QUIESCE_NEEDED 20 /* Driver need some quiescence */ -#define SCR_PENDING 21 /* SCR in target mode */ +#define FREE_BIT 21 #define PORT_UPDATE_NEEDED 22 #define FX00_RESET_RECOVERY 23 #define FX00_TARGET_SCAN 24 @@ -4010,7 +4017,9 @@ typedef struct scsi_qla_host { /* list of commands waiting on workqueue */ struct list_head qla_cmd_list; struct list_head qla_sess_op_cmd_list; + struct list_head unknown_atio_list; spinlock_t cmd_list_lock; + struct delayed_work unknown_atio_work; /* Counter to detect races between ELS and RSCN events */ atomic_t generation_tick; diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index ee135cf96aee..1f808928763b 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -1301,27 +1301,76 @@ struct vp_config_entry_24xx { }; #define VP_RPT_ID_IOCB_TYPE 0x32 /* Report ID Acquisition entry. */ +enum VP_STATUS { + VP_STAT_COMPL, + VP_STAT_FAIL, + VP_STAT_ID_CHG, + VP_STAT_SNS_TO, /* timeout */ + VP_STAT_SNS_RJT, + VP_STAT_SCR_TO, /* timeout */ + VP_STAT_SCR_RJT, +}; + +enum VP_FLAGS { + VP_FLAGS_CON_FLOOP = 1, + VP_FLAGS_CON_P2P = 2, + VP_FLAGS_CON_FABRIC = 3, + VP_FLAGS_NAME_VALID = BIT_5, +}; + struct vp_rpt_id_entry_24xx { uint8_t entry_type; /* Entry type. */ uint8_t entry_count; /* Entry count. */ uint8_t sys_define; /* System defined. */ uint8_t entry_status; /* Entry Status. */ - - uint32_t handle; /* System handle. */ - - uint16_t vp_count; /* Format 0 -- | VP setup | VP acq |. */ - /* Format 1 -- | VP count |. */ - uint16_t vp_idx; /* Format 0 -- Reserved. */ - /* Format 1 -- VP status and index. 
*/ + uint32_t resv1; + uint8_t vp_acquired; + uint8_t vp_setup; + uint8_t vp_idx; /* Format 0=reserved */ + uint8_t vp_status; /* Format 0=reserved */ uint8_t port_id[3]; uint8_t format; - - uint8_t vp_idx_map[16]; - - uint8_t reserved_4[24]; - uint16_t bbcr; - uint8_t reserved_5[6]; + union { + struct { + /* format 0 loop */ + uint8_t vp_idx_map[16]; + uint8_t reserved_4[32]; + } f0; + struct { + /* format 1 fabric */ + uint8_t vpstat1_subcode; /* vp_status=1 subcode */ + uint8_t flags; + uint16_t fip_flags; + uint8_t rsv2[12]; + + uint8_t ls_rjt_vendor; + uint8_t ls_rjt_explanation; + uint8_t ls_rjt_reason; + uint8_t rsv3[5]; + + uint8_t port_name[8]; + uint8_t node_name[8]; + uint16_t bbcr; + uint8_t reserved_5[6]; + } f1; + struct { /* format 2: N2N direct connect */ + uint8_t vpstat1_subcode; + uint8_t flags; + uint16_t rsv6; + uint8_t rsv2[12]; + + uint8_t ls_rjt_vendor; + uint8_t ls_rjt_explanation; + uint8_t ls_rjt_reason; + uint8_t rsv3[5]; + + uint8_t port_name[8]; + uint8_t node_name[8]; + uint32_t remote_nport_id; + uint32_t reserved_5; + } f2; + } u; }; #define VF_EVFP_IOCB_TYPE 0x26 /* Exchange Virtual Fabric Parameters entry. */ diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index dd95ff620ae3..e0914575aab3 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -138,6 +138,7 @@ extern int ql2xmdenable; extern int ql2xexlogins; extern int ql2xexchoffld; extern int ql2xfwholdabts; +extern int ql2xmvasynctoatio; extern int qla2x00_loop_reset(scsi_qla_host_t *); extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int); @@ -844,5 +845,6 @@ extern void qlt_schedule_sess_for_deletion_lock(struct fc_port *); extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *, uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **); void qla24xx_delete_sess_fn(struct work_struct *); +void qlt_unknown_atio_work_fn(struct work_struct *); #endif /* _QLA_GBL_H */ diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 8f7054dc742b..d1074fb0fff8 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -2914,8 +2914,10 @@ int qla24xx_async_gidpn(scsi_qla_host_t *vha, fc_port_t *fcport) int qla24xx_post_gidpn_work(struct scsi_qla_host *vha, fc_port_t *fcport) { struct qla_work_evt *e; + int ls; - if ((atomic_read(&vha->loop_state) != LOOP_READY) || + ls = atomic_read(&vha->loop_state); + if (((ls != LOOP_READY) && (ls != LOOP_UP)) || test_bit(UNLOADING, &vha->dpc_flags)) return 0; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 4c0a2d8aa964..68430934fa8d 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1071,10 +1071,10 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha, qla24xx_fcport_handle_login(vha, fcport); } -void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, - struct event_arg *ea) +void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, struct event_arg *ea) { - fc_port_t *fcport; + fc_port_t *fcport, *f, *tf; + uint32_t id = 0, mask, rid; int rc; switch (ea->event) { @@ -1087,20 +1087,55 @@ void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, case FCME_RSCN: if (test_bit(UNLOADING, &vha->dpc_flags)) return; + switch (ea->id.b.rsvd_1) { + case RSCN_PORT_ADDR: + fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1); + if (!fcport) { + /* cable moved */ + rc = qla24xx_post_gpnid_work(vha, &ea->id); + if (rc) { + ql_log(ql_log_warn, vha, 0xffff, + "RSCN GPNID work 
failed %02x%02x%02x\n", + ea->id.b.domain, ea->id.b.area, + ea->id.b.al_pa); + } + } else { + ea->fcport = fcport; + qla24xx_handle_rscn_event(fcport, ea); + } + break; + case RSCN_AREA_ADDR: + case RSCN_DOM_ADDR: + if (ea->id.b.rsvd_1 == RSCN_AREA_ADDR) { + mask = 0xffff00; + ql_log(ql_dbg_async, vha, 0xffff, + "RSCN: Area 0x%06x was affected\n", + ea->id.b24); + } else { + mask = 0xff0000; + ql_log(ql_dbg_async, vha, 0xffff, + "RSCN: Domain 0x%06x was affected\n", + ea->id.b24); + } - fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1); - if (!fcport) { - /* cable moved */ - rc = qla24xx_post_gpnid_work(vha, &ea->id); - if (rc) { - ql_log(ql_log_warn, vha, 0xffff, - "RSCN GPNID work failed %02x%02x%02x\n", - ea->id.b.domain, ea->id.b.area, - ea->id.b.al_pa); + rid = ea->id.b24 & mask; + list_for_each_entry_safe(f, tf, &vha->vp_fcports, + list) { + id = f->d_id.b24 & mask; + if (rid == id) { + ea->fcport = f; + qla24xx_handle_rscn_event(f, ea); + } } - } else { - ea->fcport = fcport; - qla24xx_handle_rscn_event(fcport, ea); + break; + case RSCN_FAB_ADDR: + default: + ql_log(ql_log_warn, vha, 0xffff, + "RSCN: Fabric was affected. Addr format %d\n", + ea->id.b.rsvd_1); + qla2x00_mark_all_devices_lost(vha, 1); + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); } break; case FCME_GIDPN_DONE: @@ -2947,6 +2982,21 @@ qla24xx_update_fw_options(scsi_qla_host_t *vha) __func__, ha->fw_options[2]); } + /* Move PUREX, ABTS RX & RIDA to ATIOQ */ + if (ql2xmvasynctoatio) { + if (qla_tgt_mode_enabled(vha) || + qla_dual_mode_enabled(vha)) + ha->fw_options[2] |= BIT_11; + else + ha->fw_options[2] &= ~BIT_11; + } + + ql_dbg(ql_dbg_init, vha, 0xffff, + "%s, add FW options 1-3 = 0x%04x 0x%04x 0x%04x mode %x\n", + __func__, ha->fw_options[1], ha->fw_options[2], + ha->fw_options[3], vha->host->active_mode); + qla2x00_set_fw_options(vha, ha->fw_options); + /* Update Serial Link options. */ if ((le16_to_cpu(ha->fw_seriallink_options24[0]) & BIT_0) == 0) return; @@ -3953,10 +4003,11 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) } else if (ha->current_topology == ISP_CFG_N) { clear_bit(RSCN_UPDATE, &flags); - + } else if (ha->current_topology == ISP_CFG_NL) { + clear_bit(RSCN_UPDATE, &flags); + set_bit(LOCAL_LOOP_UPDATE, &flags); } else if (!vha->flags.online || (test_bit(ABORT_ISP_ACTIVE, &flags))) { - set_bit(RSCN_UPDATE, &flags); set_bit(LOCAL_LOOP_UPDATE, &flags); } @@ -4058,6 +4109,7 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) uint16_t loop_id; uint8_t domain, area, al_pa; struct qla_hw_data *ha = vha->hw; + unsigned long flags; found_devs = 0; new_fcport = NULL; @@ -4098,7 +4150,7 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) "Marking port lost loop_id=0x%04x.\n", fcport->loop_id); - qla2x00_set_fcport_state(fcport, FCS_DEVICE_LOST); + qla2x00_mark_device_lost(vha, fcport, 0, 0); } } @@ -4129,13 +4181,14 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) if (loop_id > LAST_LOCAL_LOOP_ID) continue; - memset(new_fcport, 0, sizeof(fc_port_t)); + memset(new_fcport->port_name, 0, WWN_SIZE); /* Fill in member data. */ new_fcport->d_id.b.domain = domain; new_fcport->d_id.b.area = area; new_fcport->d_id.b.al_pa = al_pa; new_fcport->loop_id = loop_id; + rval2 = qla2x00_get_port_database(vha, new_fcport, 0); if (rval2 != QLA_SUCCESS) { ql_dbg(ql_dbg_disc, vha, 0x201a, @@ -4148,6 +4201,7 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) continue; } + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); /* Check for matching device in port list. 
*/ found = 0; fcport = NULL; @@ -4163,6 +4217,12 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) memcpy(fcport->node_name, new_fcport->node_name, WWN_SIZE); + if (!fcport->login_succ) { + vha->fcport_count++; + fcport->login_succ = 1; + fcport->disc_state = DSC_LOGIN_COMPLETE; + } + found++; break; } @@ -4173,16 +4233,28 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) /* Allocate a new replacement fcport. */ fcport = new_fcport; + if (!fcport->login_succ) { + vha->fcport_count++; + fcport->login_succ = 1; + fcport->disc_state = DSC_LOGIN_COMPLETE; + } + + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + new_fcport = qla2x00_alloc_fcport(vha, GFP_KERNEL); + if (new_fcport == NULL) { ql_log(ql_log_warn, vha, 0x201c, "Failed to allocate memory for fcport.\n"); rval = QLA_MEMORY_ALLOC_FAILED; goto cleanup_allocation; } + spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); new_fcport->flags &= ~FCF_FABRIC_DEVICE; } + spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); + /* Base iIDMA settings on HBA port speed. */ fcport->fp_speed = ha->link_data_rate; @@ -4371,6 +4443,16 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) } vha->device_flags |= SWITCH_FOUND; + + if (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha)) { + rval = qla2x00_send_change_request(vha, 0x3, 0); + if (rval != QLA_SUCCESS) + ql_log(ql_log_warn, vha, 0x121, + "Failed to enable receiving of RSCN requests: 0x%x.\n", + rval); + } + + do { qla2x00_mgmt_svr_login(vha); @@ -6116,6 +6198,7 @@ uint8_t qla27xx_find_valid_image(struct scsi_qla_host *vha) for (chksum = 0; cnt--; wptr++) chksum += le32_to_cpu(*wptr); + if (chksum) { ql_dbg(ql_dbg_init, vha, 0x018c, "Checksum validation failed for primary image (0x%x)\n", @@ -7128,6 +7211,10 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) vha->flags.process_response_queue = 1; } + /* enable RIDA Format2 */ + if (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha)) + icb->firmware_options_3 |= BIT_0; + if (rval) { ql_log(ql_log_warn, vha, 0x0076, "NVRAM configuration failed.\n"); @@ -7252,13 +7339,26 @@ qla81xx_update_fw_options(scsi_qla_host_t *vha) __func__, ha->fw_options[2]); } - if (!ql2xetsenable) - goto out; + /* Move PUREX, ABTS RX & RIDA to ATIOQ */ + if (ql2xmvasynctoatio) { + if (qla_tgt_mode_enabled(vha) || + qla_dual_mode_enabled(vha)) + ha->fw_options[2] |= BIT_11; + else + ha->fw_options[2] &= ~BIT_11; + } + + if (ql2xetsenable) { + /* Enable ETS Burst. */ + memset(ha->fw_options, 0, sizeof(ha->fw_options)); + ha->fw_options[2] |= BIT_9; + } + + ql_dbg(ql_dbg_init, vha, 0xffff, + "%s, add FW options 1-3 = 0x%04x 0x%04x 0x%04x mode %x\n", + __func__, ha->fw_options[1], ha->fw_options[2], + ha->fw_options[3], vha->host->active_mode); - /* Enable ETS Burst. 
*/ - memset(ha->fw_options, 0, sizeof(ha->fw_options)); - ha->fw_options[2] |= BIT_9; -out: qla2x00_set_fw_options(vha, ha->fw_options); } diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index a0a3412030cb..d9577db7ddbb 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1025,10 +1025,6 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) qla2x00_mark_all_devices_lost(vha, 1); - if (vha->vp_idx == 0 && - (qla_tgt_mode_enabled(vha) || qla_dual_mode_enabled(vha))) - set_bit(SCR_PENDING, &vha->dpc_flags); - set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); set_bit(VP_CONFIG_OK, &vha->vp_flags); @@ -1073,7 +1069,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) memset(&ea, 0, sizeof(ea)); ea.event = FCME_RSCN; ea.id.b24 = rscn_entry; + ea.id.b.rsvd_1 = rscn_entry >> 24; qla2x00_fcport_event_handler(vha, &ea); + qla2x00_post_aen_work(vha, FCH_EVT_RSCN, rscn_entry); } break; /* case MBA_RIO_RESPONSE: */ diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 64f04aa2a503..35079f417417 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3599,10 +3599,8 @@ void qla24xx_report_id_acquisition(scsi_qla_host_t *vha, struct vp_rpt_id_entry_24xx *rptid_entry) { - uint8_t vp_idx; - uint16_t stat = le16_to_cpu(rptid_entry->vp_idx); struct qla_hw_data *ha = vha->hw; - scsi_qla_host_t *vp; + scsi_qla_host_t *vp = NULL; unsigned long flags; int found; @@ -3613,80 +3611,124 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, return; if (rptid_entry->format == 0) { + /* loop */ ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7, "Format 0 : Number of VPs setup %d, number of " - "VPs acquired %d.\n", - MSB(le16_to_cpu(rptid_entry->vp_count)), - LSB(le16_to_cpu(rptid_entry->vp_count))); + "VPs acquired %d.\n", rptid_entry->vp_setup, + rptid_entry->vp_acquired); ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b8, "Primary port id %02x%02x%02x.\n", rptid_entry->port_id[2], rptid_entry->port_id[1], rptid_entry->port_id[0]); + + vha->d_id.b.domain = rptid_entry->port_id[2]; + vha->d_id.b.area = rptid_entry->port_id[1]; + vha->d_id.b.al_pa = rptid_entry->port_id[0]; + + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); + } else if (rptid_entry->format == 1) { - vp_idx = LSB(stat); + /* fabric */ ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b9, "Format 1: VP[%d] enabled - status %d - with " - "port id %02x%02x%02x.\n", vp_idx, MSB(stat), + "port id %02x%02x%02x.\n", rptid_entry->vp_idx, + rptid_entry->vp_status, rptid_entry->port_id[2], rptid_entry->port_id[1], rptid_entry->port_id[0]); /* buffer to buffer credit flag */ - vha->flags.bbcr_enable = (rptid_entry->bbcr & 0xf) != 0; - - /* FA-WWN is only for physical port */ - if (!vp_idx) { - void *wwpn = ha->init_cb->port_name; + vha->flags.bbcr_enable = (rptid_entry->u.f1.bbcr & 0xf) != 0; + + if (rptid_entry->vp_idx == 0) { + if (rptid_entry->vp_status == VP_STAT_COMPL) { + /* FA-WWN is only for physical port */ + if (qla_ini_mode_enabled(vha) && + ha->flags.fawwpn_enabled && + (rptid_entry->u.f1.flags & + VP_FLAGS_NAME_VALID)) { + memcpy(vha->port_name, + rptid_entry->u.f1.port_name, + WWN_SIZE); + } - if (!MSB(stat)) { - if (rptid_entry->vp_idx_map[1] & BIT_6) - wwpn = rptid_entry->reserved_4 + 8; + vha->d_id.b.domain = rptid_entry->port_id[2]; + vha->d_id.b.area = 
rptid_entry->port_id[1]; + vha->d_id.b.al_pa = rptid_entry->port_id[0]; + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); } - memcpy(vha->port_name, wwpn, WWN_SIZE); + fc_host_port_name(vha->host) = wwn_to_u64(vha->port_name); - ql_dbg(ql_dbg_mbx, vha, 0x1018, - "FA-WWN portname %016llx (%x)\n", - fc_host_port_name(vha->host), MSB(stat)); - } - - vp = vha; - if (vp_idx == 0) - goto reg_needed; - if (MSB(stat) != 0 && MSB(stat) != 2) { - ql_dbg(ql_dbg_mbx, vha, 0x10ba, - "Could not acquire ID for VP[%d].\n", vp_idx); - return; - } + if (qla_ini_mode_enabled(vha)) + ql_dbg(ql_dbg_mbx, vha, 0x1018, + "FA-WWN portname %016llx (%x)\n", + fc_host_port_name(vha->host), + rptid_entry->vp_status); - found = 0; - spin_lock_irqsave(&ha->vport_slock, flags); - list_for_each_entry(vp, &ha->vp_list, list) { - if (vp_idx == vp->vp_idx) { - found = 1; - break; + set_bit(REGISTER_FC4_NEEDED, &vha->dpc_flags); + set_bit(REGISTER_FDMI_NEEDED, &vha->dpc_flags); + } else { + if (rptid_entry->vp_status != VP_STAT_COMPL && + rptid_entry->vp_status != VP_STAT_ID_CHG) { + ql_dbg(ql_dbg_mbx, vha, 0x10ba, + "Could not acquire ID for VP[%d].\n", + rptid_entry->vp_idx); + return; } - } - spin_unlock_irqrestore(&ha->vport_slock, flags); - if (!found) - return; + found = 0; + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + if (rptid_entry->vp_idx == vp->vp_idx) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&ha->vport_slock, flags); - vp->d_id.b.domain = rptid_entry->port_id[2]; - vp->d_id.b.area = rptid_entry->port_id[1]; - vp->d_id.b.al_pa = rptid_entry->port_id[0]; + if (!found) + return; - /* - * Cannot configure here as we are still sitting on the - * response queue. Handle it in dpc context. - */ - set_bit(VP_IDX_ACQUIRED, &vp->vp_flags); + vp->d_id.b.domain = rptid_entry->port_id[2]; + vp->d_id.b.area = rptid_entry->port_id[1]; + vp->d_id.b.al_pa = rptid_entry->port_id[0]; + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vp, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); -reg_needed: - set_bit(REGISTER_FC4_NEEDED, &vp->dpc_flags); - set_bit(REGISTER_FDMI_NEEDED, &vp->dpc_flags); + /* + * Cannot configure here as we are still sitting on the + * response queue. Handle it in dpc context. + */ + set_bit(VP_IDX_ACQUIRED, &vp->vp_flags); + set_bit(REGISTER_FC4_NEEDED, &vp->dpc_flags); + set_bit(REGISTER_FDMI_NEEDED, &vp->dpc_flags); + } set_bit(VP_DPC_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); + } else if (rptid_entry->format == 2) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "RIDA: format 2/N2N Primary port id %02x%02x%02x.\n", + rptid_entry->port_id[2], rptid_entry->port_id[1], + rptid_entry->port_id[0]); + + ql_dbg(ql_dbg_async, vha, 0xffff, + "N2N: Remote WWPN %8phC.\n", + rptid_entry->u.f2.port_name); + + /* N2N. direct connect */ + vha->d_id.b.domain = rptid_entry->port_id[2]; + vha->d_id.b.area = rptid_entry->port_id[1]; + vha->d_id.b.al_pa = rptid_entry->port_id[0]; + + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); } } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 9c4699623410..bdaa238ca0ab 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -237,6 +237,13 @@ MODULE_PARM_DESC(ql2xfwholdabts, "0 (Default) Do not set fw option. 
" "1 - Set fw option to hold ABTS."); +int ql2xmvasynctoatio = 1; +module_param(ql2xmvasynctoatio, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(ql2xmvasynctoatio, + "Move PUREX, ABTS RX and RIDA IOCBs to ATIOQ" + "0 (Default). Do not move IOCBs" + "1 - Move IOCBs."); + /* * SCSI host template entry points */ @@ -2932,18 +2939,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) goto probe_init_failed; - base_vha->gnl.size = (ha->max_loop_id + 1) * - sizeof(struct get_name_list_extended); - base_vha->gnl.l = dma_alloc_coherent(&ha->pdev->dev, - base_vha->gnl.size, &base_vha->gnl.ldma, GFP_KERNEL); - INIT_LIST_HEAD(&base_vha->gnl.fcports); - - if (base_vha->gnl.l == NULL) { - ql_log(ql_log_fatal, base_vha, 0xffff, - "Alloc failed for name list.\n"); - goto probe_init_failed; - } - /* Alloc arrays of request and response ring ptrs */ if (!qla2x00_alloc_queues(ha, req, rsp)) { ql_log(ql_log_fatal, base_vha, 0x003d, @@ -4250,10 +4245,10 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, struct scsi_qla_host *vha = NULL; host = scsi_host_alloc(sht, sizeof(scsi_qla_host_t)); - if (host == NULL) { + if (!host) { ql_log_pci(ql_log_fatal, ha->pdev, 0x0107, "Failed to allocate host from the scsi layer, aborting.\n"); - goto fail; + return NULL; } /* Clear our data area */ @@ -4272,11 +4267,23 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, INIT_LIST_HEAD(&vha->logo_list); INIT_LIST_HEAD(&vha->plogi_ack_list); INIT_LIST_HEAD(&vha->qp_list); + INIT_LIST_HEAD(&vha->gnl.fcports); spin_lock_init(&vha->work_lock); spin_lock_init(&vha->cmd_list_lock); init_waitqueue_head(&vha->fcport_waitQ); + vha->gnl.size = + sizeof(struct get_name_list_extended[ha->max_loop_id+1]); + vha->gnl.l = dma_alloc_coherent(&ha->pdev->dev, + vha->gnl.size, &vha->gnl.ldma, GFP_KERNEL); + if (!vha->gnl.l) { + ql_log(ql_log_fatal, vha, 0xffff, + "Alloc failed for name list.\n"); + scsi_remove_host(vha->host); + return NULL; + } + sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no); ql_dbg(ql_dbg_init, vha, 0x0041, "Allocated the host=%p hw=%p vha=%p dev_name=%s", @@ -4284,9 +4291,6 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, dev_name(&(ha->pdev->dev))); return vha; - -fail: - return vha; } struct qla_work_evt * @@ -5512,16 +5516,6 @@ qla2x00_do_dpc(void *data) qla2x00_update_fcports(base_vha); } - if (test_bit(SCR_PENDING, &base_vha->dpc_flags)) { - int ret; - ret = qla2x00_send_change_request(base_vha, 0x3, 0); - if (ret != QLA_SUCCESS) - ql_log(ql_log_warn, base_vha, 0x121, - "Failed to enable receiving of RSCN " - "requests: 0x%x.\n", ret); - clear_bit(SCR_PENDING, &base_vha->dpc_flags); - } - if (IS_QLAFX00(ha)) goto loop_resync_check; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 6571869c676a..1cd3734292f9 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -224,6 +224,105 @@ static inline void qlt_decr_num_pend_cmds(struct scsi_qla_host *vha) spin_unlock_irqrestore(&vha->hw->tgt.q_full_lock, flags); } + +static void qlt_queue_unknown_atio(scsi_qla_host_t *vha, + struct atio_from_isp *atio, uint8_t ha_locked) +{ + struct qla_tgt_sess_op *u; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + unsigned long flags; + + if (tgt->tgt_stop) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "qla_target(%d): dropping unknown ATIO_TYPE7, " + "because tgt is being stopped", vha->vp_idx); + goto out_term; + } + + u = kzalloc(sizeof(*u), 
GFP_ATOMIC); + if (u == NULL) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "Alloc of struct unknown_atio (size %zd) failed", sizeof(*u)); + /* It should be harmless and on the next retry should work well */ + goto out_term; + } + + u->vha = vha; + memcpy(&u->atio, atio, sizeof(*atio)); + INIT_LIST_HEAD(&u->cmd_list); + + spin_lock_irqsave(&vha->cmd_list_lock, flags); + list_add_tail(&u->cmd_list, &vha->unknown_atio_list); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); + + schedule_delayed_work(&vha->unknown_atio_work, 1); + +out: + return; + +out_term: + qlt_send_term_exchange(vha, NULL, atio, ha_locked, 0); + goto out; +} + +static void qlt_try_to_dequeue_unknown_atios(struct scsi_qla_host *vha, + uint8_t ha_locked) +{ + struct qla_tgt_sess_op *u, *t; + scsi_qla_host_t *host; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + unsigned long flags; + uint8_t queued = 0; + + list_for_each_entry_safe(u, t, &vha->unknown_atio_list, cmd_list) { + if (u->aborted) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "Freeing unknown %s %p, because of Abort", + "ATIO_TYPE7", u); + qlt_send_term_exchange(vha, NULL, &u->atio, + ha_locked, 0); + goto abort; + } + + host = qlt_find_host_by_d_id(vha, u->atio.u.isp24.fcp_hdr.d_id); + if (host != NULL) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "Requeuing unknown ATIO_TYPE7 %p", u); + qlt_24xx_atio_pkt(host, &u->atio, ha_locked); + } else if (tgt->tgt_stop) { + ql_dbg(ql_dbg_async, vha, 0xffff, + "Freeing unknown %s %p, because tgt is being stopped", + "ATIO_TYPE7", u); + qlt_send_term_exchange(vha, NULL, &u->atio, + ha_locked, 0); + } else { + ql_dbg(ql_dbg_async, vha, 0xffff, + "u %p, vha %p, host %p, sched again..", u, + vha, host); + if (!queued) { + queued = 1; + schedule_delayed_work(&vha->unknown_atio_work, + 1); + } + continue; + } + +abort: + spin_lock_irqsave(&vha->cmd_list_lock, flags); + list_del(&u->cmd_list); + spin_unlock_irqrestore(&vha->cmd_list_lock, flags); + kfree(u); + } +} + +void qlt_unknown_atio_work_fn(struct work_struct *work) +{ + struct scsi_qla_host *vha = container_of(to_delayed_work(work), + struct scsi_qla_host, unknown_atio_work); + + qlt_try_to_dequeue_unknown_atios(vha, 0); +} + static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, struct atio_from_isp *atio, uint8_t ha_locked) { @@ -244,8 +343,14 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, atio->u.isp24.fcp_hdr.d_id[0], atio->u.isp24.fcp_hdr.d_id[1], atio->u.isp24.fcp_hdr.d_id[2]); + + + qlt_queue_unknown_atio(vha, atio, ha_locked); break; } + if (unlikely(!list_empty(&vha->unknown_atio_list))) + qlt_try_to_dequeue_unknown_atios(vha, ha_locked); + qlt_24xx_atio_pkt(host, atio, ha_locked); break; } @@ -273,6 +378,31 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, break; } + case VP_RPT_ID_IOCB_TYPE: + qla24xx_report_id_acquisition(vha, + (struct vp_rpt_id_entry_24xx *)atio); + break; + + case ABTS_RECV_24XX: + { + struct abts_recv_from_24xx *entry = + (struct abts_recv_from_24xx *)atio; + struct scsi_qla_host *host = qlt_find_host_by_vp_idx(vha, + entry->vp_index); + if (unlikely(!host)) { + ql_dbg(ql_dbg_tgt, vha, 0xffff, + "qla_target(%d): Response pkt (ABTS_RECV_24XX) " + "received, with unknown vp_index %d\n", + vha->vp_idx, entry->vp_index); + break; + } + qlt_response_pkt(host, (response_t *)atio); + break; + + } + + /* case PUREX_IOCB_TYPE: ql2xmvasynctoatio */ + default: ql_dbg(ql_dbg_tgt, vha, 0xe040, "qla_target(%d): Received unknown ATIO atio " @@ -791,6 +921,7 @@ static void qlt_free_session_done(struct work_struct 
*work) unsigned long flags; bool logout_started = false; struct event_arg ea; + scsi_qla_host_t *base_vha; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf084, "%s: se_sess %p / sess %p from port %8phC loop_id %#04x" @@ -802,23 +933,25 @@ static void qlt_free_session_done(struct work_struct *work) if (!IS_SW_RESV_ADDR(sess->d_id)) { - if (sess->send_els_logo) { - qlt_port_logo_t logo; - logo.id = sess->d_id; - logo.cmd_count = 0; - qlt_send_first_logo(vha, &logo); - } + if (sess->send_els_logo) { + qlt_port_logo_t logo; - if (sess->logout_on_delete) { - int rc; - rc = qla2x00_post_async_logout_work(vha, sess, NULL); - if (rc != QLA_SUCCESS) - ql_log(ql_log_warn, vha, 0xf085, - "Schedule logo failed sess %p rc %d\n", - sess, rc); - else - logout_started = true; - } + logo.id = sess->d_id; + logo.cmd_count = 0; + qlt_send_first_logo(vha, &logo); + } + + if (sess->logout_on_delete) { + int rc; + + rc = qla2x00_post_async_logout_work(vha, sess, NULL); + if (rc != QLA_SUCCESS) + ql_log(ql_log_warn, vha, 0xf085, + "Schedule logo failed sess %p rc %d\n", + sess, rc); + else + logout_started = true; + } } /* @@ -841,8 +974,13 @@ static void qlt_free_session_done(struct work_struct *work) } ql_dbg(ql_dbg_disc, vha, 0xf087, - "%s: sess %p logout completed\n", - __func__, sess); + "%s: sess %p logout completed\n",__func__, sess); + } + + if (sess->logo_ack_needed) { + sess->logo_ack_needed = 0; + qla24xx_async_notify_ack(vha, sess, + (struct imm_ntfy_from_isp *)sess->iocb, SRB_NACK_LOGO); } spin_lock_irqsave(&ha->tgt.sess_lock, flags); @@ -917,6 +1055,10 @@ static void qlt_free_session_done(struct work_struct *work) if (vha->fcport_count == 0) wake_up_all(&vha->fcport_waitQ); + base_vha = pci_get_drvdata(ha->pdev); + if (test_bit(PFLG_DRIVER_REMOVING, &base_vha->pci_flags)) + return; + if (!tgt || !tgt->tgt_stop) { memset(&ea, 0, sizeof(ea)); ea.event = FCME_DELETE_DONE; @@ -1602,6 +1744,14 @@ static int abort_cmd_for_tag(struct scsi_qla_host *vha, uint32_t tag) } } + list_for_each_entry(op, &vha->unknown_atio_list, cmd_list) { + if (tag == op->atio.u.isp24.exchange_addr) { + op->aborted = true; + spin_unlock(&vha->cmd_list_lock); + return 1; + } + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { if (tag == cmd->atio.u.isp24.exchange_addr) { cmd->aborted = 1; @@ -1638,6 +1788,18 @@ static void abort_cmds_for_lun(struct scsi_qla_host *vha, if (op_key == key && op_lun == lun) op->aborted = true; } + + list_for_each_entry(op, &vha->unknown_atio_list, cmd_list) { + uint32_t op_key; + u64 op_lun; + + op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + op_lun = scsilun_to_int( + (struct scsi_lun *)&op->atio.u.isp24.fcp_cmnd.lun); + if (op_key == key && op_lun == lun) + op->aborted = true; + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { uint32_t cmd_key; uint32_t cmd_lun; @@ -1880,7 +2042,11 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) if (mcmd->flags == QLA24XX_MGMT_SEND_NACK) { if (mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == - ELS_LOGO) { + ELS_LOGO || + mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == + ELS_PRLO || + mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == + ELS_TPRLO) { ql_dbg(ql_dbg_disc, vha, 0xffff, "TM response logo %phC status %#x state %#x", mcmd->sess->port_name, mcmd->fc_tm_rsp, @@ -4252,11 +4418,21 @@ static int abort_cmds_for_s_id(struct scsi_qla_host *vha, port_id_t *s_id) spin_lock(&vha->cmd_list_lock); list_for_each_entry(op, &vha->qla_sess_op_cmd_list, cmd_list) { uint32_t op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + if (op_key == key) { 
op->aborted = true; count++; } } + + list_for_each_entry(op, &vha->unknown_atio_list, cmd_list) { + uint32_t op_key = sid_to_key(op->atio.u.isp24.fcp_hdr.s_id); + if (op_key == key) { + op->aborted = true; + count++; + } + } + list_for_each_entry(cmd, &vha->qla_cmd_list, cmd_list) { uint32_t cmd_key = sid_to_key(cmd->atio.u.isp24.fcp_hdr.s_id); if (cmd_key == key) { @@ -4463,6 +4639,16 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, } break; + + case ELS_TPRLO: + if (le16_to_cpu(iocb->u.isp24.flags) & + NOTIFY24XX_FLAGS_GLOBAL_TPRLO) { + loop_id = 0xFFFF; + qlt_reset(vha, iocb, QLA_TGT_NEXUS_LOSS); + res = 1; + break; + } + /* drop through */ case ELS_LOGO: case ELS_PRLO: spin_lock_irqsave(&ha->tgt.sess_lock, flags); @@ -4472,7 +4658,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, if (sess) { sess->login_gen++; sess->fw_login_state = DSC_LS_LOGO_PEND; - sess->logout_on_delete = 0; sess->logo_ack_needed = 1; memcpy(sess->iocb, iocb, IOCB_SIZE); } @@ -4483,12 +4668,14 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, "%s: logo %llx res %d sess %p ", __func__, wwn, res, sess); if (res == 0) { - /* cmd went up to ULP. look for qlt_xmit_tm_rsp() - for LOGO_ACK */ + /* + * cmd went upper layer, look for qlt_xmit_tm_rsp() + * for LOGO_ACK & sess delete + */ BUG_ON(!sess); res = 0; } else { - /* cmd did not go upstair. */ + /* cmd did not go to upper layer. */ if (sess) { qlt_schedule_sess_for_deletion_lock(sess); res = 0; @@ -6407,6 +6594,11 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) mutex_init(&base_vha->vha_tgt.tgt_mutex); mutex_init(&base_vha->vha_tgt.tgt_host_action_mutex); + + INIT_LIST_HEAD(&base_vha->unknown_atio_list); + INIT_DELAYED_WORK(&base_vha->unknown_atio_work, + qlt_unknown_atio_work_fn); + qlt_clear_mode(base_vha); } -- GitLab From 25ff6af10562cfe30dd29452a8fc04ec022d4f18 Mon Sep 17 00:00:00 2001 From: Joe Carnuccio Date: Thu, 19 Jan 2017 22:28:04 -0800 Subject: [PATCH 0189/1084] qla2xxx: Simplify usage of SRB structure in driver This patch simplifies SRB structure usage in driver. - Simplify sp->done() and sp->free() interfaces. - Remove sp->fcport->vha to use vha pointer from sp. - Use sp->vha context in qla2x00_rel_sp(). 
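The callback signature change is easiest to see side by side. The sketch below is a standalone illustration only, using made-up stand-in types rather than the real qla2xxx structures: the old prototypes passed the host and the SRB separately as untyped pointers, while the new ones pass just the SRB, which now carries a typed scsi_qla_host pointer.

/*
 * Standalone sketch of the interface change; stand-in types, not the
 * driver's own definitions.
 */
#include <stdio.h>

struct example_host { int id; };

/* Old style: host and SRB passed separately, both untyped. */
struct example_srb_old {
	struct example_host *vha;
	void (*done)(void *vha, void *sp, int res);
	void (*free)(void *vha, void *sp);
};

/* New style: the SRB carries a typed host pointer, so one argument suffices. */
struct example_srb_new {
	struct example_host *vha;
	void (*done)(void *sp, int res);
	void (*free)(void *sp);
};

static void example_done(void *ptr, int res)
{
	struct example_srb_new *sp = ptr;

	printf("host %d: completed, res=%d\n", sp->vha->id, res);
}

int main(void)
{
	struct example_host host = { .id = 0 };
	struct example_srb_new sp = { .vha = &host, .done = example_done };

	sp.done(&sp, 0);
	return 0;
}

Dropping the redundant host argument removes a cast from void * at every completion site and makes it impossible for the two pointers to disagree.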
Signed-off-by: Joe Carnuccio Signed-off-by: Himanshu Madhani Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_bsg.c | 23 +++---- drivers/scsi/qla2xxx/qla_def.h | 7 ++- drivers/scsi/qla2xxx/qla_gbl.h | 14 ++--- drivers/scsi/qla2xxx/qla_gs.c | 32 +++++----- drivers/scsi/qla2xxx/qla_init.c | 100 +++++++++++++++--------------- drivers/scsi/qla2xxx/qla_inline.h | 13 ++-- drivers/scsi/qla2xxx/qla_iocb.c | 96 ++++++++++++++-------------- drivers/scsi/qla2xxx/qla_isr.c | 36 +++++------ drivers/scsi/qla2xxx/qla_mr.c | 48 +++++++------- drivers/scsi/qla2xxx/qla_os.c | 35 +++++------ drivers/scsi/qla2xxx/qla_target.c | 8 +-- 11 files changed, 201 insertions(+), 211 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 1bf8061ff803..975e7a117532 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -13,28 +13,25 @@ /* BSG support for ELS/CT pass through */ void -qla2x00_bsg_job_done(void *data, void *ptr, int res) +qla2x00_bsg_job_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; + srb_t *sp = ptr; struct bsg_job *bsg_job = sp->u.bsg_job; struct fc_bsg_reply *bsg_reply = bsg_job->reply; bsg_reply->result = res; bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); - sp->free(vha, sp); + sp->free(sp); } void -qla2x00_bsg_sp_free(void *data, void *ptr) +qla2x00_bsg_sp_free(void *ptr) { - srb_t *sp = (srb_t *)ptr; - struct scsi_qla_host *vha = sp->fcport->vha; + srb_t *sp = ptr; + struct qla_hw_data *ha = sp->vha->hw; struct bsg_job *bsg_job = sp->u.bsg_job; struct fc_bsg_request *bsg_request = bsg_job->request; - - struct qla_hw_data *ha = vha->hw; struct qla_mt_iocb_rqst_fx00 *piocb_rqst; if (sp->type == SRB_FXIOCB_BCMD) { @@ -62,7 +59,7 @@ qla2x00_bsg_sp_free(void *data, void *ptr) sp->type == SRB_FXIOCB_BCMD || sp->type == SRB_ELS_CMD_HST) kfree(sp->fcport); - qla2x00_rel_sp(vha, sp); + qla2x00_rel_sp(sp); } int @@ -394,7 +391,7 @@ qla2x00_process_els(struct bsg_job *bsg_job) if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x700e, "qla2x00_start_sp failed = %d\n", rval); - qla2x00_rel_sp(vha, sp); + qla2x00_rel_sp(sp); rval = -EIO; goto done_unmap_sg; } @@ -542,7 +539,7 @@ qla2x00_process_ct(struct bsg_job *bsg_job) if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x7017, "qla2x00_start_sp failed=%d.\n", rval); - qla2x00_rel_sp(vha, sp); + qla2x00_rel_sp(sp); rval = -EIO; goto done_free_fcport; } @@ -2578,6 +2575,6 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job) done: spin_unlock_irqrestore(&ha->hardware_lock, flags); - sp->free(vha, sp); + sp->free(sp); return 0; } diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 3881790e80da..ac37e91aee20 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -433,7 +433,7 @@ struct srb_iocb { typedef struct srb { atomic_t ref_count; struct fc_port *fcport; - void *vha; + struct scsi_qla_host *vha; uint32_t handle; uint16_t flags; uint16_t type; @@ -447,8 +447,8 @@ typedef struct srb { struct bsg_job *bsg_job; struct srb_cmd scmd; } u; - void (*done)(void *, void *, int); - void (*free)(void *, void *); + void (*done)(void *, int); + void (*free)(void *); } srb_t; #define GET_CMD_SP(sp) (sp->u.scmd.cmd) @@ -3224,6 +3224,7 @@ struct qla_qpair { struct qla_hw_data *hw; struct work_struct q_work; struct list_head qp_list_elem; /* vha->qp_list */ + struct scsi_qla_host *vha; }; /* Place holder for FW 
buffer parameters */ diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index e0914575aab3..b3d6441d1d90 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -186,9 +186,9 @@ extern int qla2x00_post_uevent_work(struct scsi_qla_host *, u32); extern int qla2x00_post_uevent_work(struct scsi_qla_host *, u32); extern void qla2x00_disable_board_on_pci_error(struct work_struct *); -extern void qla2x00_sp_compl(void *, void *, int); -extern void qla2xxx_qpair_sp_free_dma(void *, void *); -extern void qla2xxx_qpair_sp_compl(void *, void *, int); +extern void qla2x00_sp_compl(void *, int); +extern void qla2xxx_qpair_sp_free_dma(void *); +extern void qla2xxx_qpair_sp_compl(void *, int); extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *); void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *, uint16_t *); @@ -215,7 +215,7 @@ extern void qla2x00_do_dpc_all_vps(scsi_qla_host_t *); extern int qla24xx_vport_create_req_sanity_check(struct fc_vport *); extern scsi_qla_host_t * qla24xx_create_vhost(struct fc_vport *); -extern void qla2x00_sp_free_dma(void *, void *); +extern void qla2x00_sp_free_dma(void *); extern char *qla2x00_get_fw_version_str(struct scsi_qla_host *, char *); extern void qla2x00_mark_device_lost(scsi_qla_host_t *, fc_port_t *, int, int); @@ -733,10 +733,10 @@ extern int qla82xx_restart_isp(scsi_qla_host_t *); /* IOCB related functions */ extern int qla82xx_start_scsi(srb_t *); -extern void qla2x00_sp_free(void *, void *); +extern void qla2x00_sp_free(void *); extern void qla2x00_sp_timeout(unsigned long); -extern void qla2x00_bsg_job_done(void *, void *, int); -extern void qla2x00_bsg_sp_free(void *, void *); +extern void qla2x00_bsg_job_done(void *, int); +extern void qla2x00_bsg_sp_free(void *); extern void qla2x00_start_iocbs(struct scsi_qla_host *, struct req_que *); /* Interrupt related */ diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index d1074fb0fff8..6d500fd06768 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -2822,10 +2822,10 @@ void qla24xx_handle_gidpn_event(scsi_qla_host_t *vha, struct event_arg *ea) } } /* gidpn_event */ -static void qla2x00_async_gidpn_sp_done(void *v, void *s, int res) +static void qla2x00_async_gidpn_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; - struct srb *sp = (struct srb *)s; + struct srb *sp = s; + struct scsi_qla_host *vha = sp->vha; fc_port_t *fcport = sp->fcport; u8 *id = fcport->ct_desc.ct_sns->p.rsp.rsp.gid_pn.port_id; struct event_arg ea; @@ -2847,7 +2847,7 @@ static void qla2x00_async_gidpn_sp_done(void *v, void *s, int res) qla2x00_fcport_event_handler(vha, &ea); - sp->free(vha, sp); + sp->free(sp); } int qla24xx_async_gidpn(scsi_qla_host_t *vha, fc_port_t *fcport) @@ -2905,7 +2905,7 @@ int qla24xx_async_gidpn(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; @@ -2941,11 +2941,11 @@ int qla24xx_post_gpsc_work(struct scsi_qla_host *vha, fc_port_t *fcport) return qla2x00_post_work(vha, e); } -static void qla24xx_async_gpsc_sp_done(void *v, void *s, int res) +static void qla24xx_async_gpsc_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; + struct srb *sp = s; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; - struct srb *sp = (struct srb *)s; fc_port_t *fcport = sp->fcport; 
struct ct_sns_rsp *ct_rsp; struct event_arg ea; @@ -3011,7 +3011,7 @@ static void qla24xx_async_gpsc_sp_done(void *v, void *s, int res) ea.fcport = fcport; qla2x00_fcport_event_handler(vha, &ea); - sp->free(vha, sp); + sp->free(sp); } int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) @@ -3066,7 +3066,7 @@ int qla24xx_async_gpsc(scsi_qla_host_t *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; @@ -3104,7 +3104,7 @@ void qla24xx_async_gpnid_done(scsi_qla_host_t *vha, srb_t *sp) sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - sp->free(vha, sp); + sp->free(sp); } void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea) @@ -3138,10 +3138,10 @@ void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea) } } -static void qla2x00_async_gpnid_sp_done(void *v, void *s, int res) +static void qla2x00_async_gpnid_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; - struct srb *sp = (struct srb *)s; + struct srb *sp = s; + struct scsi_qla_host *vha = sp->vha; struct ct_sns_req *ct_req = (struct ct_sns_req *)sp->u.iocb_cmd.u.ctarg.req; struct ct_sns_rsp *ct_rsp = @@ -3183,7 +3183,7 @@ static void qla2x00_async_gpnid_sp_done(void *v, void *s, int res) sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - sp->free(vha, sp); + sp->free(sp); return; } @@ -3272,7 +3272,7 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) sp->u.iocb_cmd.u.ctarg.rsp = NULL; } - sp->free(vha, sp); + sp->free(sp); done: return rval; } diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 68430934fa8d..f65431480833 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -47,29 +47,27 @@ qla2x00_sp_timeout(unsigned long __data) { srb_t *sp = (srb_t *)__data; struct srb_iocb *iocb; - scsi_qla_host_t *vha = (scsi_qla_host_t *)sp->vha; - struct qla_hw_data *ha = vha->hw; + scsi_qla_host_t *vha = sp->vha; struct req_que *req; unsigned long flags; - spin_lock_irqsave(&ha->hardware_lock, flags); - req = ha->req_q_map[0]; + spin_lock_irqsave(&vha->hw->hardware_lock, flags); + req = vha->hw->req_q_map[0]; req->outstanding_cmds[sp->handle] = NULL; iocb = &sp->u.iocb_cmd; iocb->timeout(sp); - sp->free(vha, sp); - spin_unlock_irqrestore(&ha->hardware_lock, flags); + sp->free(sp); + spin_unlock_irqrestore(&vha->hw->hardware_lock, flags); } void -qla2x00_sp_free(void *data, void *ptr) +qla2x00_sp_free(void *ptr) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct srb_iocb *iocb = &sp->u.iocb_cmd; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; del_timer(&iocb->timer); - qla2x00_rel_sp(vha, sp); + qla2x00_rel_sp(sp); } /* Asynchronous Login/Logout Routines -------------------------------------- */ @@ -97,7 +95,7 @@ qla2x00_get_async_timeout(struct scsi_qla_host *vha) void qla2x00_async_iocb_timeout(void *data) { - srb_t *sp = (srb_t *)data; + srb_t *sp = data; fc_port_t *fcport = sp->fcport; struct srb_iocb *lio = &sp->u.iocb_cmd; struct event_arg ea; @@ -130,22 +128,21 @@ qla2x00_async_iocb_timeout(void *data) case SRB_NACK_PLOGI: case SRB_NACK_PRLI: case SRB_NACK_LOGO: - sp->done(sp->vha, sp, QLA_FUNCTION_TIMEOUT); + sp->done(sp, QLA_FUNCTION_TIMEOUT); break; } } static void -qla2x00_async_login_sp_done(void *data, void *ptr, int res) +qla2x00_async_login_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; + struct scsi_qla_host *vha = sp->vha; struct srb_iocb *lio = &sp->u.iocb_cmd; - 
struct scsi_qla_host *vha = (scsi_qla_host_t *)data; struct event_arg ea; ql_dbg(ql_dbg_disc, vha, 0xffff, - "%s %8phC res %d \n", - __func__, sp->fcport->port_name, res); + "%s %8phC res %d \n", __func__, sp->fcport->port_name, res); sp->fcport->flags &= ~FCF_ASYNC_SENT; if (!test_bit(UNLOADING, &vha->dpc_flags)) { @@ -160,7 +157,7 @@ qla2x00_async_login_sp_done(void *data, void *ptr, int res) qla2x00_fcport_event_handler(vha, &ea); } - sp->free(sp->fcport->vha, sp); + sp->free(sp); } int @@ -212,24 +209,23 @@ qla2x00_async_login(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(fcport->vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; } static void -qla2x00_async_logout_sp_done(void *data, void *ptr, int res) +qla2x00_async_logout_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct srb_iocb *lio = &sp->u.iocb_cmd; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; sp->fcport->flags &= ~FCF_ASYNC_SENT; - if (!test_bit(UNLOADING, &vha->dpc_flags)) - qla2x00_post_async_logout_done_work(sp->fcport->vha, sp->fcport, + if (!test_bit(UNLOADING, &sp->vha->dpc_flags)) + qla2x00_post_async_logout_done_work(sp->vha, sp->fcport, lio->u.logio.data); - sp->free(sp->fcport->vha, sp); + sp->free(sp); } int @@ -264,23 +260,23 @@ qla2x00_async_logout(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(fcport->vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; } static void -qla2x00_async_adisc_sp_done(void *data, void *ptr, int res) +qla2x00_async_adisc_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; + struct scsi_qla_host *vha = sp->vha; struct srb_iocb *lio = &sp->u.iocb_cmd; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; if (!test_bit(UNLOADING, &vha->dpc_flags)) - qla2x00_post_async_adisc_done_work(sp->fcport->vha, sp->fcport, + qla2x00_post_async_adisc_done_work(sp->vha, sp->fcport, lio->u.logio.data); - sp->free(sp->fcport->vha, sp); + sp->free(sp); } int @@ -317,7 +313,7 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(fcport->vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; @@ -479,10 +475,10 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, } /* gnl_event */ static void -qla24xx_async_gnl_sp_done(void *v, void *s, int res) +qla24xx_async_gnl_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; - struct srb *sp = (struct srb *)s; + struct srb *sp = s; + struct scsi_qla_host *vha = sp->vha; unsigned long flags; struct fc_port *fcport = NULL, *tf; u16 i, n = 0, loop_id; @@ -541,7 +537,7 @@ qla24xx_async_gnl_sp_done(void *v, void *s, int res) spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - sp->free(vha, sp); + sp->free(sp); } int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) @@ -609,7 +605,7 @@ int qla24xx_async_gnl(struct scsi_qla_host *vha, fc_port_t *fcport) return rval; done_free_sp: - sp->free(fcport->vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; @@ -628,10 +624,10 @@ int qla24xx_post_gnl_work(struct scsi_qla_host *vha, fc_port_t *fcport) } static -void qla24xx_async_gpdb_sp_done(void *v, void *s, int res) +void qla24xx_async_gpdb_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; - struct srb *sp = (struct srb *)s; + struct srb *sp = s; + struct scsi_qla_host *vha = sp->vha; 
struct qla_hw_data *ha = vha->hw; uint64_t zero = 0; struct port_database_24xx *pd; @@ -708,7 +704,7 @@ void qla24xx_async_gpdb_sp_done(void *v, void *s, int res) dma_pool_free(ha->s_dma_pool, sp->u.iocb_cmd.u.mbx.in, sp->u.iocb_cmd.u.mbx.in_dma); - sp->free(vha, sp); + sp->free(sp); } static int qla24xx_post_gpdb_work(struct scsi_qla_host *vha, fc_port_t *fcport, @@ -790,7 +786,7 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) if (pd) dma_pool_free(ha->s_dma_pool, pd, pd_dma); - sp->free(vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; qla24xx_post_gpdb_work(vha, fcport, opt); @@ -1168,7 +1164,7 @@ void qla2x00_fcport_event_handler(scsi_qla_host_t *vha, struct event_arg *ea) static void qla2x00_tmf_iocb_timeout(void *data) { - srb_t *sp = (srb_t *)data; + srb_t *sp = data; struct srb_iocb *tmf = &sp->u.iocb_cmd; tmf->u.tmf.comp_status = CS_TIMEOUT; @@ -1176,10 +1172,11 @@ qla2x00_tmf_iocb_timeout(void *data) } static void -qla2x00_tmf_sp_done(void *data, void *ptr, int res) +qla2x00_tmf_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct srb_iocb *tmf = &sp->u.iocb_cmd; + complete(&tmf->u.tmf.comp); } @@ -1237,7 +1234,7 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, } done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: return rval; } @@ -1245,7 +1242,7 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, static void qla24xx_abort_iocb_timeout(void *data) { - srb_t *sp = (srb_t *)data; + srb_t *sp = data; struct srb_iocb *abt = &sp->u.iocb_cmd; abt->u.abt.comp_status = CS_TIMEOUT; @@ -1253,9 +1250,9 @@ qla24xx_abort_iocb_timeout(void *data) } static void -qla24xx_abort_sp_done(void *data, void *ptr, int res) +qla24xx_abort_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct srb_iocb *abt = &sp->u.iocb_cmd; complete(&abt->u.abt.comp); @@ -1264,7 +1261,7 @@ qla24xx_abort_sp_done(void *data, void *ptr, int res) static int qla24xx_async_abort_cmd(srb_t *cmd_sp) { - scsi_qla_host_t *vha = cmd_sp->fcport->vha; + scsi_qla_host_t *vha = cmd_sp->vha; fc_port_t *fcport = cmd_sp->fcport; struct srb_iocb *abt_iocb; srb_t *sp; @@ -1297,7 +1294,7 @@ qla24xx_async_abort_cmd(srb_t *cmd_sp) QLA_SUCCESS : QLA_FUNCTION_FAILED; done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: return rval; } @@ -7564,6 +7561,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, int v memset(qpair, 0, sizeof(struct qla_qpair)); qpair->hw = vha->hw; + qpair->vha = vha; /* Assign available que pair id */ mutex_lock(&ha->mq_lock); diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index 647828b9e622..66df6cec59da 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -232,6 +232,7 @@ qla2xxx_get_qpair_sp(struct qla_qpair *qpair, fc_port_t *fcport, gfp_t flag) memset(sp, 0, sizeof(*sp)); sp->fcport = fcport; sp->iocbs = 1; + sp->vha = qpair->vha; done: if (!sp) QLA_QPAIR_MARK_NOT_BUSY(qpair); @@ -249,14 +250,13 @@ static inline srb_t * qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag) { srb_t *sp = NULL; - struct qla_hw_data *ha = vha->hw; uint8_t bail; QLA_VHA_MARK_BUSY(vha, bail); if (unlikely(bail)) return NULL; - sp = mempool_alloc(ha->srb_mempool, flag); + sp = mempool_alloc(vha->hw->srb_mempool, flag); if (!sp) goto done; @@ -271,10 +271,10 @@ qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag) } static inline void 
-qla2x00_rel_sp(scsi_qla_host_t *vha, srb_t *sp) +qla2x00_rel_sp(srb_t *sp) { - mempool_free(sp, vha->hw->srb_mempool); - QLA_VHA_MARK_NOT_BUSY(vha); + QLA_VHA_MARK_NOT_BUSY(sp->vha); + mempool_free(sp, sp->vha->hw->srb_mempool); } static inline void @@ -286,8 +286,7 @@ qla2x00_init_timer(srb_t *sp, unsigned long tmo) sp->u.iocb_cmd.timer.function = qla2x00_sp_timeout; add_timer(&sp->u.iocb_cmd.timer); sp->free = qla2x00_sp_free; - if ((IS_QLAFX00(((scsi_qla_host_t *)sp->vha)->hw)) && - (sp->type == SRB_FXIOCB_DCMD)) + if (IS_QLAFX00(sp->vha->hw) && (sp->type == SRB_FXIOCB_DCMD)) init_completion(&sp->u.iocb_cmd.u.fxiocb.fxiocb_comp); if (sp->type == SRB_ELS_DCMD) init_completion(&sp->u.iocb_cmd.u.els_logo.comp); diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 33d3d90c089b..535079280288 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -23,7 +23,7 @@ qla2x00_get_cmd_direction(srb_t *sp) { uint16_t cflags; struct scsi_cmnd *cmd = GET_CMD_SP(sp); - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; cflags = 0; @@ -210,7 +210,7 @@ void qla2x00_build_scsi_iocbs_32(srb_t *sp, cmd_entry_t *cmd_pkt, return; } - vha = sp->fcport->vha; + vha = sp->vha; cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(sp)); /* Three DSDs are available in the Command Type 2 IOCB */ @@ -267,7 +267,7 @@ void qla2x00_build_scsi_iocbs_64(srb_t *sp, cmd_entry_t *cmd_pkt, return; } - vha = sp->fcport->vha; + vha = sp->vha; cmd_pkt->control_flags |= cpu_to_le16(qla2x00_get_cmd_direction(sp)); /* Two DSDs are available in the Command Type 3 IOCB */ @@ -324,7 +324,7 @@ qla2x00_start_scsi(srb_t *sp) struct rsp_que *rsp; /* Setup device pointers. */ - vha = sp->fcport->vha; + vha = sp->vha; ha = vha->hw; reg = &ha->iobase->isp; cmd = GET_CMD_SP(sp); @@ -601,7 +601,7 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt, return 0; } - vha = sp->fcport->vha; + vha = sp->vha; ha = vha->hw; /* Set transfer direction */ @@ -716,7 +716,7 @@ qla24xx_build_scsi_iocbs(srb_t *sp, struct cmd_type_7 *cmd_pkt, return; } - vha = sp->fcport->vha; + vha = sp->vha; /* Set transfer direction */ if (cmd->sc_data_direction == DMA_TO_DEVICE) { @@ -1108,7 +1108,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp, if (sp) { cmd = GET_CMD_SP(sp); sgl = scsi_prot_sglist(cmd); - vha = sp->fcport->vha; + vha = sp->vha; } else if (tc) { vha = tc->vha; sgl = tc->prot_sg; @@ -1215,7 +1215,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, /* Update entry type to indicate Command Type CRC_2 IOCB */ *((uint32_t *)(&cmd_pkt->entry_type)) = cpu_to_le32(COMMAND_TYPE_CRC_2); - vha = sp->fcport->vha; + vha = sp->vha; ha = vha->hw; /* No data transfer */ @@ -1225,7 +1225,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt, return QLA_SUCCESS; } - cmd_pkt->vp_index = sp->fcport->vha->vp_idx; + cmd_pkt->vp_index = sp->vha->vp_idx; /* Set transfer direction */ if (cmd->sc_data_direction == DMA_TO_DEVICE) { @@ -1415,7 +1415,7 @@ qla24xx_start_scsi(srb_t *sp) struct req_que *req = NULL; struct rsp_que *rsp = NULL; struct scsi_cmnd *cmd = GET_CMD_SP(sp); - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; /* Setup device pointers. 
*/ @@ -1492,7 +1492,7 @@ qla24xx_start_scsi(srb_t *sp) cmd_pkt->port_id[0] = sp->fcport->d_id.b.al_pa; cmd_pkt->port_id[1] = sp->fcport->d_id.b.area; cmd_pkt->port_id[2] = sp->fcport->d_id.b.domain; - cmd_pkt->vp_index = sp->fcport->vha->vp_idx; + cmd_pkt->vp_index = sp->vha->vp_idx; int_to_scsilun(cmd->device->lun, &cmd_pkt->lun); host_to_fcp_swap((uint8_t *)&cmd_pkt->lun, sizeof(cmd_pkt->lun)); @@ -1564,7 +1564,7 @@ qla24xx_dif_start_scsi(srb_t *sp) struct req_que *req = NULL; struct rsp_que *rsp = NULL; struct scsi_cmnd *cmd = GET_CMD_SP(sp); - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; struct cmd_type_crc_2 *cmd_pkt; uint32_t status = 0; @@ -2214,13 +2214,13 @@ qla24xx_login_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->port_id[0] = sp->fcport->d_id.b.al_pa; logio->port_id[1] = sp->fcport->d_id.b.area; logio->port_id[2] = sp->fcport->d_id.b.domain; - logio->vp_index = sp->fcport->vha->vp_idx; + logio->vp_index = sp->vha->vp_idx; } static void qla2x00_login_iocb(srb_t *sp, struct mbx_entry *mbx) { - struct qla_hw_data *ha = sp->fcport->vha->hw; + struct qla_hw_data *ha = sp->vha->hw; struct srb_iocb *lio = &sp->u.iocb_cmd; uint16_t opts; @@ -2238,7 +2238,7 @@ qla2x00_login_iocb(srb_t *sp, struct mbx_entry *mbx) mbx->mb2 = cpu_to_le16(sp->fcport->d_id.b.domain); mbx->mb3 = cpu_to_le16(sp->fcport->d_id.b.area << 8 | sp->fcport->d_id.b.al_pa); - mbx->mb9 = cpu_to_le16(sp->fcport->vha->vp_idx); + mbx->mb9 = cpu_to_le16(sp->vha->vp_idx); } static void @@ -2254,13 +2254,13 @@ qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->port_id[0] = sp->fcport->d_id.b.al_pa; logio->port_id[1] = sp->fcport->d_id.b.area; logio->port_id[2] = sp->fcport->d_id.b.domain; - logio->vp_index = sp->fcport->vha->vp_idx; + logio->vp_index = sp->vha->vp_idx; } static void qla2x00_logout_iocb(srb_t *sp, struct mbx_entry *mbx) { - struct qla_hw_data *ha = sp->fcport->vha->hw; + struct qla_hw_data *ha = sp->vha->hw; mbx->entry_type = MBX_IOCB_TYPE; SET_TARGET_ID(ha, mbx->loop_id, sp->fcport->loop_id); @@ -2271,7 +2271,7 @@ qla2x00_logout_iocb(srb_t *sp, struct mbx_entry *mbx) mbx->mb2 = cpu_to_le16(sp->fcport->d_id.b.domain); mbx->mb3 = cpu_to_le16(sp->fcport->d_id.b.area << 8 | sp->fcport->d_id.b.al_pa); - mbx->mb9 = cpu_to_le16(sp->fcport->vha->vp_idx); + mbx->mb9 = cpu_to_le16(sp->vha->vp_idx); /* Implicit: mbx->mbx10 = 0. 
*/ } @@ -2281,13 +2281,13 @@ qla24xx_adisc_iocb(srb_t *sp, struct logio_entry_24xx *logio) logio->entry_type = LOGINOUT_PORT_IOCB_TYPE; logio->control_flags = cpu_to_le16(LCF_COMMAND_ADISC); logio->nport_handle = cpu_to_le16(sp->fcport->loop_id); - logio->vp_index = sp->fcport->vha->vp_idx; + logio->vp_index = sp->vha->vp_idx; } static void qla2x00_adisc_iocb(srb_t *sp, struct mbx_entry *mbx) { - struct qla_hw_data *ha = sp->fcport->vha->hw; + struct qla_hw_data *ha = sp->vha->hw; mbx->entry_type = MBX_IOCB_TYPE; SET_TARGET_ID(ha, mbx->loop_id, sp->fcport->loop_id); @@ -2302,7 +2302,7 @@ qla2x00_adisc_iocb(srb_t *sp, struct mbx_entry *mbx) mbx->mb3 = cpu_to_le16(LSW(ha->async_pd_dma)); mbx->mb6 = cpu_to_le16(MSW(MSD(ha->async_pd_dma))); mbx->mb7 = cpu_to_le16(LSW(MSD(ha->async_pd_dma))); - mbx->mb9 = cpu_to_le16(sp->fcport->vha->vp_idx); + mbx->mb9 = cpu_to_le16(sp->vha->vp_idx); } static void @@ -2338,32 +2338,30 @@ qla24xx_tm_iocb(srb_t *sp, struct tsk_mgmt_entry *tsk) } static void -qla2x00_els_dcmd_sp_free(void *ptr, void *data) +qla2x00_els_dcmd_sp_free(void *data) { - struct scsi_qla_host *vha = (scsi_qla_host_t *)ptr; - struct qla_hw_data *ha = vha->hw; - srb_t *sp = (srb_t *)data; + srb_t *sp = data; struct srb_iocb *elsio = &sp->u.iocb_cmd; kfree(sp->fcport); if (elsio->u.els_logo.els_logo_pyld) - dma_free_coherent(&ha->pdev->dev, DMA_POOL_SIZE, + dma_free_coherent(&sp->vha->hw->pdev->dev, DMA_POOL_SIZE, elsio->u.els_logo.els_logo_pyld, elsio->u.els_logo.els_logo_pyld_dma); del_timer(&elsio->timer); - qla2x00_rel_sp(vha, sp); + qla2x00_rel_sp(sp); } static void qla2x00_els_dcmd_iocb_timeout(void *data) { - srb_t *sp = (srb_t *)data; - struct srb_iocb *lio = &sp->u.iocb_cmd; + srb_t *sp = data; fc_port_t *fcport = sp->fcport; - struct scsi_qla_host *vha = fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; + struct srb_iocb *lio = &sp->u.iocb_cmd; unsigned long flags = 0; ql_dbg(ql_dbg_io, vha, 0x3069, @@ -2386,12 +2384,12 @@ qla2x00_els_dcmd_iocb_timeout(void *data) } static void -qla2x00_els_dcmd_sp_done(void *data, void *ptr, int res) +qla2x00_els_dcmd_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; fc_port_t *fcport = sp->fcport; struct srb_iocb *lio = &sp->u.iocb_cmd; - struct scsi_qla_host *vha = fcport->vha; + struct scsi_qla_host *vha = sp->vha; ql_dbg(ql_dbg_io, vha, 0x3072, "%s hdl=%x, portid=%02x%02x%02x done\n", @@ -2449,7 +2447,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, GFP_KERNEL); if (!elsio->u.els_logo.els_logo_pyld) { - sp->free(vha, sp); + sp->free(sp); return QLA_FUNCTION_FAILED; } @@ -2468,7 +2466,7 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, rval = qla2x00_start_sp(sp); if (rval != QLA_SUCCESS) { - sp->free(vha, sp); + sp->free(sp); return QLA_FUNCTION_FAILED; } @@ -2479,14 +2477,14 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode, wait_for_completion(&elsio->u.els_logo.comp); - sp->free(vha, sp); + sp->free(sp); return rval; } static void qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) { - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct srb_iocb *elsio = &sp->u.iocb_cmd; els_iocb->entry_type = ELS_IOCB_TYPE; @@ -2518,7 +2516,7 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) els_iocb->rx_address[1] = 0; els_iocb->rx_len = 0; - sp->fcport->vha->qla_stats.control_requests++; + sp->vha->qla_stats.control_requests++; } static void @@ -2534,7 +2532,7 @@ qla24xx_els_iocb(srb_t *sp, 
struct els_entry_24xx *els_iocb) els_iocb->handle = sp->handle; els_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id); els_iocb->tx_dsd_count = cpu_to_le16(bsg_job->request_payload.sg_cnt); - els_iocb->vp_index = sp->fcport->vha->vp_idx; + els_iocb->vp_index = sp->vha->vp_idx; els_iocb->sof_type = EST_SOFI3; els_iocb->rx_dsd_count = cpu_to_le16(bsg_job->reply_payload.sg_cnt); @@ -2565,7 +2563,7 @@ qla24xx_els_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) els_iocb->rx_len = cpu_to_le32(sg_dma_len (bsg_job->reply_payload.sg_list)); - sp->fcport->vha->qla_stats.control_requests++; + sp->vha->qla_stats.control_requests++; } static void @@ -2576,7 +2574,7 @@ qla2x00_ct_iocb(srb_t *sp, ms_iocb_entry_t *ct_iocb) struct scatterlist *sg; int index; uint16_t tot_dsds; - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct qla_hw_data *ha = vha->hw; struct bsg_job *bsg_job = sp->u.bsg_job; int loop_iterartion = 0; @@ -2642,7 +2640,7 @@ qla2x00_ct_iocb(srb_t *sp, ms_iocb_entry_t *ct_iocb) } ct_iocb->entry_count = entry_count; - sp->fcport->vha->qla_stats.control_requests++; + sp->vha->qla_stats.control_requests++; } static void @@ -2653,7 +2651,7 @@ qla24xx_ct_iocb(srb_t *sp, struct ct_entry_24xx *ct_iocb) struct scatterlist *sg; int index; uint16_t tot_dsds; - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct qla_hw_data *ha = vha->hw; struct bsg_job *bsg_job = sp->u.bsg_job; int loop_iterartion = 0; @@ -2665,7 +2663,7 @@ qla24xx_ct_iocb(srb_t *sp, struct ct_entry_24xx *ct_iocb) ct_iocb->handle = sp->handle; ct_iocb->nport_handle = cpu_to_le16(sp->fcport->loop_id); - ct_iocb->vp_index = sp->fcport->vha->vp_idx; + ct_iocb->vp_index = sp->vha->vp_idx; ct_iocb->comp_status = cpu_to_le16(0); ct_iocb->cmd_dsd_count = @@ -2739,7 +2737,7 @@ qla82xx_start_scsi(srb_t *sp) uint32_t *fcp_dl; uint8_t additional_cdb_len; struct ct6_dsd *ctx; - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; struct req_que *req = NULL; struct rsp_que *rsp = NULL; @@ -2901,7 +2899,7 @@ qla82xx_start_scsi(srb_t *sp) cmd_pkt->port_id[0] = sp->fcport->d_id.b.al_pa; cmd_pkt->port_id[1] = sp->fcport->d_id.b.area; cmd_pkt->port_id[2] = sp->fcport->d_id.b.domain; - cmd_pkt->vp_index = sp->fcport->vha->vp_idx; + cmd_pkt->vp_index = sp->vha->vp_idx; /* Build IOCB segments */ if (qla24xx_build_scsi_type_6_iocbs(sp, cmd_pkt, tot_dsds)) @@ -2974,7 +2972,7 @@ qla82xx_start_scsi(srb_t *sp) cmd_pkt->port_id[0] = sp->fcport->d_id.b.al_pa; cmd_pkt->port_id[1] = sp->fcport->d_id.b.area; cmd_pkt->port_id[2] = sp->fcport->d_id.b.domain; - cmd_pkt->vp_index = sp->fcport->vha->vp_idx; + cmd_pkt->vp_index = sp->vha->vp_idx; int_to_scsilun(cmd->device->lun, &cmd_pkt->lun); host_to_fcp_swap((uint8_t *)&cmd_pkt->lun, @@ -3060,7 +3058,7 @@ static void qla24xx_abort_iocb(srb_t *sp, struct abort_entry_24xx *abt_iocb) { struct srb_iocb *aio = &sp->u.iocb_cmd; - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct req_que *req = vha->req; memset(abt_iocb, 0, sizeof(struct abort_entry_24xx)); @@ -3132,7 +3130,7 @@ int qla2x00_start_sp(srb_t *sp) { int rval; - scsi_qla_host_t *vha = (scsi_qla_host_t *)sp->vha; + scsi_qla_host_t *vha = sp->vha; struct qla_hw_data *ha = vha->hw; void *pkt; unsigned long flags; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index d9577db7ddbb..914cc5a66b34 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ 
-1241,7 +1241,7 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha, req->outstanding_cmds[index] = NULL; /* Save ISP completion status */ - sp->done(vha, sp, DID_OK << 16); + sp->done(sp, DID_OK << 16); } else { ql_log(ql_log_warn, vha, 0x3016, "Invalid SCSI SRB.\n"); @@ -1373,7 +1373,7 @@ qla2x00_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, le16_to_cpu(mbx->mb7)); logio_done: - sp->done(vha, sp, 0); + sp->done(sp, 0); } static void @@ -1398,7 +1398,7 @@ qla24xx_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, res = (si->u.mbx.in_mb[0] & MBS_MASK); - sp->done(vha, sp, res); + sp->done(sp, res); } static void @@ -1416,7 +1416,7 @@ qla24xxx_nack_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, if (pkt->u.isp2x.status != cpu_to_le16(NOTIFY_ACK_SUCCESS)) res = QLA_FUNCTION_FAILED; - sp->done(vha, sp, res); + sp->done(sp, res); } static void @@ -1488,7 +1488,7 @@ qla2x00_ct_entry(scsi_qla_host_t *vha, struct req_que *req, break; } - sp->done(vha, sp, res); + sp->done(sp, res); } static void @@ -1524,7 +1524,7 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, type = "Driver ELS logo"; ql_dbg(ql_dbg_user, vha, 0x5047, "Completing %s: (%p) type=%d.\n", type, sp, sp->type); - sp->done(vha, sp, 0); + sp->done(sp, 0); return; case SRB_CT_PTHRU_CMD: /* borrowing sts_entry_24xx.comp_status. @@ -1533,7 +1533,7 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, res = qla2x00_chk_ms_status(vha, (ms_iocb_entry_t *)pkt, (struct ct_sns_rsp *)sp->u.iocb_cmd.u.ctarg.rsp, sp->name); - sp->done(vha, sp, res); + sp->done(sp, res); return; default: ql_dbg(ql_dbg_user, vha, 0x503e, @@ -1589,7 +1589,7 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req, bsg_job->reply_len = 0; } - sp->done(vha, sp, res); + sp->done(sp, res); } static void @@ -1701,7 +1701,7 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, le32_to_cpu(logio->io_parameter[1])); logio_done: - sp->done(vha, sp, 0); + sp->done(sp, 0); } static void @@ -1751,7 +1751,7 @@ qla24xx_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk) ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x5055, (uint8_t *)sts, sizeof(*sts)); - sp->done(vha, sp, 0); + sp->done(sp, 0); } /** @@ -1839,7 +1839,7 @@ static inline void qla2x00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t par_sense_len, uint32_t sense_len, struct rsp_que *rsp, int res) { - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct scsi_cmnd *cp = GET_CMD_SP(sp); uint32_t track_sense_len; @@ -1867,7 +1867,7 @@ qla2x00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t par_sense_len, if (sense_len) { ql_dbg(ql_dbg_io + ql_dbg_buffer, vha, 0x301c, "Check condition Sense data, nexus%ld:%d:%llu cmd=%p.\n", - sp->fcport->vha->host_no, cp->device->id, cp->device->lun, + sp->vha->host_no, cp->device->id, cp->device->lun, cp); ql_dump_buffer(ql_dbg_io + ql_dbg_buffer, vha, 0x302b, cp->sense_buffer, sense_len); @@ -1889,7 +1889,7 @@ struct scsi_dif_tuple { static inline int qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24) { - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct scsi_cmnd *cmd = GET_CMD_SP(sp); uint8_t *ap = &sts24->data[12]; uint8_t *ep = &sts24->data[20]; @@ -2154,7 +2154,7 @@ qla25xx_process_bidir_status_iocb(scsi_qla_host_t *vha, void *pkt, bsg_job->reply_len = sizeof(struct fc_bsg_reply); /* Always return DID_OK, bsg will send the vendor specific response * in this case only */ 
- sp->done(vha, sp, (DID_OK << 6)); + sp->done(sp, DID_OK << 6); } @@ -2527,7 +2527,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) resid_len, fw_resid_len, sp, cp); if (rsp->status_srb == NULL) - sp->done(vha, sp, res); + sp->done(sp, res); } /** @@ -2584,7 +2584,7 @@ qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) /* Place command on done queue. */ if (sense_len == 0) { rsp->status_srb = NULL; - sp->done(vha, sp, cp->result); + sp->done(sp, cp->result); } } @@ -2620,7 +2620,7 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { - sp->done(vha, sp, res); + sp->done(sp, res); return; } fatal: @@ -2678,7 +2678,7 @@ qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, abt = &sp->u.iocb_cmd; abt->u.abt.comp_status = le32_to_cpu(pkt->nport_handle); - sp->done(vha, sp, 0); + sp->done(sp, 0); } /** diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index d38bc6452abd..945b6320e7d7 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -1789,16 +1789,16 @@ qlafx00_update_host_attr(scsi_qla_host_t *vha, struct port_info_data *pinfo) static void qla2x00_fxdisc_iocb_timeout(void *data) { - srb_t *sp = (srb_t *)data; + srb_t *sp = data; struct srb_iocb *lio = &sp->u.iocb_cmd; complete(&lio->u.fxiocb.fxiocb_comp); } static void -qla2x00_fxdisc_sp_done(void *data, void *ptr, int res) +qla2x00_fxdisc_sp_done(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct srb_iocb *lio = &sp->u.iocb_cmd; complete(&lio->u.fxiocb.fxiocb_comp); @@ -1999,7 +1999,7 @@ qlafx00_fx_disc(scsi_qla_host_t *vha, fc_port_t *fcport, uint16_t fx_type) dma_free_coherent(&ha->pdev->dev, fdisc->u.fxiocb.req_len, fdisc->u.fxiocb.req_addr, fdisc->u.fxiocb.req_dma_handle); done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: return rval; } @@ -2127,7 +2127,7 @@ static inline void qlafx00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t par_sense_len, uint32_t sense_len, struct rsp_que *rsp, int res) { - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct scsi_cmnd *cp = GET_CMD_SP(sp); uint32_t track_sense_len; @@ -2162,7 +2162,7 @@ qlafx00_handle_sense(srb_t *sp, uint8_t *sense_data, uint32_t par_sense_len, if (sense_len) { ql_dbg(ql_dbg_io + ql_dbg_buffer, vha, 0x3039, "Check condition Sense data, nexus%ld:%d:%llu cmd=%p.\n", - sp->fcport->vha->host_no, cp->device->id, cp->device->lun, + sp->vha->host_no, cp->device->id, cp->device->lun, cp); ql_dump_buffer(ql_dbg_io + ql_dbg_buffer, vha, 0x3049, cp->sense_buffer, sense_len); @@ -2181,7 +2181,7 @@ qlafx00_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, (sstatus & cpu_to_le16((uint16_t)SS_RESPONSE_INFO_LEN_VALID))) cpstatus = cpu_to_le16((uint16_t)CS_INCOMPLETE); tmf->u.tmf.comp_status = cpstatus; - sp->done(vha, sp, 0); + sp->done(sp, 0); } static void @@ -2198,7 +2198,7 @@ qlafx00_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, abt = &sp->u.iocb_cmd; abt->u.abt.comp_status = pkt->tgt_id_sts; - sp->done(vha, sp, 0); + sp->done(sp, 0); } static void @@ -2264,7 +2264,7 @@ qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req, bsg_reply->reply_payload_rcv_len = bsg_job->reply_payload.payload_len; } - sp->done(vha, sp, res); + sp->done(sp, res); } /** @@ -2537,7 +2537,7 @@ qlafx00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) par_sense_len, 
rsp_info_len); if (rsp->status_srb == NULL) - sp->done(vha, sp, res); + sp->done(sp, res); } /** @@ -2614,7 +2614,7 @@ qlafx00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) /* Place command on done queue. */ if (sense_len == 0) { rsp->status_srb = NULL; - sp->done(vha, sp, cp->result); + sp->done(sp, cp->result); } } @@ -2695,7 +2695,7 @@ qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { - sp->done(vha, sp, res); + sp->done(sp, res); return; } @@ -2997,7 +2997,7 @@ qlafx00_build_scsi_iocbs(srb_t *sp, struct cmd_type_7_fx00 *cmd_pkt, cont_a64_entry_t lcont_pkt; cont_a64_entry_t *cont_pkt; - vha = sp->fcport->vha; + vha = sp->vha; req = vha->req; cmd = GET_CMD_SP(sp); @@ -3081,7 +3081,7 @@ qlafx00_start_scsi(srb_t *sp) struct req_que *req = NULL; struct rsp_que *rsp = NULL; struct scsi_cmnd *cmd = GET_CMD_SP(sp); - struct scsi_qla_host *vha = sp->fcport->vha; + struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; struct cmd_type_7_fx00 *cmd_pkt; struct cmd_type_7_fx00 lcmd_pkt; @@ -3205,7 +3205,7 @@ void qlafx00_tm_iocb(srb_t *sp, struct tsk_mgmt_entry_fx00 *ptm_iocb) { struct srb_iocb *fxio = &sp->u.iocb_cmd; - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct req_que *req = vha->req; struct tsk_mgmt_entry_fx00 tm_iocb; struct scsi_lun llun; @@ -3232,7 +3232,7 @@ void qlafx00_abort_iocb(srb_t *sp, struct abort_iocb_entry_fx00 *pabt_iocb) { struct srb_iocb *fxio = &sp->u.iocb_cmd; - scsi_qla_host_t *vha = sp->fcport->vha; + scsi_qla_host_t *vha = sp->vha; struct req_que *req = vha->req; struct abort_iocb_entry_fx00 abt_iocb; @@ -3346,8 +3346,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) REQUEST_ENTRY_SIZE); cont_pkt = qlafx00_prep_cont_type1_iocb( - sp->fcport->vha->req, - &lcont_pkt); + sp->vha->req, &lcont_pkt); cur_dsd = (__le32 *) lcont_pkt.dseg_0_address; avail_dsds = 5; @@ -3368,7 +3367,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) &lcont_pkt, REQUEST_ENTRY_SIZE); ql_dump_buffer( ql_dbg_user + ql_dbg_verbose, - sp->fcport->vha, 0x3042, + sp->vha, 0x3042, (uint8_t *)&lcont_pkt, REQUEST_ENTRY_SIZE); } @@ -3377,7 +3376,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) memcpy_toio((void __iomem *)cont_pkt, &lcont_pkt, REQUEST_ENTRY_SIZE); ql_dump_buffer(ql_dbg_user + ql_dbg_verbose, - sp->fcport->vha, 0x3043, + sp->vha, 0x3043, (uint8_t *)&lcont_pkt, REQUEST_ENTRY_SIZE); } } @@ -3409,8 +3408,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) REQUEST_ENTRY_SIZE); cont_pkt = qlafx00_prep_cont_type1_iocb( - sp->fcport->vha->req, - &lcont_pkt); + sp->vha->req, &lcont_pkt); cur_dsd = (__le32 *) lcont_pkt.dseg_0_address; avail_dsds = 5; @@ -3431,7 +3429,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) REQUEST_ENTRY_SIZE); ql_dump_buffer( ql_dbg_user + ql_dbg_verbose, - sp->fcport->vha, 0x3045, + sp->vha, 0x3045, (uint8_t *)&lcont_pkt, REQUEST_ENTRY_SIZE); } @@ -3440,7 +3438,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) memcpy_toio((void __iomem *)cont_pkt, &lcont_pkt, REQUEST_ENTRY_SIZE); ql_dump_buffer(ql_dbg_user + ql_dbg_verbose, - sp->fcport->vha, 0x3046, + sp->vha, 0x3046, (uint8_t *)&lcont_pkt, REQUEST_ENTRY_SIZE); } } @@ -3452,7 +3450,7 @@ qlafx00_fxdisc_iocb(srb_t *sp, struct fxdisc_entry_fx00 *pfxiocb) } ql_dump_buffer(ql_dbg_user + ql_dbg_verbose, - sp->fcport->vha, 0x3047, + sp->vha, 0x3047, (uint8_t 
*)&fx_iocb, sizeof(struct fxdisc_entry_fx00)); memcpy_toio((void __iomem *)pfxiocb, &fx_iocb, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index bdaa238ca0ab..191aa94dc2f1 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -613,11 +613,11 @@ qla24xx_fw_version_str(struct scsi_qla_host *vha, char *str, size_t size) } void -qla2x00_sp_free_dma(void *vha, void *ptr) +qla2x00_sp_free_dma(void *ptr) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; + struct qla_hw_data *ha = sp->vha->hw; struct scsi_cmnd *cmd = GET_CMD_SP(sp); - struct qla_hw_data *ha = sp->fcport->vha->hw; void *ctx = GET_CMD_CTX_SP(sp); if (sp->flags & SRB_DMA_VALID) { @@ -656,20 +656,19 @@ qla2x00_sp_free_dma(void *vha, void *ptr) } CMD_SP(cmd) = NULL; - qla2x00_rel_sp(sp->fcport->vha, sp); + qla2x00_rel_sp(sp); } void -qla2x00_sp_compl(void *data, void *ptr, int res) +qla2x00_sp_compl(void *ptr, int res) { - struct qla_hw_data *ha = (struct qla_hw_data *)data; - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct scsi_cmnd *cmd = GET_CMD_SP(sp); cmd->result = res; if (atomic_read(&sp->ref_count) == 0) { - ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3015, + ql_dbg(ql_dbg_io, sp->vha, 0x3015, "SP reference-count to ZERO -- sp=%p cmd=%p.\n", sp, GET_CMD_SP(sp)); if (ql2xextended_error_logging & ql_dbg_io) @@ -679,12 +678,12 @@ qla2x00_sp_compl(void *data, void *ptr, int res) if (!atomic_dec_and_test(&sp->ref_count)) return; - qla2x00_sp_free_dma(ha, sp); + qla2x00_sp_free_dma(sp); cmd->scsi_done(cmd); } void -qla2xxx_qpair_sp_free_dma(void *vha, void *ptr) +qla2xxx_qpair_sp_free_dma(void *ptr) { srb_t *sp = (srb_t *)ptr; struct scsi_cmnd *cmd = GET_CMD_SP(sp); @@ -730,9 +729,9 @@ qla2xxx_qpair_sp_free_dma(void *vha, void *ptr) } void -qla2xxx_qpair_sp_compl(void *data, void *ptr, int res) +qla2xxx_qpair_sp_compl(void *ptr, int res) { - srb_t *sp = (srb_t *)ptr; + srb_t *sp = ptr; struct scsi_cmnd *cmd = GET_CMD_SP(sp); cmd->result = res; @@ -748,7 +747,7 @@ qla2xxx_qpair_sp_compl(void *data, void *ptr, int res) if (!atomic_dec_and_test(&sp->ref_count)) return; - qla2xxx_qpair_sp_free_dma(sp->fcport->vha, sp); + qla2xxx_qpair_sp_free_dma(sp); cmd->scsi_done(cmd); } @@ -869,7 +868,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) return 0; qc24_host_busy_free_sp: - qla2x00_sp_free_dma(ha, sp); + qla2x00_sp_free_dma(sp); qc24_host_busy: return SCSI_MLQUEUE_HOST_BUSY; @@ -958,7 +957,7 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd, return 0; qc24_host_busy_free_sp: - qla2xxx_qpair_sp_free_dma(vha, sp); + qla2xxx_qpair_sp_free_dma(sp); qc24_host_busy: return SCSI_MLQUEUE_HOST_BUSY; @@ -1238,7 +1237,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) } spin_lock_irqsave(&ha->hardware_lock, flags); - sp->done(vha, sp, 0); + sp->done(sp, 0); spin_unlock_irqrestore(&ha->hardware_lock, flags); /* Did the command return during mailbox execution? 
*/ @@ -1283,7 +1282,7 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t, continue; if (sp->type != SRB_SCSI_CMD) continue; - if (vha->vp_idx != sp->fcport->vha->vp_idx) + if (vha->vp_idx != sp->vha->vp_idx) continue; match = 0; cmd = GET_CMD_SP(sp); @@ -1663,7 +1662,7 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) spin_lock_irqsave(&ha->hardware_lock, flags); } req->outstanding_cmds[cnt] = NULL; - sp->done(vha, sp, res); + sp->done(sp, res); } } } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 1cd3734292f9..45f5077684f0 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -535,10 +535,10 @@ static int qla24xx_post_nack_work(struct scsi_qla_host *vha, fc_port_t *fcport, } static -void qla2x00_async_nack_sp_done(void *v, void *s, int res) +void qla2x00_async_nack_sp_done(void *s, int res) { - struct scsi_qla_host *vha = (struct scsi_qla_host *)v; struct srb *sp = (struct srb *)s; + struct scsi_qla_host *vha = sp->vha; unsigned long flags; ql_dbg(ql_dbg_disc, vha, 0xffff, @@ -595,7 +595,7 @@ void qla2x00_async_nack_sp_done(void *v, void *s, int res) } spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); - sp->free(vha, sp); + sp->free(sp); } int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, @@ -645,7 +645,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, return rval; done_free_sp: - sp->free(vha, sp); + sp->free(sp); done: fcport->flags &= ~FCF_ASYNC_SENT; return rval; -- GitLab From 2fdbc65eae3989b566047b2ce21d66f52fe6bf69 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Jan 2017 13:31:13 -0800 Subject: [PATCH 0190/1084] qla2xxx: Avoid using variable-length arrays This patch does not change any functionality but avoids that sparse complains about using variable-length arrays. Signed-off-by: Bart Van Assche Acked-by: Himanshu Madhani Cc: Quinn Tran Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_os.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 191aa94dc2f1..4f5965281395 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4272,8 +4272,8 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, spin_lock_init(&vha->cmd_list_lock); init_waitqueue_head(&vha->fcport_waitQ); - vha->gnl.size = - sizeof(struct get_name_list_extended[ha->max_loop_id+1]); + vha->gnl.size = sizeof(struct get_name_list_extended) * + (ha->max_loop_id + 1); vha->gnl.l = dma_alloc_coherent(&ha->pdev->dev, vha->gnl.size, &vha->gnl.ldma, GFP_KERNEL); if (!vha->gnl.l) { -- GitLab From 5cadafb236dffd8aa4772b32bf848af9128faedc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 13 Jan 2017 10:41:52 -0800 Subject: [PATCH 0191/1084] target/cxgbit: Fix endianness annotations This patch does not change any functionality but avoids that sparse complains about endianness. 
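As a rough illustration of the kind of annotation sparse enforces (the struct and helper names below are invented for the example; only the __be64, __wsum and __force usage mirrors the patch), a field that holds wire-order data is declared with a bitwise type, and any reuse of such a field as a plain integer needs an explicit __force cast:

/*
 * Illustration only; these are not the cxgbit definitions.
 */
#include <linux/types.h>
#include <asm/byteorder.h>

struct example_pagepod {
	__be64 addr;			/* big-endian on the wire */
};

static inline void example_set_addr(struct example_pagepod *pod, u64 cpu_addr)
{
	/* cpu_to_be64() already returns __be64, so sparse is satisfied */
	pod->addr = cpu_to_be64(cpu_addr);
}

static inline u32 example_read_credits(__wsum stashed)
{
	/* reusing a __wsum field as a plain counter needs an explicit __force cast */
	return (__force u32)stashed;
}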
Signed-off-by: Bart Van Assche Acked-by: Varun Prakash Signed-off-by: Nicholas Bellinger --- drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h | 2 +- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 11 ++++++----- drivers/target/iscsi/cxgbit/cxgbit_main.c | 2 +- drivers/target/iscsi/cxgbit/cxgbit_target.c | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h index e995a1a3840a..a91ad766cef0 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h @@ -59,7 +59,7 @@ struct cxgbi_pagepod_hdr { #define PPOD_PAGES_MAX 4 struct cxgbi_pagepod { struct cxgbi_pagepod_hdr hdr; - u64 addr[PPOD_PAGES_MAX + 1]; + __be64 addr[PPOD_PAGES_MAX + 1]; }; /* ddp tag format diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 2fb1bf1a26c5..35aaa36e9af5 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1726,7 +1726,7 @@ static bool cxgbit_credit_err(const struct cxgbit_sock *csk) } while (skb) { - credit += skb->csum; + credit += (__force u32)skb->csum; skb = cxgbit_skcb_tx_wr_next(skb); } @@ -1753,6 +1753,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) while (credits) { struct sk_buff *p = cxgbit_sock_peek_wr(csk); + const u32 csum = (__force u32)p->csum; if (unlikely(!p)) { pr_err("csk 0x%p,%u, cr %u,%u+%u, empty.\n", @@ -1761,17 +1762,17 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) break; } - if (unlikely(credits < p->csum)) { + if (unlikely(credits < csum)) { pr_warn("csk 0x%p,%u, cr %u,%u+%u, < %u.\n", csk, csk->tid, credits, csk->wr_cred, csk->wr_una_cred, - p->csum); - p->csum -= credits; + csum); + p->csum = (__force __wsum)(csum - credits); break; } cxgbit_sock_dequeue_wr(csk); - credits -= p->csum; + credits -= csum; kfree_skb(p); } diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c index 96eedfc49c94..6531aaac2b96 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_main.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c @@ -454,7 +454,7 @@ cxgbit_uld_lro_rx_handler(void *hndl, const __be64 *rsp, if (unlikely(op != *(u8 *)gl->va)) { pr_info("? FL 0x%p,RSS%#llx,FL %#llx,len %u.\n", gl->va, be64_to_cpu(*rsp), - be64_to_cpu(*(u64 *)gl->va), + get_unaligned_be64(gl->va), gl->tot_len); return 0; } diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 8bcb9b71f764..e183e0bdd3bf 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -243,7 +243,7 @@ void cxgbit_push_tx_frames(struct cxgbit_sock *csk) } __skb_unlink(skb, &csk->txq); set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - skb->csum = credits_needed + flowclen16; + skb->csum = (__force __wsum)(credits_needed + flowclen16); csk->wr_cred -= credits_needed; csk->wr_una_cred += credits_needed; -- GitLab From bdec5188745c4f03ecbc5b88bc2c3f76acb435ae Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 24 Jan 2017 17:07:02 +0530 Subject: [PATCH 0192/1084] target/cxgbit: Use T6 specific macro to set the force bit For T6 adapters use T6 specific macro to set the force bit. 
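A condensed sketch of the selection this introduces (the helper below and its is_t5 flag are illustrative; only the shift values and the TX_FORCE_V(!submode) expression come from the patch): T5-class adapters keep the force bit at bit 13 of the work request, while T6 moved it to bit 20, so the encoding has to be chosen by adapter generation.

/* Illustration only; helper name and parameters are made up. */
#define TX_FORCE_S		13
#define TX_FORCE_V(x)		((x) << TX_FORCE_S)

#define T6_TX_FORCE_S		20
#define T6_TX_FORCE_V(x)	((x) << T6_TX_FORCE_S)
#define T6_TX_FORCE_F		T6_TX_FORCE_V(1U)

static inline unsigned int example_tx_force(int is_t5, unsigned int submode)
{
	return is_t5 ? TX_FORCE_V(!submode) : T6_TX_FORCE_F;
}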
Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 4 ++++ drivers/target/iscsi/cxgbit/cxgbit_target.c | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index a267173f5997..21fc2fe1fc10 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -1349,6 +1349,10 @@ struct cpl_tx_data { #define TX_FORCE_S 13 #define TX_FORCE_V(x) ((x) << TX_FORCE_S) +#define T6_TX_FORCE_S 20 +#define T6_TX_FORCE_V(x) ((x) << T6_TX_FORCE_S) +#define T6_TX_FORCE_F T6_TX_FORCE_V(1U) + enum { ULP_TX_MEM_READ = 2, ULP_TX_MEM_WRITE = 3, diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index e183e0bdd3bf..8ae399dfe84f 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -162,12 +162,14 @@ cxgbit_tx_data_wr(struct cxgbit_sock *csk, struct sk_buff *skb, u32 dlen, u32 len, u32 credits, u32 compl) { struct fw_ofld_tx_data_wr *req; + const struct cxgb4_lld_info *lldi = &csk->com.cdev->lldi; u32 submode = cxgbit_skcb_submode(skb); u32 wr_ulp_mode = 0; u32 hdr_size = sizeof(*req); u32 opcode = FW_OFLD_TX_DATA_WR; u32 immlen = 0; - u32 force = TX_FORCE_V(!submode); + u32 force = is_t5(lldi->adapter_type) ? TX_FORCE_V(!submode) : + T6_TX_FORCE_F; if (cxgbit_skcb_flags(skb) & SKCBF_TX_ISO) { opcode = FW_ISCSI_TX_DATA_WR; -- GitLab From 664a722b4f1bab63fee0193459036d1a95e19e99 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 4 Jan 2017 08:13:34 +0100 Subject: [PATCH 0193/1084] target/tcm_fc: Remove a set-but-not-used variable This was detected by building with W=1. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Johannes Thumshirn Signed-off-by: Nicholas Bellinger --- drivers/target/tcm_fc/tfc_cmd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 9af7842b8178..ec372860106f 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -83,14 +83,12 @@ void ft_dump_cmd(struct ft_cmd *cmd, const char *caller) static void ft_free_cmd(struct ft_cmd *cmd) { struct fc_frame *fp; - struct fc_lport *lport; struct ft_sess *sess; if (!cmd) return; sess = cmd->sess; fp = cmd->req_frame; - lport = fr_dev(fp); if (fr_seq(fp)) fc_seq_release(fr_seq(fp)); fc_frame_free(fp); -- GitLab From 1efaa949396b5d9e8d1e6edef7e97e9ce1a97319 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Dec 2016 12:45:27 +0100 Subject: [PATCH 0194/1084] target/iscsi: Fix indentation in iscsi_target_start_negotiation() This patch avoids that smatch complains about inconsistent indentation in iscsi_target_start_negotiation(). Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Nicholas A. 
Bellinger Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_nego.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 46388c9e08da..5269e9ef031c 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -1249,16 +1249,16 @@ int iscsi_target_start_negotiation( { int ret; - if (conn->sock) { - struct sock *sk = conn->sock->sk; + if (conn->sock) { + struct sock *sk = conn->sock->sk; - write_lock_bh(&sk->sk_callback_lock); - set_bit(LOGIN_FLAGS_READY, &conn->login_flags); - write_unlock_bh(&sk->sk_callback_lock); - } + write_lock_bh(&sk->sk_callback_lock); + set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } - ret = iscsi_target_do_login(conn, login); - if (ret < 0) { + ret = iscsi_target_do_login(conn, login); + if (ret < 0) { cancel_delayed_work_sync(&conn->login_work); cancel_delayed_work_sync(&conn->login_cleanup_work); iscsi_target_restore_sock_callbacks(conn); -- GitLab From 0d5efb8a3c62c598d092bedb579debacd3f746eb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Dec 2016 14:37:52 +0100 Subject: [PATCH 0195/1084] target/iscsi: Fix spelling of "perform" Change two occurrences of "preform" into "perform". Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Nicholas A. Bellinger Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 2 +- drivers/target/iscsi/iscsi_target_login.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index da2c73a255de..3b770fddcb3b 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1545,7 +1545,7 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, } } /* - * Preform DataSN, DataSequenceInOrder, DataPDUInOrder, and + * Perform DataSN, DataSequenceInOrder, DataPDUInOrder, and * within-command recovery checks before receiving the payload. */ rc = iscsit_check_pre_dataout(cmd, buf); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 450f51deb2a2..303cb6574a93 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -222,7 +222,7 @@ int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) return 0; pr_debug("%s iSCSI Session SID %u is still active for %s," - " preforming session reinstatement.\n", (sessiontype) ? + " performing session reinstatement.\n", (sessiontype) ? "Discovery" : "Normal", sess->sid, sess->sess_ops->InitiatorName); -- GitLab From 53c561dca9fd66330fe63d8a03ef859407e2cd91 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Dec 2016 14:40:24 +0100 Subject: [PATCH 0196/1084] target/iscsi: Fix spelling of "reallegiance" Fix the spelling of this word in a function name, messages and source code comments. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Nicholas A. 
Bellinger Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 4 ++-- drivers/target/iscsi/iscsi_target_erl2.c | 6 +++--- drivers/target/iscsi/iscsi_target_erl2.h | 2 +- drivers/target/iscsi/iscsi_target_tmr.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 3b770fddcb3b..e11f8d0f1fa3 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4136,7 +4136,7 @@ int iscsit_close_connection( /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands - * for realligence. + * for reallegiance. * * During normal operation clear the out of order commands (but * do not free the struct iscsi_ooo_cmdsn's) and release all @@ -4144,7 +4144,7 @@ int iscsit_close_connection( */ if (atomic_read(&conn->connection_recovery)) { iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(conn); - iscsit_prepare_cmds_for_realligance(conn); + iscsit_prepare_cmds_for_reallegiance(conn); } else { iscsit_clear_ooo_cmdsns_for_conn(conn); iscsit_release_commands_from_conn(conn); diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c index faf9ae014b30..8df9c90f3db3 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.c +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -312,7 +312,7 @@ int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn) return 0; } -int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) +int iscsit_prepare_cmds_for_reallegiance(struct iscsi_conn *conn) { u32 cmd_count = 0; struct iscsi_cmd *cmd, *cmd_tmp; @@ -347,7 +347,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) if ((cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD) && (cmd->iscsi_opcode != ISCSI_OP_NOOP_OUT)) { - pr_debug("Not performing realligence on" + pr_debug("Not performing reallegiance on" " Opcode: 0x%02x, ITT: 0x%08x, CmdSN: 0x%08x," " CID: %hu\n", cmd->iscsi_opcode, cmd->init_task_tag, cmd->cmd_sn, conn->cid); @@ -382,7 +382,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) cmd_count++; pr_debug("Preparing Opcode: 0x%02x, ITT: 0x%08x," " CmdSN: 0x%08x, StatSN: 0x%08x, CID: %hu for" - " realligence.\n", cmd->iscsi_opcode, + " reallegiance.\n", cmd->iscsi_opcode, cmd->init_task_tag, cmd->cmd_sn, cmd->stat_sn, conn->cid); diff --git a/drivers/target/iscsi/iscsi_target_erl2.h b/drivers/target/iscsi/iscsi_target_erl2.h index 7965f1e86506..634d01e13652 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.h +++ b/drivers/target/iscsi/iscsi_target_erl2.h @@ -19,7 +19,7 @@ extern int iscsit_remove_cmd_from_connection_recovery(struct iscsi_cmd *, struct iscsi_session *); extern void iscsit_discard_cr_cmds_by_expstatsn(struct iscsi_conn_recovery *, u32); extern int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *); -extern int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *); +extern int iscsit_prepare_cmds_for_reallegiance(struct iscsi_conn *); extern int iscsit_connection_recovery_transport_reset(struct iscsi_conn *); #endif /*** ISCSI_TARGET_ERL2_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c index 3d637055c36f..cb231c907d51 100644 --- a/drivers/target/iscsi/iscsi_target_tmr.c +++ b/drivers/target/iscsi/iscsi_target_tmr.c @@ -440,14 +440,14 @@ static int iscsit_task_reassign_complete( break; default: pr_err("Illegal iSCSI Opcode 0x%02x 
during" - " command realligence\n", cmd->iscsi_opcode); + " command reallegiance\n", cmd->iscsi_opcode); return -1; } if (ret != 0) return ret; - pr_debug("Completed connection realligence for Opcode: 0x%02x," + pr_debug("Completed connection reallegiance for Opcode: 0x%02x," " ITT: 0x%08x to CID: %hu.\n", cmd->iscsi_opcode, cmd->init_task_tag, conn->cid); -- GitLab From e381fe9e89909ced3d878b8c3c3a9b344c02cbdc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 6 Jan 2017 11:32:08 +0100 Subject: [PATCH 0197/1084] target/iscsi: Introduce a helper function for TMF translation This patch does not change any functionality. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Nicholas A. Bellinger Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 48 ++++++++++++++--------------- include/target/target_core_base.h | 1 + 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index e11f8d0f1fa3..d16729fa1f67 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1920,6 +1920,28 @@ static int iscsit_handle_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, return ret; } +static enum tcm_tmreq_table iscsit_convert_tmf(u8 iscsi_tmf) +{ + switch (iscsi_tmf) { + case ISCSI_TM_FUNC_ABORT_TASK: + return TMR_ABORT_TASK; + case ISCSI_TM_FUNC_ABORT_TASK_SET: + return TMR_ABORT_TASK_SET; + case ISCSI_TM_FUNC_CLEAR_ACA: + return TMR_CLEAR_ACA; + case ISCSI_TM_FUNC_CLEAR_TASK_SET: + return TMR_CLEAR_TASK_SET; + case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: + return TMR_LUN_RESET; + case ISCSI_TM_FUNC_TARGET_WARM_RESET: + return TMR_TARGET_WARM_RESET; + case ISCSI_TM_FUNC_TARGET_COLD_RESET: + return TMR_TARGET_COLD_RESET; + default: + return TMR_UNKNOWN; + } +} + int iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, unsigned char *buf) @@ -1985,30 +2007,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, target_get_sess_cmd(&cmd->se_cmd, true); sess_ref = true; - - switch (function) { - case ISCSI_TM_FUNC_ABORT_TASK: - tcm_function = TMR_ABORT_TASK; - break; - case ISCSI_TM_FUNC_ABORT_TASK_SET: - tcm_function = TMR_ABORT_TASK_SET; - break; - case ISCSI_TM_FUNC_CLEAR_ACA: - tcm_function = TMR_CLEAR_ACA; - break; - case ISCSI_TM_FUNC_CLEAR_TASK_SET: - tcm_function = TMR_CLEAR_TASK_SET; - break; - case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: - tcm_function = TMR_LUN_RESET; - break; - case ISCSI_TM_FUNC_TARGET_WARM_RESET: - tcm_function = TMR_TARGET_WARM_RESET; - break; - case ISCSI_TM_FUNC_TARGET_COLD_RESET: - tcm_function = TMR_TARGET_COLD_RESET; - break; - default: + tcm_function = iscsit_convert_tmf(function); + if (tcm_function == TMR_UNKNOWN) { pr_err("Unknown iSCSI TMR Function:" " 0x%02x\n", function); return iscsit_add_reject_cmd(cmd, diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index da854fb4530f..92d5628775c2 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -197,6 +197,7 @@ enum tcm_tmreq_table { TMR_LUN_RESET = 5, TMR_TARGET_WARM_RESET = 6, TMR_TARGET_COLD_RESET = 7, + TMR_UNKNOWN = 0xff, }; /* fabric independent task management response values */ -- GitLab From 59b6986dbfcdab96a971f9663221849de79a7556 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Jan 2017 12:39:57 +0100 Subject: [PATCH 0198/1084] target/iscsi: Fix iSCSI task reassignment handling Allocate a task management 
request structure for all task management requests, including task reassignment. This change avoids that the se_tmr->response assignment dereferences an uninitialized se_tmr pointer. Reported-by: Moshe David Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Moshe David Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index d16729fa1f67..b4f1d1cbe521 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1951,7 +1951,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct iscsi_tm *hdr; int out_of_order_cmdsn = 0, ret; bool sess_ref = false; - u8 function; + u8 function, tcm_function = TMR_UNKNOWN; hdr = (struct iscsi_tm *) buf; hdr->flags &= ~ISCSI_FLAG_CMD_FINAL; @@ -1997,10 +1997,6 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * LIO-Target $FABRIC_MOD */ if (function != ISCSI_TM_FUNC_TASK_REASSIGN) { - - u8 tcm_function; - int ret; - transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); @@ -2014,15 +2010,14 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, return iscsit_add_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } - - ret = core_tmr_alloc_req(&cmd->se_cmd, cmd->tmr_req, - tcm_function, GFP_KERNEL); - if (ret < 0) - return iscsit_add_reject_cmd(cmd, + } + ret = core_tmr_alloc_req(&cmd->se_cmd, cmd->tmr_req, tcm_function, + GFP_KERNEL); + if (ret < 0) + return iscsit_add_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); - cmd->tmr_req->se_tmr_req = cmd->se_cmd.se_tmr_req; - } + cmd->tmr_req->se_tmr_req = cmd->se_cmd.se_tmr_req; cmd->iscsi_opcode = ISCSI_OP_SCSI_TMFUNC; cmd->i_state = ISTATE_SEND_TASKMGTRSP; -- GitLab From 4f4c6c3ddb15af396cd8cf81b487b59261199ee9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Dec 2016 13:23:45 +0100 Subject: [PATCH 0199/1084] target: Remove se_tmr_req.tmr_lun Member tmr_lun of se_tmr_req is set but not used. Hence remove it. 
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Himanshu Madhani Cc: Giridhar Malavali Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 1 - include/target/target_core_base.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 26929c44d703..cb7047d66afc 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -163,7 +163,6 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun) rcu_read_lock(); deve = target_nacl_find_deve(nacl, unpacked_lun); if (deve) { - se_tmr->tmr_lun = rcu_dereference(deve->se_lun); se_cmd->se_lun = rcu_dereference(deve->se_lun); se_lun = rcu_dereference(deve->se_lun); se_cmd->pr_res_key = deve->pr_res_key; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 92d5628775c2..c8b06e226f2e 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -398,7 +398,6 @@ struct se_tmr_req { void *fabric_tmr_ptr; struct se_cmd *task_cmd; struct se_device *tmr_dev; - struct se_lun *tmr_lun; struct list_head tmr_list; }; -- GitLab From 3a1e7ca64f2dae69536054c42b7b90e480db1045 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Dec 2016 13:47:38 +0100 Subject: [PATCH 0200/1084] target: Make core_tmr_abort_task() consider all commands It is possible that two commands with the same tag are present on sess_cmd_list because commands are removed from sess_cmd_list after a response has been sent to the initiator. Hence continue searching through sess_cmd_list even if a matching tag has already been found. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Himanshu Madhani Cc: Giridhar Malavali Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tmr.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 4f229e711e1c..311dc3c2f1dc 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -175,10 +175,9 @@ void core_tmr_abort_task( printk("ABORT_TASK: Found referenced %s task_tag: %llu\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); - if (!__target_check_io_state(se_cmd, se_sess, 0)) { - spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); - goto out; - } + if (!__target_check_io_state(se_cmd, se_sess, 0)) + continue; + list_del_init(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); @@ -195,7 +194,6 @@ void core_tmr_abort_task( } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); -out: printk("ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST for ref_tag: %lld\n", tmr->ref_task_tag); tmr->response = TMR_TASK_DOES_NOT_EXIST; -- GitLab From 2d4760ee4720d5f0eeda770ce248244166676283 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 10 Jan 2017 10:03:28 -0800 Subject: [PATCH 0201/1084] target: Correct transport_wait_for_tasks() documentation transport_wait_for_tasks() not only waits for command completion but also sets CMD_T_STOP. Additionally, this function is not only called by frontend drivers but also by the target core. Update the transport_wait_for_tasks() documentation to reflect this. 
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 437591bc7c08..62a50ea52a49 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2765,11 +2765,8 @@ __transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop, } /** - * transport_wait_for_tasks - wait for completion to occur - * @cmd: command to wait - * - * Called from frontend fabric context to wait for storage engine - * to pause and/or release frontend generated struct se_cmd. + * transport_wait_for_tasks - set CMD_T_STOP and wait for t_transport_stop_comp + * @cmd: command to wait on */ bool transport_wait_for_tasks(struct se_cmd *cmd) { -- GitLab From a5eb307f37d1e9909284b6f0a8f72bf55423c137 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 9 Jan 2017 10:14:54 -0800 Subject: [PATCH 0202/1084] target: Stop execution if CMD_T_STOP has been set Stop execution if CMD_T_STOP has been set for a command just after the command has been added to the device command list and before .write_pending() is called. The following sequence can trigger this: - transport_handle_cdb_direct() gets called. This function namely sets CMD_T_ACTIVE before it calls transport_generic_new_cmd(). - __transport_wait_for_tasks() is called concurrently. This function sets CMD_T_STOP for all active commands that have not been aborted. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Cc: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 62a50ea52a49..2d867af1961f 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2452,7 +2452,8 @@ transport_generic_new_cmd(struct se_cmd *cmd) target_execute_cmd(cmd); return 0; } - transport_cmd_check_stop(cmd, false, true); + if (transport_cmd_check_stop(cmd, false, true)) + return 0; ret = cmd->se_tfo->write_pending(cmd); if (ret == -EAGAIN || ret == -ENOMEM) -- GitLab From 580ab13ac445a2bb717bee995d8639ec6836f78c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 27 Dec 2015 13:48:09 +0100 Subject: [PATCH 0203/1084] target: Remove an overly chatty debug message Enabling dynamic debug for the target_core_mod kernel module causes the system log to be spammed with the "Incremented ..." message. Hence remove it. 
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 2d867af1961f..738ab107ec12 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1979,8 +1979,6 @@ static void transport_complete_task_attr(struct se_cmd *cmd) if (cmd->sam_task_attr == TCM_SIMPLE_TAG) { atomic_dec_mb(&dev->simple_cmds); dev->dev_cur_ordered_id++; - pr_debug("Incremented dev->dev_cur_ordered_id: %u for SIMPLE\n", - dev->dev_cur_ordered_id); } else if (cmd->sam_task_attr == TCM_HEAD_TAG) { dev->dev_cur_ordered_id++; pr_debug("Incremented dev_cur_ordered_id: %u for HEAD_OF_QUEUE\n", -- GitLab From b1a2ecdad72e0a4f0ebdbaff580b5330ebcf68c3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 1 Feb 2017 16:58:35 -0800 Subject: [PATCH 0204/1084] target: Inline transport_cmd_check_stop() The function transport_cmd_check_stop() has two callers. These callers invoke this function as follows: * transport_cmd_check_stop(cmd, true, false) * transport_cmd_check_stop(cmd, false, true) Hence inline this function into its callers. This patch does not change any functionality but improves source code readability. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Bryant G. Ly Cc: Christoph Hellwig Cc: Andy Grover Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 68 +++++++++++++------------- include/target/target_core_fabric.h | 2 +- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 738ab107ec12..6f66a2d890b8 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -604,24 +604,18 @@ static void target_remove_from_state_list(struct se_cmd *cmd) spin_unlock_irqrestore(&dev->execute_task_lock, flags); } -static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, - bool write_pending) +static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) { unsigned long flags; - if (remove_from_lists) { - target_remove_from_state_list(cmd); + target_remove_from_state_list(cmd); - /* - * Clear struct se_cmd->se_lun before the handoff to FE. - */ - cmd->se_lun = NULL; - } + /* + * Clear struct se_cmd->se_lun before the handoff to FE. + */ + cmd->se_lun = NULL; spin_lock_irqsave(&cmd->t_state_lock, flags); - if (write_pending) - cmd->t_state = TRANSPORT_WRITE_PENDING; - /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. @@ -635,31 +629,18 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, complete_all(&cmd->t_transport_stop_comp); return 1; } - cmd->transport_state &= ~CMD_T_ACTIVE; - if (remove_from_lists) { - /* - * Some fabric modules like tcm_loop can release - * their internally allocated I/O reference now and - * struct se_cmd now. - * - * Fabric modules are expected to return '1' here if the - * se_cmd being passed is released at this point, - * or zero if not being released. 
- */ - if (cmd->se_tfo->check_stop_free != NULL) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return cmd->se_tfo->check_stop_free(cmd); - } - } - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return 0; -} -static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) -{ - return transport_cmd_check_stop(cmd, true, false); + /* + * Some fabric modules like tcm_loop can release their internally + * allocated I/O reference and struct se_cmd now. + * + * Fabric modules are expected to return '1' here if the se_cmd being + * passed is released at this point, or zero if not being released. + */ + return cmd->se_tfo->check_stop_free ? cmd->se_tfo->check_stop_free(cmd) + : 0; } static void transport_lun_remove_cmd(struct se_cmd *cmd) @@ -2385,6 +2366,7 @@ EXPORT_SYMBOL(target_alloc_sgl); sense_reason_t transport_generic_new_cmd(struct se_cmd *cmd) { + unsigned long flags; int ret = 0; bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB); @@ -2450,8 +2432,24 @@ transport_generic_new_cmd(struct se_cmd *cmd) target_execute_cmd(cmd); return 0; } - if (transport_cmd_check_stop(cmd, false, true)) + + spin_lock_irqsave(&cmd->t_state_lock, flags); + cmd->t_state = TRANSPORT_WRITE_PENDING; + /* + * Determine if frontend context caller is requesting the stopping of + * this command for frontend exceptions. + */ + if (cmd->transport_state & CMD_T_STOP) { + pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n", + __func__, __LINE__, cmd->tag); + + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + + complete_all(&cmd->t_transport_stop_comp); return 0; + } + cmd->transport_state &= ~CMD_T_ACTIVE; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); ret = cmd->se_tfo->write_pending(cmd); if (ret == -EAGAIN || ret == -ENOMEM) diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 358041bad1da..d7dd1427fe0d 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -47,7 +47,7 @@ struct target_core_fabric_ops { u32 (*tpg_get_inst_index)(struct se_portal_group *); /* * Optional to release struct se_cmd and fabric dependent allocated - * I/O descriptor in transport_cmd_check_stop(). + * I/O descriptor after command execution has finished. * * Returning 1 will signal a descriptor has been released. * Returning 0 will signal a descriptor has not been released. -- GitLab From 6b6427b6fd90ba1ff13e540bf7276f30a3e6f74f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 7 Feb 2017 12:19:16 -0800 Subject: [PATCH 0205/1084] target: Move session check from target_put_sess_cmd() into target_release_cmd_kref() This patch does not change any functionality. 
Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 39 +++++++++++++------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 6f66a2d890b8..e949db12c4d8 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2592,39 +2592,38 @@ static void target_release_cmd_kref(struct kref *kref) unsigned long flags; bool fabric_stop; - spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); + if (se_sess) { + spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); - spin_lock(&se_cmd->t_state_lock); - fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP) && - (se_cmd->transport_state & CMD_T_ABORTED); - spin_unlock(&se_cmd->t_state_lock); + spin_lock(&se_cmd->t_state_lock); + fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP) && + (se_cmd->transport_state & CMD_T_ABORTED); + spin_unlock(&se_cmd->t_state_lock); - if (se_cmd->cmd_wait_set || fabric_stop) { + if (se_cmd->cmd_wait_set || fabric_stop) { + list_del_init(&se_cmd->se_cmd_list); + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + target_free_cmd_mem(se_cmd); + complete(&se_cmd->cmd_wait_comp); + return; + } list_del_init(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); - target_free_cmd_mem(se_cmd); - complete(&se_cmd->cmd_wait_comp); - return; } - list_del_init(&se_cmd->se_cmd_list); - spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); } -/* target_put_sess_cmd - Check for active I/O shutdown via kref_put - * @se_cmd: command descriptor to drop +/** + * target_put_sess_cmd - decrease the command reference count + * @se_cmd: command to drop a reference from + * + * Returns 1 if and only if this target_put_sess_cmd() call caused the + * refcount to drop to zero. Returns zero otherwise. */ int target_put_sess_cmd(struct se_cmd *se_cmd) { - struct se_session *se_sess = se_cmd->se_sess; - - if (!se_sess) { - target_free_cmd_mem(se_cmd); - se_cmd->se_tfo->release_cmd(se_cmd); - return 1; - } return kref_put(&se_cmd->cmd_kref, target_release_cmd_kref); } EXPORT_SYMBOL(target_put_sess_cmd); -- GitLab From fd5e64def9170392fdf258d1eee5966433ee3d45 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 3 Jan 2017 10:44:11 +0100 Subject: [PATCH 0206/1084] target: Remove command flag CMD_T_BUSY The patch that reworks task management function handling guarantees that target_remove_from_state_list() is always called with CMD_T_BUSY cleared. Since that function is the only function that tests that flag this means that that flag is now superfluous. Hence remove that flag. 
Signed-off-by: Bart Van Assche Reviewed-by: Andy Grover Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 2 +- drivers/target/target_core_transport.c | 12 ++++-------- include/target/target_core_base.h | 1 - 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index df7b6e95c019..68d8aef7ab78 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -604,7 +604,7 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes spin_lock_irq(&cmd->t_state_lock); cmd->t_state = TRANSPORT_PROCESSING; - cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; + cmd->transport_state |= CMD_T_ACTIVE | CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); __target_execute_cmd(cmd, false); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index e949db12c4d8..012ed95c3f40 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -593,9 +593,6 @@ static void target_remove_from_state_list(struct se_cmd *cmd) if (!dev) return; - if (cmd->transport_state & CMD_T_BUSY) - return; - spin_lock_irqsave(&dev->execute_task_lock, flags); if (cmd->state_active) { list_del(&cmd->state_list); @@ -714,7 +711,6 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) spin_lock_irqsave(&cmd->t_state_lock, flags); - cmd->transport_state &= ~CMD_T_BUSY; if (dev && dev->transport->transport_complete) { dev->transport->transport_complete(cmd, @@ -1782,7 +1778,7 @@ void __target_execute_cmd(struct se_cmd *cmd, bool do_checks) return; err: spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); + cmd->transport_state &= ~CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); transport_generic_request_failure(cmd, ret); @@ -1810,7 +1806,7 @@ static int target_write_prot_action(struct se_cmd *cmd) sectors, 0, cmd->t_prot_sg, 0); if (unlikely(cmd->pi_err)) { spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT); + cmd->transport_state &= ~CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); transport_generic_request_failure(cmd, cmd->pi_err); return -1; @@ -1899,7 +1895,7 @@ void target_execute_cmd(struct se_cmd *cmd) } cmd->t_state = TRANSPORT_PROCESSING; - cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT; + cmd->transport_state |= CMD_T_ACTIVE | CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); if (target_write_prot_action(cmd)) @@ -1907,7 +1903,7 @@ void target_execute_cmd(struct se_cmd *cmd) if (target_handle_task_attr(cmd)) { spin_lock_irq(&cmd->t_state_lock); - cmd->transport_state &= ~(CMD_T_BUSY | CMD_T_SENT); + cmd->transport_state &= ~CMD_T_SENT; spin_unlock_irq(&cmd->t_state_lock); return; } diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index c8b06e226f2e..7b350fd60cdb 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -489,7 +489,6 @@ struct se_cmd { #define CMD_T_SENT (1 << 4) #define CMD_T_STOP (1 << 5) #define CMD_T_DEV_ACTIVE (1 << 7) -#define CMD_T_BUSY (1 << 9) #define CMD_T_TAS (1 << 10) #define CMD_T_FABRIC_STOP (1 << 11) spinlock_t t_state_lock; -- GitLab From b2c9652eba6c11787b5f6cfa53aaea752f58809e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 5 Jun 2015 16:17:51 -0700 Subject: [PATCH 0207/1084] target: Remove command flag 
CMD_T_DEV_ACTIVE The code that tests the CMD_T_DEV_ACTIVE flag has been removed. Hence also remove the flag itself. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Andy Grover Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 1 - include/target/target_core_base.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 012ed95c3f40..22190003534d 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1223,7 +1223,6 @@ void transport_init_se_cmd( init_completion(&cmd->cmd_wait_comp); spin_lock_init(&cmd->t_state_lock); kref_init(&cmd->cmd_kref); - cmd->transport_state = CMD_T_DEV_ACTIVE; cmd->se_tfo = tfo; cmd->se_sess = se_sess; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 7b350fd60cdb..d7336f3c6b60 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -488,7 +488,6 @@ struct se_cmd { #define CMD_T_COMPLETE (1 << 2) #define CMD_T_SENT (1 << 4) #define CMD_T_STOP (1 << 5) -#define CMD_T_DEV_ACTIVE (1 << 7) #define CMD_T_TAS (1 << 10) #define CMD_T_FABRIC_STOP (1 << 11) spinlock_t t_state_lock; -- GitLab From c690d141489b0788db9ba95cc33075ad37d3ec30 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 5 Jan 2017 10:52:54 -0600 Subject: [PATCH 0208/1084] acpi:ipmi: Make IPMI user handler const ipmi_create_user() now takes the user handlers as const, make it const in the ACPI IPMI code. Cc: linux-acpi@vger.kernel.org Cc: Zhao Yakui Cc: Lv Zheng Signed-off-by: Corey Minyard Acked-by: Rafael J. Wysocki --- drivers/acpi/acpi_ipmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c index f77956c3fd45..747c2ba98534 100644 --- a/drivers/acpi/acpi_ipmi.c +++ b/drivers/acpi/acpi_ipmi.c @@ -56,7 +56,7 @@ struct acpi_ipmi_device { struct ipmi_driver_data { struct list_head ipmi_devices; struct ipmi_smi_watcher bmc_events; - struct ipmi_user_hndl ipmi_hndlrs; + const struct ipmi_user_hndl ipmi_hndlrs; struct mutex ipmi_lock; /* -- GitLab From db3b7e134185b27e6073618c1ad7eca1e4473eb4 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Mon, 16 Jan 2017 00:45:00 +0530 Subject: [PATCH 0209/1084] char: ipmi: constify ipmi_smi_handlers structures Declare ipmi_smi_handlers structures as const as they are only passed as an argument to the function ipmi_register_smi. This argument is of type const, so ipmi_smi_handlers structures having similar properties can be declared const too. Done using Coccinelle: @r1 disable optional_qualifier@ identifier i; position p; @@ static struct ipmi_smi_handlers i@p={...}; @ok1@ identifier r1.i; position p; @@ ipmi_register_smi(&i@p,...) 
@bad@ position p!={r1.p,ok1.p}; identifier r1.i; @@ i@p @depends on !bad disable optional_qualifier@ identifier r1.i; @@ +const struct ipmi_smi_handlers i; Size details after cross compiling the .o file for powerpc architecture File size before: text data bss dec hex filename 2777 288 0 3065 bf9 drivers/char/ipmi/ipmi_powernv.o File size after: text data bss dec hex filename 2873 192 0 3065 bf9 drivers/char/ipmi/ipmi_powernv.o Signed-off-by: Bhumika Goyal Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_powernv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_powernv.c b/drivers/char/ipmi/ipmi_powernv.c index 6e658aa114f1..b338a4becbf8 100644 --- a/drivers/char/ipmi/ipmi_powernv.c +++ b/drivers/char/ipmi/ipmi_powernv.c @@ -196,7 +196,7 @@ static void ipmi_powernv_poll(void *send_info) ipmi_powernv_recv(smi); } -static struct ipmi_smi_handlers ipmi_powernv_smi_handlers = { +static const struct ipmi_smi_handlers ipmi_powernv_smi_handlers = { .owner = THIS_MODULE, .start_processing = ipmi_powernv_start_processing, .sender = ipmi_powernv_send, -- GitLab From d23bb113952db88b03a71c9533e5a40e444e18d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2017 11:17:52 -0500 Subject: [PATCH 0210/1084] SUNRPC: Remove unused function rpc_get_timeout() Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 1 - net/sunrpc/clnt.c | 15 --------------- 2 files changed, 16 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 333ad11b3dd9..33f216edb434 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -182,7 +182,6 @@ int rpc_protocol(struct rpc_clnt *); struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); size_t rpc_max_bc_payload(struct rpc_clnt *); -unsigned long rpc_get_timeout(struct rpc_clnt *clnt); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 1dc9f3bac099..2838a1fab460 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1452,21 +1452,6 @@ size_t rpc_max_bc_payload(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_max_bc_payload); -/** - * rpc_get_timeout - Get timeout for transport in units of HZ - * @clnt: RPC client to query - */ -unsigned long rpc_get_timeout(struct rpc_clnt *clnt) -{ - unsigned long ret; - - rcu_read_lock(); - ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval; - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(rpc_get_timeout); - /** * rpc_force_rebind - force transport to check that remote port is unchanged * @clnt: client to rebind -- GitLab From 8d1b8c62e0805af7df900ef121389778d2126997 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2017 11:17:53 -0500 Subject: [PATCH 0211/1084] SUNRPC: Refactor TCP socket timeout code into a helper function Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 45 +++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index af392d9b9cec..c8ac649a51cb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2235,6 +2235,31 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) xs_reset_transport(transport); } +static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, + struct 
socket *sock) +{ + unsigned int keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); + unsigned int keepcnt = xprt->timeout->to_retries + 1; + unsigned int opt_on = 1; + unsigned int timeo; + + /* TCP Keepalive options */ + kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + (char *)&opt_on, sizeof(opt_on)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, + (char *)&keepidle, sizeof(keepidle)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, + (char *)&keepidle, sizeof(keepidle)); + kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, + (char *)&keepcnt, sizeof(keepcnt)); + + /* TCP user timeout (see RFC5482) */ + timeo = jiffies_to_msecs(xprt->timeout->to_initval) * + (xprt->timeout->to_retries + 1); + kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, + (char *)&timeo, sizeof(timeo)); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -2242,22 +2267,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) if (!transport->inet) { struct sock *sk = sock->sk; - unsigned int keepidle = xprt->timeout->to_initval / HZ; - unsigned int keepcnt = xprt->timeout->to_retries + 1; - unsigned int opt_on = 1; - unsigned int timeo; unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC; - /* TCP Keepalive options */ - kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&opt_on, sizeof(opt_on)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keepidle, sizeof(keepidle)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keepidle, sizeof(keepidle)); - kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keepcnt, sizeof(keepcnt)); - /* Avoid temporary address, they are bad for long-lived * connections such as NFS mounts. * RFC4941, section 3.6 suggests that: @@ -2268,11 +2279,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES, (char *)&addr_pref, sizeof(addr_pref)); - /* TCP user timeout (see RFC5482) */ - timeo = jiffies_to_msecs(xprt->timeout->to_initval) * - (xprt->timeout->to_retries + 1); - kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, - (char *)&timeo, sizeof(timeo)); + xs_tcp_set_socket_timeouts(xprt, sock); write_lock_bh(&sk->sk_callback_lock); -- GitLab From 7196dbb02ea05835b9ee56910ee82cb55422c7f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2017 11:17:54 -0500 Subject: [PATCH 0212/1084] SUNRPC: Allow changing of the TCP timeout parameters on the fly When the NFSv4 server tells us the lease period, we usually want to adjust down the timeout parameters on the TCP connection to ensure that we don't miss lease renewals due to a faulty connection. 
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 4 +++ include/linux/sunrpc/xprtsock.h | 3 ++ net/sunrpc/clnt.c | 30 ++++++++++++++----- net/sunrpc/xprtsock.c | 51 ++++++++++++++++++++++++++++++--- 4 files changed, 77 insertions(+), 11 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a5da60b24d83..eab1c749e192 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -137,6 +137,9 @@ struct rpc_xprt_ops { void (*release_request)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); + void (*set_connect_timeout)(struct rpc_xprt *xprt, + unsigned long connect_timeout, + unsigned long reconnect_timeout); void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); @@ -221,6 +224,7 @@ struct rpc_xprt { struct timer_list timer; unsigned long last_used, idle_timeout, + connect_timeout, max_reconnect_timeout; /* diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index bef3fb0abb8f..c9959d7e3579 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -55,6 +55,8 @@ struct sock_xprt { size_t rcvsize, sndsize; + struct rpc_timeout tcp_timeout; + /* * Saved socket callback addresses */ @@ -81,6 +83,7 @@ struct sock_xprt { #define XPRT_SOCK_CONNECTING 1U #define XPRT_SOCK_DATA_READY (2) +#define XPRT_SOCK_UPD_TIMEOUT (3) #endif /* __KERNEL__ */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2838a1fab460..b5bc0c589f6a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2684,6 +2684,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, { struct rpc_xprt_switch *xps; struct rpc_xprt *xprt; + unsigned long connect_timeout; unsigned long reconnect_timeout; unsigned char resvport; int ret = 0; @@ -2696,6 +2697,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, return -EAGAIN; } resvport = xprt->resvport; + connect_timeout = xprt->connect_timeout; reconnect_timeout = xprt->max_reconnect_timeout; rcu_read_unlock(); @@ -2705,7 +2707,10 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, goto out_put_switch; } xprt->resvport = resvport; - xprt->max_reconnect_timeout = reconnect_timeout; + if (xprt->ops->set_connect_timeout != NULL) + xprt->ops->set_connect_timeout(xprt, + connect_timeout, + reconnect_timeout); rpc_xprt_switch_set_roundrobin(xps); if (setup) { @@ -2722,24 +2727,35 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, } EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt); +struct connect_timeout_data { + unsigned long connect_timeout; + unsigned long reconnect_timeout; +}; + static int -rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt, +rpc_xprt_set_connect_timeout(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *data) { - unsigned long timeout = *((unsigned long *)data); + struct connect_timeout_data *timeo = data; - if (timeout < xprt->max_reconnect_timeout) - xprt->max_reconnect_timeout = timeout; + if (xprt->ops->set_connect_timeout) + xprt->ops->set_connect_timeout(xprt, + timeo->connect_timeout, + timeo->reconnect_timeout); return 0; } void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo) { + struct connect_timeout_data timeout = { + .connect_timeout = timeo, + .reconnect_timeout = timeo, + }; rpc_clnt_iterate_for_each_xprt(clnt, - rpc_xprt_cap_max_reconnect_timeout, - &timeo); + rpc_xprt_set_connect_timeout, + &timeout); } 
EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c8ac649a51cb..810e9b59be16 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -52,6 +52,8 @@ #include "sunrpc.h" static void xs_close(struct rpc_xprt *xprt); +static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, + struct socket *sock); /* * xprtsock tunables @@ -666,6 +668,9 @@ static int xs_tcp_send_request(struct rpc_task *task) if (task->tk_flags & RPC_TASK_SENT) zerocopy = false; + if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state)) + xs_tcp_set_socket_timeouts(xprt, transport->sock); + /* Continue transmitting the packet/record. We must be careful * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ @@ -2238,11 +2243,20 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct socket *sock) { - unsigned int keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); - unsigned int keepcnt = xprt->timeout->to_retries + 1; + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + unsigned int keepidle; + unsigned int keepcnt; unsigned int opt_on = 1; unsigned int timeo; + spin_lock_bh(&xprt->transport_lock); + keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); + keepcnt = xprt->timeout->to_retries + 1; + timeo = jiffies_to_msecs(xprt->timeout->to_initval) * + (xprt->timeout->to_retries + 1); + clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); + spin_unlock_bh(&xprt->transport_lock); + /* TCP Keepalive options */ kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&opt_on, sizeof(opt_on)); @@ -2254,12 +2268,38 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, (char *)&keepcnt, sizeof(keepcnt)); /* TCP user timeout (see RFC5482) */ - timeo = jiffies_to_msecs(xprt->timeout->to_initval) * - (xprt->timeout->to_retries + 1); kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, (char *)&timeo, sizeof(timeo)); } +static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, + unsigned long connect_timeout, + unsigned long reconnect_timeout) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct rpc_timeout to; + unsigned long initval; + + spin_lock_bh(&xprt->transport_lock); + if (reconnect_timeout < xprt->max_reconnect_timeout) + xprt->max_reconnect_timeout = reconnect_timeout; + if (connect_timeout < xprt->connect_timeout) { + memcpy(&to, xprt->timeout, sizeof(to)); + initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1); + /* Arbitrary lower limit */ + if (initval < XS_TCP_INIT_REEST_TO << 1) + initval = XS_TCP_INIT_REEST_TO << 1; + to.to_initval = initval; + to.to_maxval = initval; + memcpy(&transport->tcp_timeout, &to, + sizeof(transport->tcp_timeout)); + xprt->timeout = &transport->tcp_timeout; + xprt->connect_timeout = connect_timeout; + } + set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); + spin_unlock_bh(&xprt->transport_lock); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -2728,6 +2768,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .set_retrans_timeout = xprt_set_retrans_timeout_def, .close = xs_tcp_shutdown, .destroy = xs_destroy, + .set_connect_timeout = xs_tcp_set_connect_timeout, .print_stats = xs_tcp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, @@ -3014,6 +3055,8 @@ static struct rpc_xprt 
*xs_setup_tcp(struct xprt_create *args) xprt->timeout = &xs_tcp_default_timeout; xprt->max_reconnect_timeout = xprt->timeout->to_maxval; + xprt->connect_timeout = xprt->timeout->to_initval * + (xprt->timeout->to_retries + 1); INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn); INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); -- GitLab From 26ae102f2cfd0215daa57dc790aa3bfe534403a9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Feb 2017 11:17:55 -0500 Subject: [PATCH 0213/1084] NFSv4: Set the connection timeout to match the lease period Set the timeout for TCP connections to be 1 lease period to ensure that we don't lose our lease due to a faulty TCP connection. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4renewd.c | 2 +- include/linux/sunrpc/clnt.h | 5 +++-- net/sunrpc/clnt.c | 10 ++++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 82e77198d17e..1f8c2ae43a8d 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -153,7 +153,7 @@ void nfs4_set_lease_period(struct nfs_client *clp, spin_unlock(&clp->cl_lock); /* Cap maximum reconnect timeout at 1/2 lease period */ - rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1); + rpc_set_connect_timeout(clp->cl_rpcclient, lease, lease >> 1); } /* diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 33f216edb434..6095ecba0dde 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -201,8 +201,9 @@ int rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *, struct rpc_xprt *, void *), void *data); -void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, - unsigned long timeo); +void rpc_set_connect_timeout(struct rpc_clnt *clnt, + unsigned long connect_timeout, + unsigned long reconnect_timeout); int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *, struct rpc_xprt_switch *, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b5bc0c589f6a..52da3ce54bb5 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2747,17 +2747,19 @@ rpc_xprt_set_connect_timeout(struct rpc_clnt *clnt, } void -rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo) +rpc_set_connect_timeout(struct rpc_clnt *clnt, + unsigned long connect_timeout, + unsigned long reconnect_timeout) { struct connect_timeout_data timeout = { - .connect_timeout = timeo, - .reconnect_timeout = timeo, + .connect_timeout = connect_timeout, + .reconnect_timeout = reconnect_timeout, }; rpc_clnt_iterate_for_each_xprt(clnt, rpc_xprt_set_connect_timeout, &timeout); } -EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout); +EXPORT_SYMBOL_GPL(rpc_set_connect_timeout); void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt) { -- GitLab From d9cc34a6e1a8e7d26d243c54fa6cef93556d44f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Feb 2017 15:46:26 +0100 Subject: [PATCH 0214/1084] thermal: use cpumask_var_t for on-stack cpu masks Putting a bare cpumask structure on the stack produces a warning on large SMP configurations: drivers/thermal/cpu_cooling.c: In function 'cpufreq_state2power': drivers/thermal/cpu_cooling.c:644:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=] drivers/thermal/cpu_cooling.c: In function '__cpufreq_cooling_register': drivers/thermal/cpu_cooling.c:898:1: warning: the frame size of 1104 bytes is larger than 1024 bytes [-Wframe-larger-than=] The recommended workaround is to use cpumask_var_t, which behaves just like a 
normal cpu mask in most cases, but turns into a dynamic allocation when CONFIG_CPUMASK_OFFSTACK is set. Signed-off-by: Arnd Bergmann Acked-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/cpu_cooling.c | 39 +++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 9ce0e9eef923..d1f9e1563ac6 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -652,31 +652,39 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, unsigned long state, u32 *power) { unsigned int freq, num_cpus; - cpumask_t cpumask; + cpumask_var_t cpumask; u32 static_power, dynamic_power; int ret; struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; - cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask); - num_cpus = cpumask_weight(&cpumask); + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_and(cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask); + num_cpus = cpumask_weight(cpumask); /* None of our cpus are online, so no power */ if (num_cpus == 0) { *power = 0; - return 0; + ret = 0; + goto out; } freq = cpufreq_device->freq_table[state]; - if (!freq) - return -EINVAL; + if (!freq) { + ret = -EINVAL; + goto out; + } dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus; ret = get_static_power(cpufreq_device, tz, freq, &static_power); if (ret) - return ret; + goto out; *power = static_power + dynamic_power; - return 0; +out: + free_cpumask_var(cpumask); + return ret; } /** @@ -802,16 +810,20 @@ __cpufreq_cooling_register(struct device_node *np, struct cpufreq_cooling_device *cpufreq_dev; char dev_name[THERMAL_NAME_LENGTH]; struct cpufreq_frequency_table *pos, *table; - struct cpumask temp_mask; + cpumask_var_t temp_mask; unsigned int freq, i, num_cpus; int ret; struct thermal_cooling_device_ops *cooling_ops; - cpumask_and(&temp_mask, clip_cpus, cpu_online_mask); - policy = cpufreq_cpu_get(cpumask_first(&temp_mask)); + if (!alloc_cpumask_var(&temp_mask, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); + + cpumask_and(temp_mask, clip_cpus, cpu_online_mask); + policy = cpufreq_cpu_get(cpumask_first(temp_mask)); if (!policy) { pr_debug("%s: CPUFreq policy not found\n", __func__); - return ERR_PTR(-EPROBE_DEFER); + cool_dev = ERR_PTR(-EPROBE_DEFER); + goto free_cpumask; } table = policy->freq_table; @@ -931,7 +943,8 @@ __cpufreq_cooling_register(struct device_node *np, kfree(cpufreq_dev); put_policy: cpufreq_cpu_put(policy); - +free_cpumask: + free_cpumask_var(temp_mask); return cool_dev; } -- GitLab From bf7696a12071afaca0139d7b8b9eadfc77252876 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 22 Jan 2017 17:14:09 +0100 Subject: [PATCH 0215/1084] acpi: lpss: call pwm_add_table() for BSW PWM device On x86 we do not have devicetree to link the PWM controller and the display controller together. So someone needs to call pwm_add_table() to create the link, so that the i915 driver's pwm_get(dev, "pwm_backlight") call returns the lpss' pwm0. The PWM subsystem does not want to have pwm_add_table() calls directly in PWM drivers (this leads to probe ordering issues), so lets do it here since the acpi-lpss code is always builtin. Signed-off-by: Hans de Goede Acked-by: Rafael J. 
Wysocki Signed-off-by: Thierry Reding --- drivers/acpi/acpi_lpss.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 8ea836c046f8..cd5d2b6c9ed1 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "internal.h" @@ -154,6 +155,18 @@ static void byt_i2c_setup(struct lpss_private_data *pdata) writel(0, pdata->mmio_base + LPSS_I2C_ENABLE); } +/* BSW PWM used for backlight control by the i915 driver */ +static struct pwm_lookup bsw_pwm_lookup[] = { + PWM_LOOKUP_WITH_MODULE("80862288:00", 0, "0000:00:02.0", + "pwm_backlight", 0, PWM_POLARITY_NORMAL, + "pwm-lpss-platform"), +}; + +static void bsw_pwm_setup(struct lpss_private_data *pdata) +{ + pwm_add_table(bsw_pwm_lookup, ARRAY_SIZE(bsw_pwm_lookup)); +} + static const struct lpss_device_desc lpt_dev_desc = { .flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_LTR, .prv_offset = 0x800, @@ -191,6 +204,7 @@ static const struct lpss_device_desc byt_pwm_dev_desc = { static const struct lpss_device_desc bsw_pwm_dev_desc = { .flags = LPSS_SAVE_CTX | LPSS_NO_D3_DELAY, + .setup = bsw_pwm_setup, }; static const struct lpss_device_desc byt_uart_dev_desc = { -- GitLab From 24abdf1be15c478e2821d6fc903a4a4440beff02 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 16:59:46 -0500 Subject: [PATCH 0216/1084] xprtrdma: Fix Read chunk padding When pad optimization is disabled, rpcrdma_convert_iovs still does not add explicit XDR round-up padding to a Read chunk. Commit 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") incorrectly short-circuited the test for whether round-up padding is needed that appears later in rpcrdma_convert_iovs. However, if this is indeed a regular Read chunk (and not a Position-Zero Read chunk), the tail iovec _always_ contains the chunk's padding, and never anything else. So, it's easy to just skip the tail when padding optimization is enabled, and add the tail in a subsequent Read chunk segment, if disabled. Fixes: 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c52e0f2ffe52..a524d3c2e8ac 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -226,8 +226,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, if (len && n == RPCRDMA_MAX_SEGS) goto out_overflow; - /* When encoding the read list, the tail is always sent inline */ - if (type == rpcrdma_readch) + /* When encoding a Read chunk, the tail iovec contains an + * XDR pad and may be omitted. + */ + if (type == rpcrdma_readch && xprt_rdma_pad_optimize) return n; /* When encoding the Write list, some servers need to see an extra @@ -238,10 +240,6 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, return n; if (xdrbuf->tail[0].iov_len) { - /* the rpcrdma protocol allows us to omit any trailing - * xdr pad bytes, saving the server an RDMA operation. 
*/ - if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) - return n; n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n); if (n == RPCRDMA_MAX_SEGS) goto out_overflow; -- GitLab From b5f0afbea4f2ea52c613ac2b06cb6de2ea18cb6d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 16:59:54 -0500 Subject: [PATCH 0217/1084] xprtrdma: Per-connection pad optimization Pad optimization is changed by echoing into /proc/sys/sunrpc/rdma_pad_optimize. This is a global setting, affecting all RPC-over-RDMA connections to all servers. The marshaling code picks up that value and uses it for decisions about how to construct each RPC-over-RDMA frame. Having it change suddenly in mid-operation can result in unexpected failures. And some servers a client mounts might need chunk round-up, while others don't. So instead, copy the pad_optimize setting into each connection's rpcrdma_ia when the transport is created, and use the copy, which can't change during the life of the connection, instead. This also removes a hack: rpcrdma_convert_iovs was using the remote-invalidation-expected flag to predict when it could leave out Write chunk padding. This is because the Linux server handles implicit XDR padding on Write chunks correctly, and only Linux servers can set the connection's remote-invalidation-expected flag. It's more sensible to use the pad optimization setting instead. Fixes: 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 28 ++++++++++++++-------------- net/sunrpc/xprtrdma/verbs.c | 1 + net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index a524d3c2e8ac..c634f0f3f9ce 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -186,9 +186,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n) */ static int -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, - enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, - bool reminv_expected) +rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, + unsigned int pos, enum rpcrdma_chunktype type, + struct rpcrdma_mr_seg *seg) { int len, n, p, page_base; struct page **ppages; @@ -229,14 +229,15 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, /* When encoding a Read chunk, the tail iovec contains an * XDR pad and may be omitted. */ - if (type == rpcrdma_readch && xprt_rdma_pad_optimize) + if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup) return n; - /* When encoding the Write list, some servers need to see an extra - * segment for odd-length Write chunks. The upper layer provides - * space in the tail iovec for this purpose. + /* When encoding a Write chunk, some servers need to see an + * extra segment for non-XDR-aligned Write chunks. The upper + * layer provides space in the tail iovec that may be used + * for this purpose. 
*/ - if (type == rpcrdma_writech && reminv_expected) + if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup) return n; if (xdrbuf->tail[0].iov_len) { @@ -291,7 +292,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, if (rtype == rpcrdma_areadch) pos = 0; seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false); + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos, + rtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -353,10 +355,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, rqst->rq_rcv_buf.head[0].iov_len, - wtype, seg, - r_xprt->rx_ia.ri_reminv_expected); + wtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); @@ -421,8 +422,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, } seg = req->rl_segments; - nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, - r_xprt->rx_ia.ri_reminv_expected); + nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); if (nsegs < 0) return ERR_PTR(nsegs); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 11d07748f699..2a6a367a2dac 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -208,6 +208,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, /* Default settings for RPC-over-RDMA Version One */ r_xprt->rx_ia.ri_reminv_expected = false; + r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; rsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index e35efd4ac1e4..c13715431419 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -75,6 +75,7 @@ struct rpcrdma_ia { unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; bool ri_reminv_expected; + bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; struct ib_qp_attr ri_qp_attr; struct ib_qp_init_attr ri_qp_init_attr; -- GitLab From c95a3c6b88658bcb8f77f85f31a0b9d9036e8016 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:02 -0500 Subject: [PATCH 0218/1084] xprtrdma: Disable pad optimization by default Commit d5440e27d3e5 ("xprtrdma: Enable pad optimization") made the Linux client omit XDR round-up padding in normal Read and Write chunks so that the client doesn't have to register and invalidate 3-byte memory regions that contain no real data. Unfortunately, my cheery 2014 assessment that this optimization "is supported now by both Linux and Solaris servers" was premature. We've found bugs in Solaris in this area since commit d5440e27d3e5 ("xprtrdma: Enable pad optimization") was merged (SYMLINK is the main offender). So for maximum interoperability, I'm disabling this optimization again. If a CM private message is exchanged when connecting, the client recognizes that the server is Linux, and enables the optimization for that connection. Until now the Solaris server bugs did not impact common operations, and were thus largely benign. Soon, less capable devices on Linux NFS/RDMA clients will make use of Read chunks more often, and these Solaris bugs will prevent interoperation in more cases. 
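The "round-up" at issue is XDR's rule that opaque data is padded out to a 4-byte boundary. As a rough illustration of the arithmetic involved (this helper is written here for illustration only and is not part of the patch):

	/* XDR aligns opaque data to 4 bytes: 0..3 pad bytes follow the payload */
	static unsigned int xdr_round_up_pad(unsigned int len)
	{
		return (4 - (len & 3)) & 3;
	}

With pad optimization enabled, the client omits registering those pad bytes (for example, the three bytes that would follow a 5-byte symlink target) and expects the server to perform the round-up itself; the Solaris bugs mentioned above are in exactly that server-side round-up.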
Fixes: 677eb17e94ed ("xprtrdma: Fix XDR tail buffer marshalling") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 534c178d2a7e..699058169cfc 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -67,7 +67,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; - int xprt_rdma_pad_optimize = 1; + int xprt_rdma_pad_optimize = 0; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2a6a367a2dac..23f4da419a64 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -216,6 +216,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, pmsg->cp_magic == rpcrdma_cmp_magic && pmsg->cp_version == RPCRDMA_CMP_VERSION) { r_xprt->rx_ia.ri_reminv_expected = true; + r_xprt->rx_ia.ri_implicit_roundup = true; rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } -- GitLab From 16f906d66cd76fb9895cbc628f447532a7ac1faa Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:10 -0500 Subject: [PATCH 0219/1084] xprtrdma: Reduce required number of send SGEs The MAX_SEND_SGES check introduced in commit 655fec6987be ("xprtrdma: Use gathered Send for large inline messages") fails for devices that have a small max_sge. Instead of checking for a large fixed maximum number of SGEs, check for a minimum small number. RPC-over-RDMA will switch to using a Read chunk if an xdr_buf has more pages than can fit in the device's max_sge limit. This is considerably better than failing all together to mount the server. This fix supports devices that have as few as three send SGEs available. Reported-by: Selvin Xavier Reported-by: Devesh Sharma Reported-by: Honggang Li Reported-by: Ram Amrani Fixes: 655fec6987be ("xprtrdma: Use gathered Send for large ...") Cc: stable@vger.kernel.org # v4.9+ Tested-by: Honggang Li Tested-by: Ram Amrani Tested-by: Steve Wise Reviewed-by: Parav Pandit Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 26 +++++++++++++++++++++++--- net/sunrpc/xprtrdma/verbs.c | 13 +++++++------ net/sunrpc/xprtrdma/xprt_rdma.h | 2 ++ 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c634f0f3f9ce..d88988365cd2 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) /* The client can send a request inline as long as the RPCRDMA header * plus the RPC call fit under the transport's inline limit. If the * combined call message size exceeds that limit, the client must use - * the read chunk list for this operation. + * a Read chunk for this operation. + * + * A Read chunk is also required if sending the RPC call inline would + * exceed this device's max_sge limit. 
*/ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct xdr_buf *xdr = &rqst->rq_snd_buf; + unsigned int count, remaining, offset; + + if (xdr->len > r_xprt->rx_ia.ri_max_inline_write) + return false; - return rqst->rq_snd_buf.len <= ia->ri_max_inline_write; + if (xdr->page_len) { + remaining = xdr->page_len; + offset = xdr->page_base & ~PAGE_MASK; + count = 0; + while (remaining) { + remaining -= min_t(unsigned int, + PAGE_SIZE - offset, remaining); + offset = 0; + if (++count > r_xprt->rx_ia.ri_max_send_sges) + return false; + } + } + + return true; } /* The client can't know how large the actual reply will be. Thus it diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 23f4da419a64..61d16c39e92c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -488,18 +488,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) */ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, - struct rpcrdma_create_data_internal *cdata) + struct rpcrdma_create_data_internal *cdata) { struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; + unsigned int max_qp_wr, max_sge; struct ib_cq *sendcq, *recvcq; - unsigned int max_qp_wr; int rc; - if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) { - dprintk("RPC: %s: insufficient sge's available\n", - __func__); + max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); + if (max_sge < RPCRDMA_MIN_SEND_SGES) { + pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; } + ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES; if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { dprintk("RPC: %s: insufficient wqe's available\n", @@ -524,7 +525,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_recv_wr = cdata->max_requests; ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */ - ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES; + ep->rep_attr.cap.max_send_sge = max_sge; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c13715431419..3d7e9c9bad1f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -74,6 +74,7 @@ struct rpcrdma_ia { unsigned int ri_max_frmr_depth; unsigned int ri_max_inline_write; unsigned int ri_max_inline_read; + unsigned int ri_max_send_sges; bool ri_reminv_expected; bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; @@ -311,6 +312,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ * - xdr_buf tail iovec */ enum { + RPCRDMA_MIN_SEND_SGES = 3, RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1, RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1, RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, -- GitLab From c6f5b47f9fdeef12c0896e5af4bb3416c97d91c4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:18 -0500 Subject: [PATCH 0220/1084] xprtrdma: Shrink send SGEs array We no longer need to accommodate an xdr_buf whose pages start at an offset and cross extra page boundaries. If there are more partial or whole pages to send than there are available SGEs, the marshaling logic is now smart enough to use a Read chunk instead of failing. 
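The "more pages than SGEs" test referred to here is the page-counting loop that the previous patch added to rpcrdma_args_inline(). A minimal standalone sketch of that accounting, simplified from the code shown above (the helper name is invented for this sketch):

	/*
	 * One Send SGE is consumed per page actually touched by the
	 * xdr_buf's page list; only the first page may start at an offset.
	 */
	static unsigned int rpcrdma_count_page_sges(unsigned int page_base,
						    unsigned int page_len)
	{
		unsigned int remaining = page_len;
		unsigned int offset = page_base & ~PAGE_MASK;
		unsigned int count = 0;

		while (remaining) {
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
			count++;
		}
		return count;
	}

If this count exceeds the connection's ri_max_send_sges, the request is no longer eligible for a gathered inline Send and is marshaled with a Read chunk instead.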
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/xprt_rdma.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 3d7e9c9bad1f..852dd0a750a5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -305,16 +305,19 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ char *mr_offset; /* kva if no page, else offset */ }; -/* Reserve enough Send SGEs to send a maximum size inline request: +/* The Send SGE array is provisioned to send a maximum size + * inline request: * - RPC-over-RDMA header * - xdr_buf head iovec - * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages + * - RPCRDMA_MAX_INLINE bytes, in pages * - xdr_buf tail iovec + * + * The actual number of array elements consumed by each RPC + * depends on the device's max_sge limit. */ enum { RPCRDMA_MIN_SEND_SGES = 3, - RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1, - RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1, + RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT, RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, }; -- GitLab From 18c0fb31a034023e5cb2d3c9c1320d5d47d91afe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:27 -0500 Subject: [PATCH 0221/1084] xprtrdma: Properly recover FRWRs with in-flight FASTREG WRs Sriharsha (sriharsha.basavapatna@broadcom.com) reports an occasional double DMA unmap of an FRWR MR when a connection is lost. I see one way this can happen. When a request requires more than one segment or chunk, rpcrdma_marshal_req loops, invoking ->frwr_op_map for each segment (MR) in each chunk. Each call posts a FASTREG Work Request to register one MR. Now suppose that the transport connection is lost part-way through marshaling this request. As part of recovering and resetting that req, rpcrdma_marshal_req invokes ->frwr_op_unmap_safe, which hands all the req's registered FRWRs to the MR recovery thread. But note: FRWR registration is asynchronous. So it's possible that some of these "already registered" FRWRs are fully registered, and some are still waiting for their FASTREG WR to complete. When the connection is lost, the "already registered" frmrs are marked FRMR_IS_VALID, and the "still waiting" WRs flush. Then frwr_wc_fastreg marks these frmrs FRMR_FLUSHED_FR. But thanks to ->frwr_op_unmap_safe, the MR recovery thread is doing an unreg / alloc_mr, a DMA unmap, and marking each of these frwrs FRMR_IS_INVALID, at the same time frwr_wc_fastreg might be running. - If the recovery thread runs last, then the frmr is marked FRMR_IS_INVALID, and life continues. - If frwr_wc_fastreg runs last, the frmr is marked FRMR_FLUSHED_FR, but the recovery thread has already DMA unmapped that MR. When ->frwr_op_map later re-uses this frmr, it sees it is not marked FRMR_IS_INVALID, and tries to recover it before using it, resulting in a second DMA unmap of the same MR. The fix is to guarantee in-flight FASTREG WRs have flushed before MR recovery runs on those FRWRs. Thus we depend on ro_unmap_safe (called from xprt_rdma_send_request on retransmit, or from xprt_rdma_free) to clean up old registrations as needed. 
Reported-by: Sriharsha Basavapatna Signed-off-by: Chuck Lever Tested-by: Sriharsha Basavapatna Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 14 ++++++++------ net/sunrpc/xprtrdma/transport.c | 4 ---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index d88988365cd2..72b3ca0253a0 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -759,13 +759,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) iptr = headerp->rm_body.rm_chunks; iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype); if (IS_ERR(iptr)) - goto out_unmap; + goto out_err; iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype); if (IS_ERR(iptr)) - goto out_unmap; + goto out_err; iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype); if (IS_ERR(iptr)) - goto out_unmap; + goto out_err; hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n", @@ -776,12 +776,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen, &rqst->rq_snd_buf, rtype)) { iptr = ERR_PTR(-EIO); - goto out_unmap; + goto out_err; } return 0; -out_unmap: - r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); +out_err: + pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n", + PTR_ERR(iptr)); + r_xprt->rx_stats.failed_marshal_count++; return PTR_ERR(iptr); } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 699058169cfc..c717f5410776 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -709,10 +709,6 @@ xprt_rdma_send_request(struct rpc_task *task) return 0; failed_marshal: - dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", - __func__, rc); - if (rc == -EIO) - r_xprt->rx_stats.failed_marshal_count++; if (rc != -ENOTCONN) return rc; drop_connection: -- GitLab From 0a90487bf7182c74830616b91bd33f68f8c6e18b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:35 -0500 Subject: [PATCH 0222/1084] xprtrdma: Handle stale connection rejection A server rejects a connection attempt with STALE_CONNECTION when a client attempts to connect to a working remote service, but uses a QPN and GUID that corresponds to an old connection that was abandoned. This might occur after a client crashes and restarts. Fix rpcrdma_conn_upcall() to distinguish between a normal rejection and rejection of stale connection parameters. As an additional clean-up, remove the code that retries the connection attempt with different ORD/IRD values. Code audit of other ULP initiators shows no similar special case handling of initiator_depth or responder_resources. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 66 ++++++++++++------------------------- 1 file changed, 21 insertions(+), 45 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 61d16c39e92c..d1ee33fa8055 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -54,6 +54,7 @@ #include #include #include /* try_module_get()/module_put() */ +#include #include "xprt_rdma.h" @@ -279,7 +280,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) connstate = -ENETDOWN; goto connected; case RDMA_CM_EVENT_REJECTED: +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n", + sap, rpc_get_port(sap), ia->ri_device->name, + rdma_reject_msg(id, event->status)); +#endif connstate = -ECONNREFUSED; + if (event->status == IB_CM_REJ_STALE_CONN) + connstate = -EAGAIN; goto connected; case RDMA_CM_EVENT_DISCONNECTED: connstate = -ECONNABORTED; @@ -643,20 +651,21 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) int rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { + struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, + rx_ia); struct rdma_cm_id *id, *old; + struct sockaddr *sap; + unsigned int extras; int rc = 0; - int retry_count = 0; if (ep->rep_connected != 0) { - struct rpcrdma_xprt *xprt; retry: dprintk("RPC: %s: reconnecting...\n", __func__); rpcrdma_ep_disconnect(ep, ia); - xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - id = rpcrdma_create_id(xprt, ia, - (struct sockaddr *)&xprt->rx_data.addr); + sap = (struct sockaddr *)&r_xprt->rx_data.addr; + id = rpcrdma_create_id(r_xprt, ia, sap); if (IS_ERR(id)) { rc = -EHOSTUNREACH; goto out; @@ -711,51 +720,18 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) } wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); - - /* - * Check state. A non-peer reject indicates no listener - * (ECONNREFUSED), which may be a transient state. All - * others indicate a transport condition which has already - * undergone a best-effort. - */ - if (ep->rep_connected == -ECONNREFUSED && - ++retry_count <= RDMA_CONNECT_RETRY_MAX) { - dprintk("RPC: %s: non-peer_reject, retry\n", __func__); - goto retry; - } if (ep->rep_connected <= 0) { - /* Sometimes, the only way to reliably connect to remote - * CMs is to use same nonzero values for ORD and IRD. 
*/ - if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && - (ep->rep_remote_cma.responder_resources == 0 || - ep->rep_remote_cma.initiator_depth != - ep->rep_remote_cma.responder_resources)) { - if (ep->rep_remote_cma.responder_resources == 0) - ep->rep_remote_cma.responder_resources = 1; - ep->rep_remote_cma.initiator_depth = - ep->rep_remote_cma.responder_resources; + if (ep->rep_connected == -EAGAIN) goto retry; - } rc = ep->rep_connected; - } else { - struct rpcrdma_xprt *r_xprt; - unsigned int extras; - - dprintk("RPC: %s: connected\n", __func__); - - r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - extras = r_xprt->rx_buf.rb_bc_srv_max_requests; - - if (extras) { - rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); - if (rc) { - pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", - __func__, rc); - rc = 0; - } - } + goto out; } + dprintk("RPC: %s: connected\n", __func__); + extras = r_xprt->rx_buf.rb_bc_srv_max_requests; + if (extras) + rpcrdma_ep_post_extra_recv(r_xprt, extras); + out: if (rc) ep->rep_connected = rc; -- GitLab From 9a5c63e9c4056de8a73555131e6f698ddb0b9e0d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:43 -0500 Subject: [PATCH 0223/1084] xprtrdma: Refactor management of mw_list field Clean up some duplicate code. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 5 +---- net/sunrpc/xprtrdma/frwr_ops.c | 11 ++++------- net/sunrpc/xprtrdma/rpc_rdma.c | 6 +++--- net/sunrpc/xprtrdma/verbs.c | 15 +++++---------- net/sunrpc/xprtrdma/xprt_rdma.h | 16 ++++++++++++++++ 5 files changed, 29 insertions(+), 24 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 1ebb09e1ac4f..59e64025ed96 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -310,10 +310,7 @@ fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, struct rpcrdma_mw *mw; while (!list_empty(&req->rl_registered)) { - mw = list_first_entry(&req->rl_registered, - struct rpcrdma_mw, mw_list); - list_del_init(&mw->mw_list); - + mw = rpcrdma_pop_mw(&req->rl_registered); if (sync) fmr_op_recover_mr(mw); else diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 47bed5333c7f..f81dd93176c0 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -466,8 +466,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) struct ib_send_wr *first, **prev, *last, *bad_wr; struct rpcrdma_rep *rep = req->rl_reply; struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_mw *mw, *tmp; struct rpcrdma_frmr *f; + struct rpcrdma_mw *mw; int count, rc; dprintk("RPC: %s: req %p\n", __func__, req); @@ -534,10 +534,10 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * them to the free MW list. 
*/ unmap: - list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) { + while (!list_empty(&req->rl_registered)) { + mw = rpcrdma_pop_mw(&req->rl_registered); dprintk("RPC: %s: DMA unmapping frmr %p\n", __func__, &mw->frmr); - list_del_init(&mw->mw_list); ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); rpcrdma_put_mw(r_xprt, mw); @@ -571,10 +571,7 @@ frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, struct rpcrdma_mw *mw; while (!list_empty(&req->rl_registered)) { - mw = list_first_entry(&req->rl_registered, - struct rpcrdma_mw, mw_list); - list_del_init(&mw->mw_list); - + mw = rpcrdma_pop_mw(&req->rl_registered); if (sync) frwr_op_recover_mr(mw); else diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 72b3ca0253a0..a044be2d6ad7 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -322,7 +322,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, false, &mw); if (n < 0) return ERR_PTR(n); - list_add(&mw->mw_list, &req->rl_registered); + rpcrdma_push_mw(mw, &req->rl_registered); *iptr++ = xdr_one; /* item present */ @@ -390,7 +390,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, true, &mw); if (n < 0) return ERR_PTR(n); - list_add(&mw->mw_list, &req->rl_registered); + rpcrdma_push_mw(mw, &req->rl_registered); iptr = xdr_encode_rdma_segment(iptr, mw); @@ -455,7 +455,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, true, &mw); if (n < 0) return ERR_PTR(n); - list_add(&mw->mw_list, &req->rl_registered); + rpcrdma_push_mw(mw, &req->rl_registered); iptr = xdr_encode_rdma_segment(iptr, mw); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index d1ee33fa8055..81cd31acf690 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -776,9 +776,7 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) spin_lock(&buf->rb_recovery_lock); while (!list_empty(&buf->rb_stale_mrs)) { - mw = list_first_entry(&buf->rb_stale_mrs, - struct rpcrdma_mw, mw_list); - list_del_init(&mw->mw_list); + mw = rpcrdma_pop_mw(&buf->rb_stale_mrs); spin_unlock(&buf->rb_recovery_lock); dprintk("RPC: %s: recovering MR %p\n", __func__, mw); @@ -796,7 +794,7 @@ rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; spin_lock(&buf->rb_recovery_lock); - list_add(&mw->mw_list, &buf->rb_stale_mrs); + rpcrdma_push_mw(mw, &buf->rb_stale_mrs); spin_unlock(&buf->rb_recovery_lock); schedule_delayed_work(&buf->rb_recovery_worker, 0); @@ -1072,11 +1070,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) struct rpcrdma_mw *mw = NULL; spin_lock(&buf->rb_mwlock); - if (!list_empty(&buf->rb_mws)) { - mw = list_first_entry(&buf->rb_mws, - struct rpcrdma_mw, mw_list); - list_del_init(&mw->mw_list); - } + if (!list_empty(&buf->rb_mws)) + mw = rpcrdma_pop_mw(&buf->rb_mws); spin_unlock(&buf->rb_mwlock); if (!mw) @@ -1099,7 +1094,7 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; spin_lock(&buf->rb_mwlock); - list_add_tail(&mw->mw_list, &buf->rb_mws); + rpcrdma_push_mw(mw, &buf->rb_mws); spin_unlock(&buf->rb_mwlock); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 852dd0a750a5..171a35116de9 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -354,6 +354,22 @@ rpcr_to_rdmar(struct rpc_rqst *rqst) return rqst->rq_xprtdata; } +static inline void +rpcrdma_push_mw(struct 
rpcrdma_mw *mw, struct list_head *list) +{ + list_add_tail(&mw->mw_list, list); +} + +static inline struct rpcrdma_mw * +rpcrdma_pop_mw(struct list_head *list) +{ + struct rpcrdma_mw *mw; + + mw = list_first_entry(list, struct rpcrdma_mw, mw_list); + list_del(&mw->mw_list); + return mw; +} + /* * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for * inline requests/replies, and client/server credits. -- GitLab From b977b644ccf821ab1269582f7efe1d0d85faa1f6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Feb 2017 17:00:51 -0500 Subject: [PATCH 0224/1084] sunrpc: Allow xprt->ops->timer method to sleep The transport lock is needed to protect the xprt_adjust_cwnd() call in xs_udp_timer, but it is not necessary for accessing the rq_reply_bytes_recvd or tk_status fields. It is correct to sublimate the lock into UDP's xs_udp_timer method, where it is required. The ->timer method has to take the transport lock if needed, but it can now sleep safely, or even call back into the RPC scheduler. This is more a clean-up than a fix, but the "issue" was introduced by my transport switch patches back in 2005. Fixes: 46c0ee8bc4ad ("RPC: separate xprt_timer implementations") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 2 -- net/sunrpc/xprtsock.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9a6be030ca7d..b530a2852ba8 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -897,13 +897,11 @@ static void xprt_timer(struct rpc_task *task) return; dprintk("RPC: %5u xprt_timer\n", task->tk_pid); - spin_lock_bh(&xprt->transport_lock); if (!req->rq_reply_bytes_recvd) { if (xprt->ops->timer) xprt->ops->timer(xprt, task); } else task->tk_status = 0; - spin_unlock_bh(&xprt->transport_lock); } /** diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 810e9b59be16..18b4e7ff8879 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1739,7 +1739,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t */ static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) { + spin_lock_bh(&xprt->transport_lock); xprt_adjust_cwnd(xprt, task, -ETIMEDOUT); + spin_unlock_bh(&xprt->transport_lock); } static unsigned short xs_get_random_port(void) -- GitLab From ca8d8e03b4c9ad447d1e882cc8014e538f653018 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Thu, 9 Feb 2017 10:03:41 -0500 Subject: [PATCH 0225/1084] iio: 104-quad-8: Fix off-by-one error when addressing flag register The flag register is offset by 1 from the respective channel data register. This patch fixes an off-by-one error when attempting to read a channel flag register where the base address was not properly offset. 
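Put differently: on this device each channel's flag register sits one I/O address above its data register, and the old code read the flags from the data register's address. A short sketch of the intended access pattern (register roles inferred from the patch context, not quoted from a datasheet):

	/* base_offset addresses the channel's data register; flags live at +1 */
	flags  = inb(base_offset + 1);
	borrow = flags & BIT(0);	/* counter borrow */
	carry  = !!(flags & BIT(1));	/* counter carry  */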
Fixes: 28e5d3bb0325 ("iio: 104-quad-8: Add IIO support for the ACCES 104-QUAD-8") Signed-off-by: William Breathitt Gray Signed-off-by: Jonathan Cameron --- drivers/iio/counter/104-quad-8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/counter/104-quad-8.c b/drivers/iio/counter/104-quad-8.c index a5913e97945e..f9b8fc9ae13f 100644 --- a/drivers/iio/counter/104-quad-8.c +++ b/drivers/iio/counter/104-quad-8.c @@ -76,7 +76,7 @@ static int quad8_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; } - flags = inb(base_offset); + flags = inb(base_offset + 1); borrow = flags & BIT(0); carry = !!(flags & BIT(1)); -- GitLab From c466bda60510eaac797e49db5d34555876d983ae Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Feb 2017 12:00:16 +0000 Subject: [PATCH 0226/1084] ARM: add CPU_THUMB_CAPABLE to indicate possible Thumb support Clean up arch/arm/mm/Kconfig a little to provide a symbol which indicates whether the CPU may support the Thumb instruction set. This gets rid of the growing dependencies on ARM_THUMB, and also gives us a useful Kconfig symbol for choosing the kuser code. Reviewed-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/Kconfig | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index f68e8ec29447..ac395eca7dee 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -29,6 +29,7 @@ config CPU_ARM720T select CPU_COPY_V4WT if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WT if MMU help A 32-bit RISC processor with 8kByte Cache, Write Buffer and @@ -46,6 +47,7 @@ config CPU_ARM740T select CPU_CACHE_V4 select CPU_CP15_MPU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE help A 32-bit RISC processor with 8KB cache or 4KB variants, write buffer and MPU(Protection Unit) built around @@ -79,6 +81,7 @@ config CPU_ARM920T select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM920T is licensed to be produced by numerous vendors, @@ -97,6 +100,7 @@ config CPU_ARM922T select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM922T is a version of the ARM920T, but with smaller @@ -116,6 +120,7 @@ config CPU_ARM925T select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM925T is a mix between the ARM920T and ARM926T, but with @@ -134,6 +139,7 @@ config CPU_ARM926T select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help This is a variant of the ARM920. It has slightly different @@ -170,6 +176,7 @@ config CPU_ARM940T select CPU_CACHE_VIVT select CPU_CP15_MPU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE help ARM940T is a member of the ARM9TDMI family of general- purpose microprocessors with MPU and separate 4KB @@ -188,6 +195,7 @@ config CPU_ARM946E select CPU_CACHE_VIVT select CPU_CP15_MPU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE help ARM946E-S is a member of the ARM9E-S family of high- performance, 32-bit system-on-chip processor solutions. 
@@ -206,6 +214,7 @@ config CPU_ARM1020 select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM1020 is the 32K cached version of the ARM10 processor, @@ -225,6 +234,7 @@ config CPU_ARM1020E select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU # ARM1022E @@ -236,6 +246,7 @@ config CPU_ARM1022 select CPU_COPY_V4WB if MMU # can probably do better select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM1022E is an implementation of the ARMv5TE architecture @@ -254,6 +265,7 @@ config CPU_ARM1026 select CPU_COPY_V4WB if MMU # can probably do better select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU help The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture @@ -302,6 +314,7 @@ config CPU_XSCALE select CPU_CACHE_VIVT select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU # XScale Core Version 3 @@ -312,6 +325,7 @@ config CPU_XSC3 select CPU_CACHE_VIVT select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU select IO_36 @@ -324,6 +338,7 @@ config CPU_MOHAWK select CPU_COPY_V4WB if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_V4WBI if MMU # Feroceon @@ -335,6 +350,7 @@ config CPU_FEROCEON select CPU_COPY_FEROCEON if MMU select CPU_CP15_MMU select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE select CPU_TLB_FEROCEON if MMU config CPU_FEROCEON_OLD_ID @@ -367,6 +383,7 @@ config CPU_V6 select CPU_CP15_MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V6 + select CPU_THUMB_CAPABLE select CPU_TLB_V6 if MMU # ARMv6k @@ -381,6 +398,7 @@ config CPU_V6K select CPU_CP15_MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V6 + select CPU_THUMB_CAPABLE select CPU_TLB_V6 if MMU # ARMv7 @@ -396,6 +414,7 @@ config CPU_V7 select CPU_CP15_MPU if !MMU select CPU_HAS_ASID if MMU select CPU_PABRT_V7 + select CPU_THUMB_CAPABLE select CPU_TLB_V7 if MMU # ARMv7M @@ -410,11 +429,17 @@ config CPU_V7M config CPU_THUMBONLY bool + select CPU_THUMB_CAPABLE # There are no CPUs available with MMU that don't implement an ARM ISA: depends on !MMU help Select this if your CPU doesn't support the 32 bit ARM instructions. +config CPU_THUMB_CAPABLE + bool + help + Select this if your CPU can support Thumb mode. + # Figure out what processor architecture version we should be using. # This defines the compiler instruction set which depends on the machine type. config CPU_32v3 @@ -655,11 +680,7 @@ config ARCH_DMA_ADDR_T_64BIT config ARM_THUMB bool "Support Thumb user binaries" if !CPU_THUMBONLY - depends on CPU_ARM720T || CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || \ - CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || \ - CPU_ARM1020 || CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \ - CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_V6 || CPU_V6K || \ - CPU_V7 || CPU_FEROCEON || CPU_V7M + depends on CPU_THUMB_CAPABLE default y help Say Y if you want to include kernel support for running user space -- GitLab From 43a30c2a3171e0f92c28dfd31ca3eb7d9cc72b26 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 19 Dec 2016 11:09:34 -0500 Subject: [PATCH 0227/1084] radix tree test suite: Depend on tools/include/asm files Changing tools/include/asm/bug.h showed a missing dependency in the Makefile. 
Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- tools/testing/radix-tree/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 3635e4d3eca7..7f7600424b14 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -23,6 +23,7 @@ find_next_bit.o: ../../lib/find_bit.c $(OFILES): *.h */*.h \ ../../include/linux/*.h \ + ../../include/asm/*.h \ ../../../include/linux/radix-tree.h radix-tree.c: ../../../lib/radix-tree.c -- GitLab From a3c7890790e742074bc9e5640b0fcf9c61a771a2 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 16 Dec 2016 15:12:41 -0500 Subject: [PATCH 0228/1084] radix tree test suite: Remove mempool The radix tree hasn't used a mempool since the beginning of git history. Remove the userspace mempool implementation. Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- tools/testing/radix-tree/linux.c | 22 ---------------------- tools/testing/radix-tree/linux/mempool.h | 16 ---------------- 2 files changed, 38 deletions(-) delete mode 100644 tools/testing/radix-tree/linux/mempool.h diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index d31ea7c9abec..c27c11261c37 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -22,27 +21,6 @@ struct kmem_cache { void (*ctor)(void *); }; -void *mempool_alloc(mempool_t *pool, int gfp_mask) -{ - return pool->alloc(gfp_mask, pool->data); -} - -void mempool_free(void *element, mempool_t *pool) -{ - pool->free(element, pool->data); -} - -mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, - mempool_free_t *free_fn, void *pool_data) -{ - mempool_t *ret = malloc(sizeof(*ret)); - - ret->alloc = alloc_fn; - ret->free = free_fn; - ret->data = pool_data; - return ret; -} - void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) { struct radix_tree_node *node; diff --git a/tools/testing/radix-tree/linux/mempool.h b/tools/testing/radix-tree/linux/mempool.h deleted file mode 100644 index 6a2dc55b41d6..000000000000 --- a/tools/testing/radix-tree/linux/mempool.h +++ /dev/null @@ -1,16 +0,0 @@ - -#include - -typedef void *(mempool_alloc_t)(int gfp_mask, void *pool_data); -typedef void (mempool_free_t)(void *element, void *pool_data); - -typedef struct { - mempool_alloc_t *alloc; - mempool_free_t *free; - void *data; -} mempool_t; - -void *mempool_alloc(mempool_t *pool, int gfp_mask); -void mempool_free(void *element, mempool_t *pool); -mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, - mempool_free_t *free_fn, void *pool_data); -- GitLab From 12ea65390bd5a46f8a70f068eb0d48922576a781 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 16 Dec 2016 14:53:45 -0500 Subject: [PATCH 0229/1084] radix tree test suite: Remove types.h Move the pieces we still need to tools/include and update a few implicit includes. 
Signed-off-by: Matthew Wilcox --- tools/include/linux/compiler.h | 4 ++++ tools/include/linux/spinlock.h | 5 +++++ tools/testing/radix-tree/linux.c | 1 + tools/testing/radix-tree/linux/gfp.h | 2 ++ tools/testing/radix-tree/linux/types.h | 23 ----------------------- 5 files changed, 12 insertions(+), 23 deletions(-) create mode 100644 tools/include/linux/spinlock.h delete mode 100644 tools/testing/radix-tree/linux/types.h diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index e33fc1df3935..9d294161c494 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -21,6 +21,8 @@ #endif #define __user +#define __rcu +#define __read_mostly #ifndef __attribute_const__ # define __attribute_const__ @@ -50,6 +52,8 @@ # define unlikely(x) __builtin_expect(!!(x), 0) #endif +#define uninitialized_var(x) x = *(&(x)) + #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) #include diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h new file mode 100644 index 000000000000..58397dcb19d6 --- /dev/null +++ b/tools/include/linux/spinlock.h @@ -0,0 +1,5 @@ +#define spinlock_t pthread_mutex_t +#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER; + +#define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x) +#define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x) diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index c27c11261c37..93f06614e91b 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h index 5b09b2ce6c33..701209816a6b 100644 --- a/tools/testing/radix-tree/linux/gfp.h +++ b/tools/testing/radix-tree/linux/gfp.h @@ -1,6 +1,8 @@ #ifndef _GFP_H #define _GFP_H +#include + #define __GFP_BITS_SHIFT 26 #define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) diff --git a/tools/testing/radix-tree/linux/types.h b/tools/testing/radix-tree/linux/types.h deleted file mode 100644 index 8491d89873bb..000000000000 --- a/tools/testing/radix-tree/linux/types.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _TYPES_H -#define _TYPES_H - -#include "../../include/linux/types.h" - -#define __rcu -#define __read_mostly - -static inline void INIT_LIST_HEAD(struct list_head *list) -{ - list->next = list; - list->prev = list; -} - -typedef struct { - unsigned int x; -} spinlock_t; - -#define uninitialized_var(x) x = x - -#include - -#endif -- GitLab From 7a4f11b88981f47d79eb64267e8a2989a18ce831 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 28 Dec 2016 19:40:49 -0500 Subject: [PATCH 0230/1084] radix tree test suite: Remove export.h The tools/include export.h contains everything we need. 
Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/linux/export.h | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 tools/testing/radix-tree/linux/export.h diff --git a/tools/testing/radix-tree/linux/export.h b/tools/testing/radix-tree/linux/export.h deleted file mode 100644 index b6afd131998d..000000000000 --- a/tools/testing/radix-tree/linux/export.h +++ /dev/null @@ -1,2 +0,0 @@ - -#define EXPORT_SYMBOL(sym) -- GitLab From ab3a1ffd11d90583c71bd606bf0fd5bfef30bce9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 16 Dec 2016 15:11:05 -0500 Subject: [PATCH 0231/1084] radix tree test suite: Reduce kernel.h Many of the definitions in the radix-tree kernel.h are redundant with others in tools/include, or are no longer used, such as panic(). Move the definition of __init to init.h and in_interrupt() to preempt.h Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/linux/init.h | 2 +- tools/testing/radix-tree/linux/kernel.h | 39 +----------------------- tools/testing/radix-tree/linux/preempt.h | 10 ++++++ 3 files changed, 12 insertions(+), 39 deletions(-) diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h index 360cabb3c4e7..1bb0afc21309 100644 --- a/tools/testing/radix-tree/linux/init.h +++ b/tools/testing/radix-tree/linux/init.h @@ -1 +1 @@ -/* An empty file stub that allows radix-tree.c to compile. */ +#define __init diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 9681463c91e2..dd1d9aefb14f 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -1,10 +1,9 @@ #ifndef _KERNEL_H #define _KERNEL_H -#include +#include "../../include/linux/kernel.h" #include #include -#include #include #include @@ -19,47 +18,11 @@ #define RADIX_TREE_MAP_SHIFT 3 #endif -#ifndef NULL -#define NULL 0 -#endif - -#define BUG_ON(expr) assert(!(expr)) -#define __init -#define __must_check -#define panic(expr) #define printk printf -#define __force -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define pr_debug printk -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define cpu_relax() barrier() - #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type, member) );}) -#define min(a, b) ((a) < (b) ? (a) : (b)) - -#define cond_resched() sched_yield() - -static inline int in_interrupt(void) -{ - return 0; -} - -/* - * This looks more complex than it should be. But we need to - * get the type for the ~ right in round_down (it needs to be - * as wide as the result!), and we want to evaluate the macro - * arguments just once each. 
- */ -#define __round_mask(x, y) ((__typeof__(x))((y)-1)) -#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) -#define round_down(x, y) ((x) & ~__round_mask(x, y)) - #define xchg(ptr, x) uatomic_xchg(ptr, x) #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/preempt.h b/tools/testing/radix-tree/linux/preempt.h index 65c04c226965..35c5ac81529f 100644 --- a/tools/testing/radix-tree/linux/preempt.h +++ b/tools/testing/radix-tree/linux/preempt.h @@ -1,4 +1,14 @@ +#ifndef __LINUX_PREEMPT_H +#define __LINUX_PREEMPT_H + extern int preempt_count; #define preempt_disable() uatomic_inc(&preempt_count) #define preempt_enable() uatomic_dec(&preempt_count) + +static inline int in_interrupt(void) +{ + return 0; +} + +#endif /* __LINUX_PREEMPT_H */ -- GitLab From 991af734c46d726f1c62cabd1681563beb533e14 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 28 Dec 2016 22:53:46 -0500 Subject: [PATCH 0232/1084] radix tree test suite: Use vpath to find lib files Instead of specifying how to build find_bit.o from lib/find_bit.o, use vpath to tell make where to find find_bit.c. Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- tools/testing/radix-tree/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 7f7600424b14..5274e88cd293 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -2,7 +2,7 @@ CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE LDFLAGS += -lpthread -lurcu TARGETS = main -OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \ +OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_bit.o \ regression1.o regression2.o regression3.o multiorder.o \ iteration_check.o benchmark.o @@ -18,8 +18,7 @@ main: $(OFILES) clean: $(RM) -f $(TARGETS) *.o radix-tree.c -find_next_bit.o: ../../lib/find_bit.c - $(CC) $(CFLAGS) -c -o $@ $< +vpath %.c ../../lib $(OFILES): *.h */*.h \ ../../include/linux/*.h \ -- GitLab From bfa11193c46d5ea8ef4d9a4f1c10221cdd985ab1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 29 Dec 2016 14:49:48 -0500 Subject: [PATCH 0233/1084] radix tree test suite: Remove obsolete CONFIG radix-tree.c doesn't use these CONFIG options any more. 
Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- tools/testing/radix-tree/generated/autoconf.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h index ad18cf5a2a3a..cf88dc5b8832 100644 --- a/tools/testing/radix-tree/generated/autoconf.h +++ b/tools/testing/radix-tree/generated/autoconf.h @@ -1,3 +1 @@ #define CONFIG_RADIX_TREE_MULTIORDER 1 -#define CONFIG_SHMEM 1 -#define CONFIG_SWAP 1 -- GitLab From 30b888ba950d9f77326b50a4aa2d7d99557d5718 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 28 Jan 2017 09:55:20 -0500 Subject: [PATCH 0234/1084] radix-tree: Add radix_tree_iter_tag_clear() The counterpart to radix_tree_iter_tag_set(), used by the IDR code Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- include/linux/radix-tree.h | 4 ++- lib/radix-tree.c | 68 ++++++++++++++++++++++---------------- 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 32683c7c2e0d..8bf4ef448ce1 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -332,7 +332,9 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, unsigned long index, unsigned int tag); int radix_tree_tag_get(const struct radix_tree_root *, unsigned long index, unsigned int tag); -void radix_tree_iter_tag_set(struct radix_tree_root *root, +void radix_tree_iter_tag_set(struct radix_tree_root *, + const struct radix_tree_iter *iter, unsigned int tag); +void radix_tree_iter_tag_clear(struct radix_tree_root *, const struct radix_tree_iter *iter, unsigned int tag); unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results, diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 6f86fbac0e36..40f3091c5a6b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1266,6 +1266,22 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, } #endif +static void node_tag_set(struct radix_tree_root *root, + struct radix_tree_node *node, + unsigned int tag, unsigned int offset) +{ + while (node) { + if (tag_get(node, tag, offset)) + return; + tag_set(node, tag, offset); + offset = node->offset; + node = node->parent; + } + + if (!root_tag_get(root, tag)) + root_tag_set(root, tag); +} + /** * radix_tree_tag_set - set a tag on a radix tree node * @root: radix tree root @@ -1307,6 +1323,18 @@ void *radix_tree_tag_set(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_tag_set); +/** + * radix_tree_iter_tag_set - set a tag on the current iterator entry + * @root: radix tree root + * @iter: iterator state + * @tag: tag to set + */ +void radix_tree_iter_tag_set(struct radix_tree_root *root, + const struct radix_tree_iter *iter, unsigned int tag) +{ + node_tag_set(root, iter->node, tag, iter_offset(iter)); +} + static void node_tag_clear(struct radix_tree_root *root, struct radix_tree_node *node, unsigned int tag, unsigned int offset) @@ -1327,34 +1355,6 @@ static void node_tag_clear(struct radix_tree_root *root, root_tag_clear(root, tag); } -static void node_tag_set(struct radix_tree_root *root, - struct radix_tree_node *node, - unsigned int tag, unsigned int offset) -{ - while (node) { - if (tag_get(node, tag, offset)) - return; - tag_set(node, tag, offset); - offset = node->offset; - node = node->parent; - } - - if (!root_tag_get(root, tag)) - root_tag_set(root, tag); -} - -/** - * radix_tree_iter_tag_set - set a tag on the current iterator entry - * @root: radix tree root - * @iter: 
iterator state - * @tag: tag to set - */ -void radix_tree_iter_tag_set(struct radix_tree_root *root, - const struct radix_tree_iter *iter, unsigned int tag) -{ - node_tag_set(root, iter->node, tag, iter_offset(iter)); -} - /** * radix_tree_tag_clear - clear a tag on a radix tree node * @root: radix tree root @@ -1394,6 +1394,18 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_tag_clear); +/** + * radix_tree_iter_tag_clear - clear a tag on the current iterator entry + * @root: radix tree root + * @iter: iterator state + * @tag: tag to clear + */ +void radix_tree_iter_tag_clear(struct radix_tree_root *root, + const struct radix_tree_iter *iter, unsigned int tag) +{ + node_tag_clear(root, iter->node, tag, iter_offset(iter)); +} + /** * radix_tree_tag_get - get a tag on a radix tree node * @root: radix tree root -- GitLab From 0ac398ef391b53122976325ab6953456ce8e8310 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 28 Jan 2017 09:56:22 -0500 Subject: [PATCH 0235/1084] radix-tree: Add radix_tree_iter_delete Factor the deletion code out into __radix_tree_delete() and provide a nice iterator-based wrapper around it. If we free the node, advance the iterator to avoid reading from freed memory. Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 2 + lib/radix-tree.c | 91 +++++++++++++++++++++++++------------- 2 files changed, 62 insertions(+), 31 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 8bf4ef448ce1..05f715cb8062 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -311,6 +311,8 @@ void __radix_tree_delete_node(struct radix_tree_root *root, struct radix_tree_node *node, radix_tree_update_node_t update_node, void *private); +void radix_tree_iter_delete(struct radix_tree_root *, + struct radix_tree_iter *iter, void **slot); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); void radix_tree_clear_tags(struct radix_tree_root *root, diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 40f3091c5a6b..7bf7d4e60e43 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -581,10 +581,12 @@ static int radix_tree_extend(struct radix_tree_root *root, * radix_tree_shrink - shrink radix tree to minimum height * @root radix tree root */ -static inline void radix_tree_shrink(struct radix_tree_root *root, +static inline bool radix_tree_shrink(struct radix_tree_root *root, radix_tree_update_node_t update_node, void *private) { + bool shrunk = false; + for (;;) { struct radix_tree_node *node = root->rnode; struct radix_tree_node *child; @@ -645,20 +647,26 @@ static inline void radix_tree_shrink(struct radix_tree_root *root, WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); + shrunk = true; } + + return shrunk; } -static void delete_node(struct radix_tree_root *root, +static bool delete_node(struct radix_tree_root *root, struct radix_tree_node *node, radix_tree_update_node_t update_node, void *private) { + bool deleted = false; + do { struct radix_tree_node *parent; if (node->count) { if (node == entry_to_node(root->rnode)) - radix_tree_shrink(root, update_node, private); - return; + deleted |= radix_tree_shrink(root, update_node, + private); + return deleted; } parent = node->parent; @@ -672,9 +680,12 @@ static void delete_node(struct radix_tree_root *root, WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); + deleted = true; node = parent; } while 
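A typical use of the new helper, sketched here for illustration given a struct radix_tree_root named tree (should_remove() is a stand-in predicate, not an existing function), is deleting entries while iterating. Because radix_tree_iter_delete() advances iter->index to iter->next_index when the node being walked is freed, the surrounding loop never touches freed memory:

	void **slot;
	struct radix_tree_iter iter;

	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
		if (should_remove(radix_tree_deref_slot(slot)))
			radix_tree_iter_delete(&tree, &iter, slot);
	}

As the kernel-doc below notes, this pattern is only safe when the caller either holds the tree's lock or is the sole thread with access to the tree.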
(node); + + return deleted; } /** @@ -1859,25 +1870,55 @@ void __radix_tree_delete_node(struct radix_tree_root *root, delete_node(root, node, update_node, private); } +static bool __radix_tree_delete(struct radix_tree_root *root, + struct radix_tree_node *node, void **slot) +{ + unsigned offset = get_slot_offset(node, slot); + int tag; + + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + node_tag_clear(root, node, tag, offset); + + replace_slot(root, node, slot, NULL, true); + return node && delete_node(root, node, NULL, NULL); +} + /** - * radix_tree_delete_item - delete an item from a radix tree - * @root: radix tree root - * @index: index key - * @item: expected item + * radix_tree_iter_delete - delete the entry at this iterator position + * @root: radix tree root + * @iter: iterator state + * @slot: pointer to slot * - * Remove @item at @index from the radix tree rooted at @root. + * Delete the entry at the position currently pointed to by the iterator. + * This may result in the current node being freed; if it is, the iterator + * is advanced so that it will not reference the freed memory. This + * function may be called without any locking if there are no other threads + * which can access this tree. + */ +void radix_tree_iter_delete(struct radix_tree_root *root, + struct radix_tree_iter *iter, void **slot) +{ + if (__radix_tree_delete(root, iter->node, slot)) + iter->index = iter->next_index; +} + +/** + * radix_tree_delete_item - delete an item from a radix tree + * @root: radix tree root + * @index: index key + * @item: expected item + * + * Remove @item at @index from the radix tree rooted at @root. * - * Returns the address of the deleted item, or NULL if it was not present - * or the entry at the given @index was not @item. + * Return: the deleted entry, or %NULL if it was not present + * or the entry at the given @index was not @item. */ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; - unsigned int offset; void **slot; void *entry; - int tag; entry = __radix_tree_lookup(root, index, &node, &slot); if (!entry) @@ -1886,32 +1927,20 @@ void *radix_tree_delete_item(struct radix_tree_root *root, if (item && entry != item) return NULL; - if (!node) { - root_tag_clear_all(root); - root->rnode = NULL; - return entry; - } - - offset = get_slot_offset(node, slot); - - /* Clear all tags associated with the item to be deleted. */ - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - node_tag_clear(root, node, tag, offset); - - __radix_tree_replace(root, node, slot, NULL, NULL, NULL); + __radix_tree_delete(root, node, slot); return entry; } EXPORT_SYMBOL(radix_tree_delete_item); /** - * radix_tree_delete - delete an item from a radix tree - * @root: radix tree root - * @index: index key + * radix_tree_delete - delete an entry from a radix tree + * @root: radix tree root + * @index: index key * - * Remove the item at @index from the radix tree rooted at @root. + * Remove the entry at @index from the radix tree rooted at @root. * - * Returns the address of the deleted item, or NULL if it was not present. + * Return: The deleted entry, or %NULL if it was not present. 
*/ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { -- GitLab From 251af29c320d86071664f02c76f0d063a19fefdf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 11 Feb 2017 10:37:38 -0500 Subject: [PATCH 0236/1084] nlm: Ensure callback code also checks that the files match It is not sufficient to just check that the lock pids match when granting a callback, we also need to ensure that we're granting the callback on the right file. Reported-by: Pankaj Singh Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/lockd/lockd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c15373894a42..b37dee3acaba 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -355,7 +355,8 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return fl1->fl_pid == fl2->fl_pid + return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) + && fl1->fl_pid == fl2->fl_pid && fl1->fl_owner == fl2->fl_owner && fl1->fl_start == fl2->fl_start && fl1->fl_end == fl2->fl_end -- GitLab From 0a835c4f090af2c76fc2932c539c3b32fd21fbbb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 20 Dec 2016 10:27:56 -0500 Subject: [PATCH 0237/1084] Reimplement IDR and IDA using the radix tree The IDR is very similar to the radix tree. It has some functionality that the radix tree did not have (alloc next free, cyclic allocation, a callback-based for_each, destroy tree), which is readily implementable on top of the radix tree. A few small changes were needed in order to use a tag to represent nodes with free space below them. More extensive changes were needed to support storing NULL as a valid entry in an IDR. Plain radix trees still interpret NULL as a not-present entry. The IDA is reimplemented as a client of the newly enhanced radix tree. As in the current implementation, it uses a bitmap at the last level of the tree. Signed-off-by: Matthew Wilcox Signed-off-by: Matthew Wilcox Tested-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/idr.h | 145 ++- include/linux/radix-tree.h | 49 +- init/main.c | 3 +- lib/idr.c | 1178 +++++------------------ lib/radix-tree.c | 375 ++++++-- tools/testing/radix-tree/.gitignore | 1 + tools/testing/radix-tree/Makefile | 10 +- tools/testing/radix-tree/idr-test.c | 342 +++++++ tools/testing/radix-tree/linux/gfp.h | 8 +- tools/testing/radix-tree/linux/idr.h | 1 + tools/testing/radix-tree/linux/kernel.h | 1 + tools/testing/radix-tree/main.c | 6 + tools/testing/radix-tree/test.h | 2 + 13 files changed, 995 insertions(+), 1126 deletions(-) create mode 100644 tools/testing/radix-tree/idr-test.c create mode 100644 tools/testing/radix-tree/linux/idr.h diff --git a/include/linux/idr.h b/include/linux/idr.h index 3c01b89aed67..f58c0a3addc3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -12,47 +12,28 @@ #ifndef __IDR_H__ #define __IDR_H__ -#include -#include -#include -#include +#include +#include + +struct idr { + struct radix_tree_root idr_rt; + unsigned int idr_next; +}; /* - * Using 6 bits at each layer allows us to allocate 7 layers out of each page. 
- * 8 bits only gave us 3 layers out of every pair of pages, which is less - * efficient except for trees with a largest element between 192-255 inclusive. + * The IDR API does not expose the tagging functionality of the radix tree + * to users. Use tag 0 to track whether a node has free space below it. */ -#define IDR_BITS 6 -#define IDR_SIZE (1 << IDR_BITS) -#define IDR_MASK ((1 << IDR_BITS)-1) - -struct idr_layer { - int prefix; /* the ID prefix of this idr_layer */ - int layer; /* distance from leaf */ - struct idr_layer __rcu *ary[1<cur); + return READ_ONCE(idr->idr_next); } /** @@ -77,7 +58,7 @@ static inline unsigned int idr_get_cursor(struct idr *idr) */ static inline void idr_set_cursor(struct idr *idr, unsigned int val) { - WRITE_ONCE(idr->cur, val); + WRITE_ONCE(idr->idr_next, val); } /** @@ -97,22 +78,31 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val) * period). */ -/* - * This is what we export. - */ - -void *idr_find_slowpath(struct idr *idp, int id); void idr_preload(gfp_t gfp_mask); -int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask); -int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask); -int idr_for_each(struct idr *idp, +int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t); +int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t); +int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); -void *idr_get_next(struct idr *idp, int *nextid); -void *idr_replace(struct idr *idp, void *ptr, int id); -void idr_remove(struct idr *idp, int id); -void idr_destroy(struct idr *idp); -void idr_init(struct idr *idp); -bool idr_is_empty(struct idr *idp); +void *idr_get_next(struct idr *, int *nextid); +void *idr_replace(struct idr *, void *, int id); +void idr_destroy(struct idr *); + +static inline void idr_remove(struct idr *idr, int id) +{ + radix_tree_delete(&idr->idr_rt, id); +} + +static inline void idr_init(struct idr *idr) +{ + INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); + idr->idr_next = 0; +} + +static inline bool idr_is_empty(const struct idr *idr) +{ + return radix_tree_empty(&idr->idr_rt) && + radix_tree_tagged(&idr->idr_rt, IDR_FREE); +} /** * idr_preload_end - end preload section started with idr_preload() @@ -137,19 +127,14 @@ static inline void idr_preload_end(void) * This function can be called under rcu_read_lock(), given that the leaf * pointers lifetimes are correctly managed. */ -static inline void *idr_find(struct idr *idr, int id) +static inline void *idr_find(const struct idr *idr, int id) { - struct idr_layer *hint = rcu_dereference_raw(idr->hint); - - if (hint && (id & ~IDR_MASK) == hint->prefix) - return rcu_dereference_raw(hint->ary[id & IDR_MASK]); - - return idr_find_slowpath(idr, id); + return radix_tree_lookup(&idr->idr_rt, id); } /** * idr_for_each_entry - iterate over an idr's elements of a given type - * @idp: idr handle + * @idr: idr handle * @entry: the type * to use as cursor * @id: id entry's key * @@ -157,57 +142,60 @@ static inline void *idr_find(struct idr *idr, int id) * after normal terminatinon @entry is left with the value NULL. This * is convenient for a "not found" value. 
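/*
 * Illustrative sketch (not part of the patch): walking an IDR with the
 * idr_for_each_entry() macro documented above and defined just below.
 * example_walk_idr() is a hypothetical caller; <linux/idr.h> and
 * <linux/printk.h> are assumed to be included as usual.
 */
static void example_walk_idr(struct idr *idr)
{
	void *entry;
	int id;

	idr_for_each_entry(idr, entry, id)
		pr_info("id %d -> %p\n", id, entry);
}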
*/ -#define idr_for_each_entry(idp, entry, id) \ - for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) +#define idr_for_each_entry(idr, entry, id) \ + for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id) /** - * idr_for_each_entry - continue iteration over an idr's elements of a given type - * @idp: idr handle + * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type + * @idr: idr handle * @entry: the type * to use as cursor * @id: id entry's key * * Continue to iterate over list of given type, continuing after * the current position. */ -#define idr_for_each_entry_continue(idp, entry, id) \ - for ((entry) = idr_get_next((idp), &(id)); \ +#define idr_for_each_entry_continue(idr, entry, id) \ + for ((entry) = idr_get_next((idr), &(id)); \ entry; \ - ++id, (entry) = idr_get_next((idp), &(id))) + ++id, (entry) = idr_get_next((idr), &(id))) /* * IDA - IDR based id allocator, use when translation from id to * pointer isn't necessary. - * - * IDA_BITMAP_LONGS is calculated to be one less to accommodate - * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes. */ #define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ -#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long) - 1) +#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long)) #define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) struct ida_bitmap { - long nr_busy; unsigned long bitmap[IDA_BITMAP_LONGS]; }; struct ida { - struct idr idr; + struct radix_tree_root ida_rt; struct ida_bitmap *free_bitmap; }; -#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, } -#define DEFINE_IDA(name) struct ida name = IDA_INIT(name) +#define IDA_INIT { \ + .ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT), \ +} +#define DEFINE_IDA(name) struct ida name = IDA_INIT int ida_pre_get(struct ida *ida, gfp_t gfp_mask); int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); -void ida_init(struct ida *ida); int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, gfp_t gfp_mask); void ida_simple_remove(struct ida *ida, unsigned int id); +static inline void ida_init(struct ida *ida) +{ + INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT); + ida->free_bitmap = NULL; +} + /** * ida_get_new - allocate new ID * @ida: idr handle @@ -220,11 +208,8 @@ static inline int ida_get_new(struct ida *ida, int *p_id) return ida_get_new_above(ida, 0, p_id); } -static inline bool ida_is_empty(struct ida *ida) +static inline bool ida_is_empty(const struct ida *ida) { - return idr_is_empty(&ida->idr); + return radix_tree_empty(&ida->ida_rt); } - -void __init idr_init_cache(void); - #endif /* __IDR_H__ */ diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 05f715cb8062..2ba0c1f46c84 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -105,7 +105,10 @@ struct radix_tree_node { unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; -/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */ +/* The top bits of gfp_mask are used to store the root tags and the IDR flag */ +#define ROOT_IS_IDR ((__force gfp_t)(1 << __GFP_BITS_SHIFT)) +#define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT + 1) + struct radix_tree_root { gfp_t gfp_mask; struct radix_tree_node __rcu *rnode; @@ -358,10 +361,14 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index, unsigned new_order); int radix_tree_join(struct radix_tree_root *, 
unsigned long index, unsigned new_order, void *); +void **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *, + gfp_t, int end); -#define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */ -#define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */ -#define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */ +enum { + RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */ + RADIX_TREE_ITER_TAGGED = 0x10, /* lookup tagged slots */ + RADIX_TREE_ITER_CONTIG = 0x20, /* stop at first hole */ +}; /** * radix_tree_iter_init - initialize radix tree iterator @@ -402,6 +409,40 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) void **radix_tree_next_chunk(const struct radix_tree_root *, struct radix_tree_iter *iter, unsigned flags); +/** + * radix_tree_iter_lookup - look up an index in the radix tree + * @root: radix tree root + * @iter: iterator state + * @index: key to look up + * + * If @index is present in the radix tree, this function returns the slot + * containing it and updates @iter to describe the entry. If @index is not + * present, it returns NULL. + */ +static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root, + struct radix_tree_iter *iter, unsigned long index) +{ + radix_tree_iter_init(iter, index); + return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG); +} + +/** + * radix_tree_iter_find - find a present entry + * @root: radix tree root + * @iter: iterator state + * @index: start location + * + * This function returns the slot containing the entry with the lowest index + * which is at least @index. If @index is larger than any present entry, this + * function returns NULL. The @iter is updated to describe the entry found. + */ +static inline void **radix_tree_iter_find(const struct radix_tree_root *root, + struct radix_tree_iter *iter, unsigned long index) +{ + radix_tree_iter_init(iter, index); + return radix_tree_next_chunk(root, iter, 0); +} + /** * radix_tree_iter_retry - retry this chunk of the iteration * @iter: iterator state diff --git a/init/main.c b/init/main.c index b0c9d6facef9..a65e3aad31bc 100644 --- a/init/main.c +++ b/init/main.c @@ -553,7 +553,7 @@ asmlinkage __visible void __init start_kernel(void) if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); - idr_init_cache(); + radix_tree_init(); /* * Allow workqueue creation and work item queueing/cancelling @@ -568,7 +568,6 @@ asmlinkage __visible void __init start_kernel(void) trace_init(); context_tracking_init(); - radix_tree_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); diff --git a/lib/idr.c b/lib/idr.c index 52d2979a05e8..b87056e2cc4c 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -1,1068 +1,369 @@ -/* - * 2002-10-18 written by Jim Houston jim.houston@ccur.com - * Copyright (C) 2002 by Concurrent Computer Corporation - * Distributed under the GNU GPL license version 2. - * - * Modified by George Anzinger to reuse immediately and to use - * find bit instructions. Also removed _irq on spinlocks. - * - * Modified by Nadia Derbey to make it RCU safe. - * - * Small id to pointer translation service. - * - * It uses a radix tree like structure as a sparse array indexed - * by the id to obtain the pointer. The bitmap makes allocating - * a new id quick. - * - * You call it to allocate an id (an int) an associate with that id a - * pointer or what ever, we treat it as a (void *). 
You can pass this - * id to a user for him to pass back at a later time. You then pass - * that id to this code and it returns your pointer. - */ - -#ifndef TEST // to test in user space... -#include -#include +#include #include -#endif -#include -#include #include +#include #include -#include -#define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) -#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) - -/* Leave the possibility of an incomplete final layer */ -#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS) - -/* Number of id_layer structs to leave in free list */ -#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2) - -static struct kmem_cache *idr_layer_cache; -static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); -static DEFINE_PER_CPU(int, idr_preload_cnt); static DEFINE_SPINLOCK(simple_ida_lock); -/* the maximum ID which can be allocated given idr->layers */ -static int idr_max(int layers) -{ - int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT); - - return (1 << bits) - 1; -} - -/* - * Prefix mask for an idr_layer at @layer. For layer 0, the prefix mask is - * all bits except for the lower IDR_BITS. For layer 1, 2 * IDR_BITS, and - * so on. - */ -static int idr_layer_prefix_mask(int layer) -{ - return ~idr_max(layer + 1); -} - -static struct idr_layer *get_from_free_list(struct idr *idp) -{ - struct idr_layer *p; - unsigned long flags; - - spin_lock_irqsave(&idp->lock, flags); - if ((p = idp->id_free)) { - idp->id_free = p->ary[0]; - idp->id_free_cnt--; - p->ary[0] = NULL; - } - spin_unlock_irqrestore(&idp->lock, flags); - return(p); -} - -/** - * idr_layer_alloc - allocate a new idr_layer - * @gfp_mask: allocation mask - * @layer_idr: optional idr to allocate from - * - * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch - * one from the per-cpu preload buffer. If @layer_idr is not %NULL, fetch - * an idr_layer from @idr->id_free. - * - * @layer_idr is to maintain backward compatibility with the old alloc - * interface - idr_pre_get() and idr_get_new*() - and will be removed - * together with per-pool preload buffer. - */ -static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) -{ - struct idr_layer *new; - - /* this is the old path, bypass to get_from_free_list() */ - if (layer_idr) - return get_from_free_list(layer_idr); - - /* - * Try to allocate directly from kmem_cache. We want to try this - * before preload buffer; otherwise, non-preloading idr_alloc() - * users will end up taking advantage of preloading ones. As the - * following is allowed to fail for preloaded cases, suppress - * warning this time. - */ - new = kmem_cache_zalloc(idr_layer_cache, gfp_mask | __GFP_NOWARN); - if (new) - return new; - - /* - * Try to fetch one from the per-cpu preload buffer if in process - * context. See idr_preload() for details. - */ - if (!in_interrupt()) { - preempt_disable(); - new = __this_cpu_read(idr_preload_head); - if (new) { - __this_cpu_write(idr_preload_head, new->ary[0]); - __this_cpu_dec(idr_preload_cnt); - new->ary[0] = NULL; - } - preempt_enable(); - if (new) - return new; - } - - /* - * Both failed. Try kmem_cache again w/o adding __GFP_NOWARN so - * that memory allocation failure warning is printed as intended. 
- */ - return kmem_cache_zalloc(idr_layer_cache, gfp_mask); -} - -static void idr_layer_rcu_free(struct rcu_head *head) -{ - struct idr_layer *layer; - - layer = container_of(head, struct idr_layer, rcu_head); - kmem_cache_free(idr_layer_cache, layer); -} - -static inline void free_layer(struct idr *idr, struct idr_layer *p) -{ - if (idr->hint == p) - RCU_INIT_POINTER(idr->hint, NULL); - call_rcu(&p->rcu_head, idr_layer_rcu_free); -} - -/* only called when idp->lock is held */ -static void __move_to_free_list(struct idr *idp, struct idr_layer *p) -{ - p->ary[0] = idp->id_free; - idp->id_free = p; - idp->id_free_cnt++; -} - -static void move_to_free_list(struct idr *idp, struct idr_layer *p) -{ - unsigned long flags; - - /* - * Depends on the return element being zeroed. - */ - spin_lock_irqsave(&idp->lock, flags); - __move_to_free_list(idp, p); - spin_unlock_irqrestore(&idp->lock, flags); -} - -static void idr_mark_full(struct idr_layer **pa, int id) -{ - struct idr_layer *p = pa[0]; - int l = 0; - - __set_bit(id & IDR_MASK, p->bitmap); - /* - * If this layer is full mark the bit in the layer above to - * show that this part of the radix tree is full. This may - * complete the layer above and require walking up the radix - * tree. - */ - while (bitmap_full(p->bitmap, IDR_SIZE)) { - if (!(p = pa[++l])) - break; - id = id >> IDR_BITS; - __set_bit((id & IDR_MASK), p->bitmap); - } -} - -static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) -{ - while (idp->id_free_cnt < MAX_IDR_FREE) { - struct idr_layer *new; - new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); - if (new == NULL) - return (0); - move_to_free_list(idp, new); - } - return 1; -} - -/** - * sub_alloc - try to allocate an id without growing the tree depth - * @idp: idr handle - * @starting_id: id to start search at - * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer - * @gfp_mask: allocation mask for idr_layer_alloc() - * @layer_idr: optional idr passed to idr_layer_alloc() - * - * Allocate an id in range [@starting_id, INT_MAX] from @idp without - * growing its depth. Returns - * - * the allocated id >= 0 if successful, - * -EAGAIN if the tree needs to grow for allocation to succeed, - * -ENOSPC if the id space is exhausted, - * -ENOMEM if more idr_layers need to be allocated. - */ -static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, - gfp_t gfp_mask, struct idr *layer_idr) -{ - int n, m, sh; - struct idr_layer *p, *new; - int l, id, oid; - - id = *starting_id; - restart: - p = idp->top; - l = idp->layers; - pa[l--] = NULL; - while (1) { - /* - * We run around this while until we reach the leaf node... - */ - n = (id >> (IDR_BITS*l)) & IDR_MASK; - m = find_next_zero_bit(p->bitmap, IDR_SIZE, n); - if (m == IDR_SIZE) { - /* no space available go back to previous layer. */ - l++; - oid = id; - id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; - - /* if already at the top layer, we need to grow */ - if (id > idr_max(idp->layers)) { - *starting_id = id; - return -EAGAIN; - } - p = pa[l]; - BUG_ON(!p); - - /* If we need to go up one layer, continue the - * loop; otherwise, restart from the top. - */ - sh = IDR_BITS * (l + 1); - if (oid >> sh == id >> sh) - continue; - else - goto restart; - } - if (m != n) { - sh = IDR_BITS*l; - id = ((id >> sh) ^ n ^ m) << sh; - } - if ((id >= MAX_IDR_BIT) || (id < 0)) - return -ENOSPC; - if (l == 0) - break; - /* - * Create the layer below if it is missing. 
- */ - if (!p->ary[m]) { - new = idr_layer_alloc(gfp_mask, layer_idr); - if (!new) - return -ENOMEM; - new->layer = l-1; - new->prefix = id & idr_layer_prefix_mask(new->layer); - rcu_assign_pointer(p->ary[m], new); - p->count++; - } - pa[l--] = p; - p = p->ary[m]; - } - - pa[l] = p; - return id; -} - -static int idr_get_empty_slot(struct idr *idp, int starting_id, - struct idr_layer **pa, gfp_t gfp_mask, - struct idr *layer_idr) -{ - struct idr_layer *p, *new; - int layers, v, id; - unsigned long flags; - - id = starting_id; -build_up: - p = idp->top; - layers = idp->layers; - if (unlikely(!p)) { - if (!(p = idr_layer_alloc(gfp_mask, layer_idr))) - return -ENOMEM; - p->layer = 0; - layers = 1; - } - /* - * Add a new layer to the top of the tree if the requested - * id is larger than the currently allocated space. - */ - while (id > idr_max(layers)) { - layers++; - if (!p->count) { - /* special case: if the tree is currently empty, - * then we grow the tree by moving the top node - * upwards. - */ - p->layer++; - WARN_ON_ONCE(p->prefix); - continue; - } - if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) { - /* - * The allocation failed. If we built part of - * the structure tear it down. - */ - spin_lock_irqsave(&idp->lock, flags); - for (new = p; p && p != idp->top; new = p) { - p = p->ary[0]; - new->ary[0] = NULL; - new->count = 0; - bitmap_clear(new->bitmap, 0, IDR_SIZE); - __move_to_free_list(idp, new); - } - spin_unlock_irqrestore(&idp->lock, flags); - return -ENOMEM; - } - new->ary[0] = p; - new->count = 1; - new->layer = layers-1; - new->prefix = id & idr_layer_prefix_mask(new->layer); - if (bitmap_full(p->bitmap, IDR_SIZE)) - __set_bit(0, new->bitmap); - p = new; - } - rcu_assign_pointer(idp->top, p); - idp->layers = layers; - v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr); - if (v == -EAGAIN) - goto build_up; - return(v); -} - -/* - * @id and @pa are from a successful allocation from idr_get_empty_slot(). - * Install the user pointer @ptr and mark the slot full. - */ -static void idr_fill_slot(struct idr *idr, void *ptr, int id, - struct idr_layer **pa) -{ - /* update hint used for lookup, cleared from free_layer() */ - rcu_assign_pointer(idr->hint, pa[0]); - - rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr); - pa[0]->count++; - idr_mark_full(pa, id); -} - - -/** - * idr_preload - preload for idr_alloc() - * @gfp_mask: allocation mask to use for preloading - * - * Preload per-cpu layer buffer for idr_alloc(). Can only be used from - * process context and each idr_preload() invocation should be matched with - * idr_preload_end(). Note that preemption is disabled while preloaded. - * - * The first idr_alloc() in the preloaded section can be treated as if it - * were invoked with @gfp_mask used for preloading. This allows using more - * permissive allocation masks for idrs protected by spinlocks. - * - * For example, if idr_alloc() below fails, the failure can be treated as - * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT. - * - * idr_preload(GFP_KERNEL); - * spin_lock(lock); - * - * id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT); - * - * spin_unlock(lock); - * idr_preload_end(); - * if (id < 0) - * error; - */ -void idr_preload(gfp_t gfp_mask) -{ - /* - * Consuming preload buffer from non-process context breaks preload - * allocation guarantee. Disallow usage from those contexts. 
- */ - WARN_ON_ONCE(in_interrupt()); - might_sleep_if(gfpflags_allow_blocking(gfp_mask)); - - preempt_disable(); - - /* - * idr_alloc() is likely to succeed w/o full idr_layer buffer and - * return value from idr_alloc() needs to be checked for failure - * anyway. Silently give up if allocation fails. The caller can - * treat failures from idr_alloc() as if idr_alloc() were called - * with @gfp_mask which should be enough. - */ - while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) { - struct idr_layer *new; - - preempt_enable(); - new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); - preempt_disable(); - if (!new) - break; - - /* link the new one to per-cpu preload list */ - new->ary[0] = __this_cpu_read(idr_preload_head); - __this_cpu_write(idr_preload_head, new); - __this_cpu_inc(idr_preload_cnt); - } -} -EXPORT_SYMBOL(idr_preload); - /** - * idr_alloc - allocate new idr entry - * @idr: the (initialized) idr + * idr_alloc - allocate an id + * @idr: idr handle * @ptr: pointer to be associated with the new id * @start: the minimum id (inclusive) - * @end: the maximum id (exclusive, <= 0 for max) - * @gfp_mask: memory allocation flags + * @end: the maximum id (exclusive) + * @gfp: memory allocation flags * - * Allocate an id in [start, end) and associate it with @ptr. If no ID is - * available in the specified range, returns -ENOSPC. On memory allocation - * failure, returns -ENOMEM. + * Allocates an unused ID in the range [start, end). Returns -ENOSPC + * if there are no unused IDs in that range. * * Note that @end is treated as max when <= 0. This is to always allow * using @start + N as @end as long as N is inside integer range. * - * The user is responsible for exclusively synchronizing all operations - * which may modify @idr. However, read-only accesses such as idr_find() - * or iteration can be performed under RCU read lock provided the user - * destroys @ptr in RCU-safe way after removal from idr. + * Simultaneous modifications to the @idr are not allowed and should be + * prevented by the user, usually with a lock. idr_alloc() may be called + * concurrently with read-only accesses to the @idr, such as idr_find() and + * idr_for_each_entry(). */ -int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) +int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { - int max = end > 0 ? 
end - 1 : INT_MAX; /* inclusive upper limit */ - struct idr_layer *pa[MAX_IDR_LEVEL + 1]; - int id; - - might_sleep_if(gfpflags_allow_blocking(gfp_mask)); + void **slot; + struct radix_tree_iter iter; - /* sanity checks */ if (WARN_ON_ONCE(start < 0)) return -EINVAL; - if (unlikely(max < start)) - return -ENOSPC; + if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) + return -EINVAL; - /* allocate id */ - id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL); - if (unlikely(id < 0)) - return id; - if (unlikely(id > max)) - return -ENOSPC; + radix_tree_iter_init(&iter, start); + slot = idr_get_free(&idr->idr_rt, &iter, gfp, end); + if (IS_ERR(slot)) + return PTR_ERR(slot); - idr_fill_slot(idr, ptr, id, pa); - return id; + radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); + radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); + return iter.index; } EXPORT_SYMBOL_GPL(idr_alloc); /** * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion - * @idr: the (initialized) idr + * @idr: idr handle * @ptr: pointer to be associated with the new id * @start: the minimum id (inclusive) - * @end: the maximum id (exclusive, <= 0 for max) - * @gfp_mask: memory allocation flags - * - * Essentially the same as idr_alloc, but prefers to allocate progressively - * higher ids if it can. If the "cur" counter wraps, then it will start again - * at the "start" end of the range and allocate one that has already been used. - */ -int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, - gfp_t gfp_mask) -{ - int id; - - id = idr_alloc(idr, ptr, max(start, idr->cur), end, gfp_mask); - if (id == -ENOSPC) - id = idr_alloc(idr, ptr, start, end, gfp_mask); - - if (likely(id >= 0)) - idr->cur = id + 1; - return id; -} -EXPORT_SYMBOL(idr_alloc_cyclic); - -static void idr_remove_warning(int id) -{ - WARN(1, "idr_remove called for id=%d which is not allocated.\n", id); -} - -static void sub_remove(struct idr *idp, int shift, int id) -{ - struct idr_layer *p = idp->top; - struct idr_layer **pa[MAX_IDR_LEVEL + 1]; - struct idr_layer ***paa = &pa[0]; - struct idr_layer *to_free; - int n; - - *paa = NULL; - *++paa = &idp->top; - - while ((shift > 0) && p) { - n = (id >> shift) & IDR_MASK; - __clear_bit(n, p->bitmap); - *++paa = &p->ary[n]; - p = p->ary[n]; - shift -= IDR_BITS; - } - n = id & IDR_MASK; - if (likely(p != NULL && test_bit(n, p->bitmap))) { - __clear_bit(n, p->bitmap); - RCU_INIT_POINTER(p->ary[n], NULL); - to_free = NULL; - while(*paa && ! --((**paa)->count)){ - if (to_free) - free_layer(idp, to_free); - to_free = **paa; - **paa-- = NULL; - } - if (!*paa) - idp->layers = 0; - if (to_free) - free_layer(idp, to_free); - } else - idr_remove_warning(id); -} - -/** - * idr_remove - remove the given id and free its slot - * @idp: idr handle - * @id: unique key - */ -void idr_remove(struct idr *idp, int id) -{ - struct idr_layer *p; - struct idr_layer *to_free; - - if (id < 0) - return; - - if (id > idr_max(idp->layers)) { - idr_remove_warning(id); - return; - } - - sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); - if (idp->top && idp->top->count == 1 && (idp->layers > 1) && - idp->top->ary[0]) { - /* - * Single child at leftmost slot: we can shrink the tree. - * This level is not needed anymore since when layers are - * inserted, they are inserted at the top of the existing - * tree. 
- */ - to_free = idp->top; - p = idp->top->ary[0]; - rcu_assign_pointer(idp->top, p); - --idp->layers; - to_free->count = 0; - bitmap_clear(to_free->bitmap, 0, IDR_SIZE); - free_layer(idp, to_free); - } -} -EXPORT_SYMBOL(idr_remove); - -static void __idr_remove_all(struct idr *idp) -{ - int n, id, max; - int bt_mask; - struct idr_layer *p; - struct idr_layer *pa[MAX_IDR_LEVEL + 1]; - struct idr_layer **paa = &pa[0]; - - n = idp->layers * IDR_BITS; - *paa = idp->top; - RCU_INIT_POINTER(idp->top, NULL); - max = idr_max(idp->layers); - - id = 0; - while (id >= 0 && id <= max) { - p = *paa; - while (n > IDR_BITS && p) { - n -= IDR_BITS; - p = p->ary[(id >> n) & IDR_MASK]; - *++paa = p; - } - - bt_mask = id; - id += 1 << n; - /* Get the highest bit that the above add changed from 0->1. */ - while (n < fls(id ^ bt_mask)) { - if (*paa) - free_layer(idp, *paa); - n += IDR_BITS; - --paa; - } - } - idp->layers = 0; -} - -/** - * idr_destroy - release all cached layers within an idr tree - * @idp: idr handle - * - * Free all id mappings and all idp_layers. After this function, @idp is - * completely unused and can be freed / recycled. The caller is - * responsible for ensuring that no one else accesses @idp during or after - * idr_destroy(). + * @end: the maximum id (exclusive) + * @gfp: memory allocation flags * - * A typical clean-up sequence for objects stored in an idr tree will use - * idr_for_each() to free all objects, if necessary, then idr_destroy() to - * free up the id mappings and cached idr_layers. + * Allocates an ID larger than the last ID allocated if one is available. + * If not, it will attempt to allocate the smallest ID that is larger or + * equal to @start. */ -void idr_destroy(struct idr *idp) -{ - __idr_remove_all(idp); - - while (idp->id_free_cnt) { - struct idr_layer *p = get_from_free_list(idp); - kmem_cache_free(idr_layer_cache, p); - } -} -EXPORT_SYMBOL(idr_destroy); - -void *idr_find_slowpath(struct idr *idp, int id) +int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { - int n; - struct idr_layer *p; + int id, curr = idr->idr_next; - if (id < 0) - return NULL; + if (curr < start) + curr = start; - p = rcu_dereference_raw(idp->top); - if (!p) - return NULL; - n = (p->layer+1) * IDR_BITS; + id = idr_alloc(idr, ptr, curr, end, gfp); + if ((id == -ENOSPC) && (curr > start)) + id = idr_alloc(idr, ptr, start, curr, gfp); - if (id > idr_max(p->layer + 1)) - return NULL; - BUG_ON(n == 0); + if (id >= 0) + idr->idr_next = id + 1U; - while (n > 0 && p) { - n -= IDR_BITS; - BUG_ON(n != p->layer*IDR_BITS); - p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); - } - return((void *)p); + return id; } -EXPORT_SYMBOL(idr_find_slowpath); +EXPORT_SYMBOL(idr_alloc_cyclic); /** * idr_for_each - iterate through all stored pointers - * @idp: idr handle + * @idr: idr handle * @fn: function to be called for each pointer - * @data: data passed back to callback function + * @data: data passed to callback function * - * Iterate over the pointers registered with the given idr. The - * callback function will be called for each pointer currently - * registered, passing the id, the pointer and the data pointer passed - * to this function. It is not safe to modify the idr tree while in - * the callback, so functions such as idr_get_new and idr_remove are - * not allowed. + * The callback function will be called for each entry in @idr, passing + * the id, the pointer and the data pointer passed to this function. * - * We check the return of @fn each time. 
If it returns anything other - * than %0, we break out and return that value. + * If @fn returns anything other than %0, the iteration stops and that + * value is returned from this function. * - * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove(). + * idr_for_each() can be called concurrently with idr_alloc() and + * idr_remove() if protected by RCU. Newly added entries may not be + * seen and deleted entries may be seen, but adding and removing entries + * will not cause other entries to be skipped, nor spurious ones to be seen. */ -int idr_for_each(struct idr *idp, - int (*fn)(int id, void *p, void *data), void *data) +int idr_for_each(const struct idr *idr, + int (*fn)(int id, void *p, void *data), void *data) { - int n, id, max, error = 0; - struct idr_layer *p; - struct idr_layer *pa[MAX_IDR_LEVEL + 1]; - struct idr_layer **paa = &pa[0]; - - n = idp->layers * IDR_BITS; - *paa = rcu_dereference_raw(idp->top); - max = idr_max(idp->layers); - - id = 0; - while (id >= 0 && id <= max) { - p = *paa; - while (n > 0 && p) { - n -= IDR_BITS; - p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); - *++paa = p; - } - - if (p) { - error = fn(id, (void *)p, data); - if (error) - break; - } + struct radix_tree_iter iter; + void **slot; - id += 1 << n; - while (n < fls(id)) { - n += IDR_BITS; - --paa; - } + radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { + int ret = fn(iter.index, rcu_dereference_raw(*slot), data); + if (ret) + return ret; } - return error; + return 0; } EXPORT_SYMBOL(idr_for_each); /** - * idr_get_next - lookup next object of id to given id. - * @idp: idr handle - * @nextidp: pointer to lookup key - * - * Returns pointer to registered object with id, which is next number to - * given id. After being looked up, *@nextidp will be updated for the next - * iteration. - * - * This function can be called under rcu_read_lock(), given that the leaf - * pointers lifetimes are correctly managed. + * idr_get_next - Find next populated entry + * @idr: idr handle + * @nextid: Pointer to lowest possible ID to return + * + * Returns the next populated entry in the tree with an ID greater than + * or equal to the value pointed to by @nextid. On exit, @nextid is updated + * to the ID of the found value. To use in a loop, the value pointed to by + * nextid must be incremented by the user. */ -void *idr_get_next(struct idr *idp, int *nextidp) +void *idr_get_next(struct idr *idr, int *nextid) { - struct idr_layer *p, *pa[MAX_IDR_LEVEL + 1]; - struct idr_layer **paa = &pa[0]; - int id = *nextidp; - int n, max; + struct radix_tree_iter iter; + void **slot; - /* find first ent */ - p = *paa = rcu_dereference_raw(idp->top); - if (!p) + slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + if (!slot) return NULL; - n = (p->layer + 1) * IDR_BITS; - max = idr_max(p->layer + 1); - - while (id >= 0 && id <= max) { - p = *paa; - while (n > 0 && p) { - n -= IDR_BITS; - p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); - *++paa = p; - } - if (p) { - *nextidp = id; - return p; - } - - /* - * Proceed to the next layer at the current level. Unlike - * idr_for_each(), @id isn't guaranteed to be aligned to - * layer boundary at this point and adding 1 << n may - * incorrectly skip IDs. Make sure we jump to the - * beginning of the next layer using round_up(). 
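/*
 * Illustrative sketch (not part of the patch): a caller of the
 * callback-based idr_for_each() documented above.  example_print_one()
 * and example_print_all() are hypothetical names.
 */
static int example_print_one(int id, void *p, void *data)
{
	pr_info("id %d -> %p\n", id, p);
	return 0;	/* returning non-zero would stop the iteration */
}

static void example_print_all(struct idr *idr)
{
	idr_for_each(idr, example_print_one, NULL);
}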
- */ - id = round_up(id + 1, 1 << n); - while (n < fls(id)) { - n += IDR_BITS; - --paa; - } - } - return NULL; + *nextid = iter.index; + return rcu_dereference_raw(*slot); } EXPORT_SYMBOL(idr_get_next); - /** * idr_replace - replace pointer for given id - * @idp: idr handle - * @ptr: pointer you want associated with the id - * @id: lookup key + * @idr: idr handle + * @ptr: New pointer to associate with the ID + * @id: Lookup key * - * Replace the pointer registered with an id and return the old value. - * A %-ENOENT return indicates that @id was not found. - * A %-EINVAL return indicates that @id was not within valid constraints. + * Replace the pointer registered with an ID and return the old value. + * This function can be called under the RCU read lock concurrently with + * idr_alloc() and idr_remove() (as long as the ID being removed is not + * the one being replaced!). * - * The caller must serialize with writers. + * Returns: 0 on success. %-ENOENT indicates that @id was not found. + * %-EINVAL indicates that @id or @ptr were not valid. */ -void *idr_replace(struct idr *idp, void *ptr, int id) +void *idr_replace(struct idr *idr, void *ptr, int id) { - int n; - struct idr_layer *p, *old_p; + struct radix_tree_node *node; + void **slot = NULL; + void *entry; - if (id < 0) + if (WARN_ON_ONCE(id < 0)) + return ERR_PTR(-EINVAL); + if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return ERR_PTR(-EINVAL); - p = idp->top; - if (!p) - return ERR_PTR(-ENOENT); - - if (id > idr_max(p->layer + 1)) - return ERR_PTR(-ENOENT); - - n = p->layer * IDR_BITS; - while ((n > 0) && p) { - p = p->ary[(id >> n) & IDR_MASK]; - n -= IDR_BITS; - } - - n = id & IDR_MASK; - if (unlikely(p == NULL || !test_bit(n, p->bitmap))) + entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); + if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) return ERR_PTR(-ENOENT); - old_p = p->ary[n]; - rcu_assign_pointer(p->ary[n], ptr); + __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL, NULL); - return old_p; + return entry; } EXPORT_SYMBOL(idr_replace); -void __init idr_init_cache(void) -{ - idr_layer_cache = kmem_cache_create("idr_layer_cache", - sizeof(struct idr_layer), 0, SLAB_PANIC, NULL); -} - -/** - * idr_init - initialize idr handle - * @idp: idr handle - * - * This function is use to set up the handle (@idp) that you will pass - * to the rest of the functions. - */ -void idr_init(struct idr *idp) -{ - memset(idp, 0, sizeof(struct idr)); - spin_lock_init(&idp->lock); -} -EXPORT_SYMBOL(idr_init); - -static int idr_has_entry(int id, void *p, void *data) -{ - return 1; -} - -bool idr_is_empty(struct idr *idp) -{ - return !idr_for_each(idp, idr_has_entry, NULL); -} -EXPORT_SYMBOL(idr_is_empty); - /** * DOC: IDA description - * IDA - IDR based ID allocator - * - * This is id allocator without id -> pointer translation. Memory - * usage is much lower than full blown idr because each id only - * occupies a bit. ida uses a custom leaf node which contains - * IDA_BITMAP_BITS slots. * - * 2007-04-25 written by Tejun Heo + * The IDA is an ID allocator which does not provide the ability to + * associate an ID with a pointer. As such, it only needs to store one + * bit per ID, and so is more space efficient than an IDR. To use an IDA, + * define it using DEFINE_IDA() (or embed a &struct ida in a data structure, + * then initialise it using ida_init()). To allocate a new ID, call + * ida_simple_get(). To free an ID, call ida_simple_remove(). 
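/*
 * Illustrative sketch (not part of the patch): the simple IDA interface
 * described above, which handles locking internally.  example_ida,
 * example_get_id() and example_put_id() are hypothetical names.
 */
static DEFINE_IDA(example_ida);

static int example_get_id(void)
{
	/* Returns a free non-negative ID, or a negative errno on failure. */
	return ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);
}

static void example_put_id(int id)
{
	ida_simple_remove(&example_ida, id);
}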
+ * + * If you have more complex locking requirements, use a loop around + * ida_pre_get() and ida_get_new() to allocate a new ID. Then use + * ida_remove() to free an ID. You must make sure that ida_get_new() and + * ida_remove() cannot be called at the same time as each other for the + * same IDA. + * + * You can also use ida_get_new_above() if you need an ID to be allocated + * above a particular number. ida_destroy() can be used to dispose of an + * IDA without needing to free the individual IDs in it. You can use + * ida_is_empty() to find out whether the IDA has any IDs currently allocated. + * + * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward + * limitation, it should be quite straightforward to raise the maximum. */ -static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap) -{ - unsigned long flags; - - if (!ida->free_bitmap) { - spin_lock_irqsave(&ida->idr.lock, flags); - if (!ida->free_bitmap) { - ida->free_bitmap = bitmap; - bitmap = NULL; - } - spin_unlock_irqrestore(&ida->idr.lock, flags); - } - - kfree(bitmap); -} - /** * ida_pre_get - reserve resources for ida allocation - * @ida: ida handle - * @gfp_mask: memory allocation flag - * - * This function should be called prior to locking and calling the - * following function. It preallocates enough memory to satisfy the - * worst possible allocation. + * @ida: ida handle + * @gfp: memory allocation flags * - * If the system is REALLY out of memory this function returns %0, - * otherwise %1. + * This function should be called before calling ida_get_new_above(). If it + * is unable to allocate memory, it will return %0. On success, it returns %1. */ -int ida_pre_get(struct ida *ida, gfp_t gfp_mask) +int ida_pre_get(struct ida *ida, gfp_t gfp) { - /* allocate idr_layers */ - if (!__idr_pre_get(&ida->idr, gfp_mask)) - return 0; + struct ida_bitmap *bitmap; - /* allocate free_bitmap */ - if (!ida->free_bitmap) { - struct ida_bitmap *bitmap; + /* + * This looks weird, but the IDA API has no preload_end() equivalent. + * Instead, ida_get_new() can return -EAGAIN, prompting the caller + * to return to the ida_pre_get() step. + */ + idr_preload(gfp); + idr_preload_end(); - bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask); + if (!ida->free_bitmap) { + bitmap = kmalloc(sizeof(struct ida_bitmap), gfp); if (!bitmap) return 0; - - free_bitmap(ida, bitmap); + bitmap = xchg(&ida->free_bitmap, bitmap); + kfree(bitmap); } return 1; } EXPORT_SYMBOL(ida_pre_get); +#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS) + /** * ida_get_new_above - allocate new ID above or equal to a start id - * @ida: ida handle - * @starting_id: id to start search at - * @p_id: pointer to the allocated handle + * @ida: ida handle + * @start: id to start search at + * @id: pointer to the allocated handle * - * Allocate new ID above or equal to @starting_id. It should be called - * with any required locks. + * Allocate new ID above or equal to @start. It should be called + * with any required locks to ensure that concurrent calls to + * ida_get_new_above() / ida_get_new() / ida_remove() are not allowed. + * Consider using ida_simple_get() if you do not have complex locking + * requirements. * * If memory is required, it will return %-EAGAIN, you should unlock * and go back to the ida_pre_get() call. If the ida is full, it will - * return %-ENOSPC. - * - * Note that callers must ensure that concurrent access to @ida is not possible. - * See ida_simple_get() for a varaint which takes care of locking. + * return %-ENOSPC. 
On success, it will return 0. * - * @p_id returns a value in the range @starting_id ... %0x7fffffff. + * @id returns a value in the range @start ... %0x7fffffff. */ -int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) +int ida_get_new_above(struct ida *ida, int start, int *id) { - struct idr_layer *pa[MAX_IDR_LEVEL + 1]; + struct radix_tree_root *root = &ida->ida_rt; + void **slot; + struct radix_tree_iter iter; struct ida_bitmap *bitmap; - unsigned long flags; - int idr_id = starting_id / IDA_BITMAP_BITS; - int offset = starting_id % IDA_BITMAP_BITS; - int t, id; - - restart: - /* get vacant slot */ - t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr); - if (t < 0) - return t == -ENOMEM ? -EAGAIN : t; - - if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT) - return -ENOSPC; - - if (t != idr_id) - offset = 0; - idr_id = t; - - /* if bitmap isn't there, create a new one */ - bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK]; - if (!bitmap) { - spin_lock_irqsave(&ida->idr.lock, flags); - bitmap = ida->free_bitmap; - ida->free_bitmap = NULL; - spin_unlock_irqrestore(&ida->idr.lock, flags); - - if (!bitmap) - return -EAGAIN; - - memset(bitmap, 0, sizeof(struct ida_bitmap)); - rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK], - (void *)bitmap); - pa[0]->count++; - } - - /* lookup for empty slot */ - t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset); - if (t == IDA_BITMAP_BITS) { - /* no empty slot after offset, continue to the next chunk */ - idr_id++; - offset = 0; - goto restart; - } - - id = idr_id * IDA_BITMAP_BITS + t; - if (id >= MAX_IDR_BIT) - return -ENOSPC; - - __set_bit(t, bitmap->bitmap); - if (++bitmap->nr_busy == IDA_BITMAP_BITS) - idr_mark_full(pa, idr_id); + unsigned long index; + unsigned bit; + int new; + + index = start / IDA_BITMAP_BITS; + bit = start % IDA_BITMAP_BITS; + + slot = radix_tree_iter_init(&iter, index); + for (;;) { + if (slot) + slot = radix_tree_next_slot(slot, &iter, + RADIX_TREE_ITER_TAGGED); + if (!slot) { + slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX); + if (IS_ERR(slot)) { + if (slot == ERR_PTR(-ENOMEM)) + return -EAGAIN; + return PTR_ERR(slot); + } + } + if (iter.index > index) + bit = 0; + new = iter.index * IDA_BITMAP_BITS; + bitmap = rcu_dereference_raw(*slot); + if (bitmap) { + bit = find_next_zero_bit(bitmap->bitmap, + IDA_BITMAP_BITS, bit); + new += bit; + if (new < 0) + return -ENOSPC; + if (bit == IDA_BITMAP_BITS) + continue; - *p_id = id; + __set_bit(bit, bitmap->bitmap); + if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) + radix_tree_iter_tag_clear(root, &iter, + IDR_FREE); + } else { + new += bit; + if (new < 0) + return -ENOSPC; + bitmap = ida->free_bitmap; + if (!bitmap) + return -EAGAIN; + ida->free_bitmap = NULL; + memset(bitmap, 0, sizeof(*bitmap)); + __set_bit(bit, bitmap->bitmap); + radix_tree_iter_replace(root, &iter, slot, bitmap); + } - /* Each leaf node can handle nearly a thousand slots and the - * whole idea of ida is to have small memory foot print. - * Throw away extra resources one by one after each successful - * allocation. - */ - if (ida->idr.id_free_cnt || ida->free_bitmap) { - struct idr_layer *p = get_from_free_list(&ida->idr); - if (p) - kmem_cache_free(idr_layer_cache, p); + *id = new; + return 0; } - - return 0; } EXPORT_SYMBOL(ida_get_new_above); /** - * ida_remove - remove the given ID - * @ida: ida handle - * @id: ID to free + * ida_remove - Free the given ID + * @ida: ida handle + * @id: ID to free + * + * This function should not be called at the same time as ida_get_new_above(). 
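/*
 * Illustrative sketch (not part of the patch): the ida_pre_get() /
 * ida_get_new_above() retry loop described in the kerneldoc above, with
 * the allocation done under a caller-provided lock.  example_lock and
 * example_alloc_above() are hypothetical names.
 */
static DEFINE_SPINLOCK(example_lock);

static int example_alloc_above(struct ida *ida, int start, int *id)
{
	int ret;

	do {
		if (!ida_pre_get(ida, GFP_KERNEL))
			return -ENOMEM;
		spin_lock(&example_lock);
		ret = ida_get_new_above(ida, start, id);
		spin_unlock(&example_lock);
	} while (ret == -EAGAIN);

	return ret;	/* 0 on success, -ENOSPC if the range is exhausted */
}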
*/ void ida_remove(struct ida *ida, int id) { - struct idr_layer *p = ida->idr.top; - int shift = (ida->idr.layers - 1) * IDR_BITS; - int idr_id = id / IDA_BITMAP_BITS; - int offset = id % IDA_BITMAP_BITS; - int n; + unsigned long index = id / IDA_BITMAP_BITS; + unsigned offset = id % IDA_BITMAP_BITS; struct ida_bitmap *bitmap; + struct radix_tree_iter iter; + void **slot; - if (idr_id > idr_max(ida->idr.layers)) + slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index); + if (!slot) goto err; - /* clear full bits while looking up the leaf idr_layer */ - while ((shift > 0) && p) { - n = (idr_id >> shift) & IDR_MASK; - __clear_bit(n, p->bitmap); - p = p->ary[n]; - shift -= IDR_BITS; - } - - if (p == NULL) - goto err; - - n = idr_id & IDR_MASK; - __clear_bit(n, p->bitmap); - - bitmap = (void *)p->ary[n]; - if (!bitmap || !test_bit(offset, bitmap->bitmap)) + bitmap = rcu_dereference_raw(*slot); + if (!test_bit(offset, bitmap->bitmap)) goto err; - /* update bitmap and remove it if empty */ __clear_bit(offset, bitmap->bitmap); - if (--bitmap->nr_busy == 0) { - __set_bit(n, p->bitmap); /* to please idr_remove() */ - idr_remove(&ida->idr, idr_id); - free_bitmap(ida, bitmap); + radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); + if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) { + kfree(bitmap); + radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } - return; - err: WARN(1, "ida_remove called for id=%d which is not allocated.\n", id); } EXPORT_SYMBOL(ida_remove); /** - * ida_destroy - release all cached layers within an ida tree - * @ida: ida handle + * ida_destroy - Free the contents of an ida + * @ida: ida handle + * + * Calling this function releases all resources associated with an IDA. When + * this call returns, the IDA is empty and can be reused or freed. The caller + * should not allow ida_remove() or ida_get_new_above() to be called at the + * same time. */ void ida_destroy(struct ida *ida) { - idr_destroy(&ida->idr); + struct radix_tree_iter iter; + void **slot; + + radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { + struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); + kfree(bitmap); + radix_tree_iter_delete(&ida->ida_rt, &iter, slot); + } + kfree(ida->free_bitmap); + ida->free_bitmap = NULL; } EXPORT_SYMBOL(ida_destroy); @@ -1141,18 +442,3 @@ void ida_simple_remove(struct ida *ida, unsigned int id) spin_unlock_irqrestore(&simple_ida_lock, flags); } EXPORT_SYMBOL(ida_simple_remove); - -/** - * ida_init - initialize ida handle - * @ida: ida handle - * - * This function is use to set up the handle (@ida) that you will pass - * to the rest of the functions. - */ -void ida_init(struct ida *ida) -{ - memset(ida, 0, sizeof(struct ida)); - idr_init(&ida->idr); - -} -EXPORT_SYMBOL(ida_init); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7bf7d4e60e43..eaea14b8f2ca 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -22,20 +22,21 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include +#include #include #include +#include +#include #include #include -#include -#include +#include #include +#include /* in_interrupt() */ +#include +#include #include -#include -#include #include -#include -#include -#include /* in_interrupt() */ /* Number of nodes in fully populated tree of given height */ @@ -59,6 +60,15 @@ static struct kmem_cache *radix_tree_node_cachep; */ #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) +/* + * The IDR does not have to be as high as the radix tree since it uses + * signed integers, not unsigned longs. 
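/*
 * Illustrative arithmetic (not part of the patch) for the constants
 * defined just below, assuming the common configuration of 32-bit int,
 * 64-bit long and RADIX_TREE_MAP_SHIFT == 6:
 *
 *   IDR_INDEX_BITS   = 8 * sizeof(int) - 1          = 31
 *   IDR_MAX_PATH     = DIV_ROUND_UP(31, 6)          = 6
 *   IDR_PRELOAD_SIZE = 6 * 2 - 1                    = 11
 *
 * versus RADIX_TREE_MAX_PATH = 11 and RADIX_TREE_PRELOAD_SIZE = 21 for
 * the full unsigned long index space.
 */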
+ */ +#define IDR_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(int) - 1) +#define IDR_MAX_PATH (DIV_ROUND_UP(IDR_INDEX_BITS, \ + RADIX_TREE_MAP_SHIFT)) +#define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1) + /* * Per-cpu pool of preloaded nodes */ @@ -149,27 +159,32 @@ static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) { - root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); + root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); } static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) { - root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); + root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); } static inline void root_tag_clear_all(struct radix_tree_root *root) { - root->gfp_mask &= __GFP_BITS_MASK; + root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1; } static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) { - return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); + return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT)); } static inline unsigned root_tags_get(const struct radix_tree_root *root) { - return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT; + return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT; +} + +static inline bool is_idr(const struct radix_tree_root *root) +{ + return !!(root->gfp_mask & ROOT_IS_IDR); } /* @@ -187,6 +202,11 @@ static inline int any_tag_set(const struct radix_tree_node *node, return 0; } +static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag) +{ + bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE); +} + /** * radix_tree_find_next_bit - find the next set bit in a memory region * @@ -240,6 +260,13 @@ static inline unsigned long node_maxindex(const struct radix_tree_node *node) return shift_maxindex(node->shift); } +static unsigned long next_index(unsigned long index, + const struct radix_tree_node *node, + unsigned long offset) +{ + return (index & ~node_maxindex(node)) + (offset << node->shift); +} + #ifndef __KERNEL__ static void dump_node(struct radix_tree_node *node, unsigned long index) { @@ -278,11 +305,52 @@ static void radix_tree_dump(struct radix_tree_root *root) { pr_debug("radix root: %p rnode %p tags %x\n", root, root->rnode, - root->gfp_mask >> __GFP_BITS_SHIFT); + root->gfp_mask >> ROOT_TAG_SHIFT); if (!radix_tree_is_internal_node(root->rnode)) return; dump_node(entry_to_node(root->rnode), 0); } + +static void dump_ida_node(void *entry, unsigned long index) +{ + unsigned long i; + + if (!entry) + return; + + if (radix_tree_is_internal_node(entry)) { + struct radix_tree_node *node = entry_to_node(entry); + + pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n", + node, node->offset, index * IDA_BITMAP_BITS, + ((index | node_maxindex(node)) + 1) * + IDA_BITMAP_BITS - 1, + node->parent, node->tags[0][0], node->shift, + node->count); + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) + dump_ida_node(node->slots[i], + index | (i << node->shift)); + } else { + struct ida_bitmap *bitmap = entry; + + pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap, + (int)(index & RADIX_TREE_MAP_MASK), + index * IDA_BITMAP_BITS, + (index + 1) * IDA_BITMAP_BITS - 1); + for (i = 0; i < IDA_BITMAP_LONGS; i++) + pr_cont(" %lx", bitmap->bitmap[i]); + pr_cont("\n"); + } +} + +static void ida_dump(struct ida *ida) +{ + struct radix_tree_root *root = &ida->ida_rt; + pr_debug("ida: %p %p 
free %d bitmap %p\n", ida, root->rnode, + root->gfp_mask >> ROOT_TAG_SHIFT, + ida->free_bitmap); + dump_ida_node(root->rnode, 0); +} #endif /* @@ -290,13 +358,11 @@ static void radix_tree_dump(struct radix_tree_root *root) * that the caller has pinned this thread of control to the current CPU. */ static struct radix_tree_node * -radix_tree_node_alloc(struct radix_tree_root *root, - struct radix_tree_node *parent, +radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, unsigned int shift, unsigned int offset, unsigned int count, unsigned int exceptional) { struct radix_tree_node *ret = NULL; - gfp_t gfp_mask = root_gfp_mask(root); /* * Preload code isn't irq safe and it doesn't make sense to use @@ -533,7 +599,7 @@ static unsigned radix_tree_load_root(const struct radix_tree_root *root, /* * Extend a radix tree so it can store key @index. */ -static int radix_tree_extend(struct radix_tree_root *root, +static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, unsigned long index, unsigned int shift) { struct radix_tree_node *slot; @@ -546,19 +612,27 @@ static int radix_tree_extend(struct radix_tree_root *root, maxshift += RADIX_TREE_MAP_SHIFT; slot = root->rnode; - if (!slot) + if (!slot && (!is_idr(root) || root_tag_get(root, IDR_FREE))) goto out; do { - struct radix_tree_node *node = radix_tree_node_alloc(root, - NULL, shift, 0, 1, 0); + struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, + shift, 0, 1, 0); if (!node) return -ENOMEM; - /* Propagate the aggregated tag info into the new root */ - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { - if (root_tag_get(root, tag)) - tag_set(node, tag, 0); + if (is_idr(root)) { + all_tag_set(node, IDR_FREE); + if (!root_tag_get(root, IDR_FREE)) { + tag_clear(node, IDR_FREE, 0); + root_tag_set(root, IDR_FREE); + } + } else { + /* Propagate the aggregated tag info to the new child */ + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { + if (root_tag_get(root, tag)) + tag_set(node, tag, 0); + } } BUG_ON(shift > BITS_PER_LONG); @@ -619,6 +693,8 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, * one (root->rnode) as far as dependent read barriers go. */ root->rnode = child; + if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) + root_tag_clear(root, IDR_FREE); /* * We have a dilemma here. The node's slot[0] must not be @@ -674,7 +750,12 @@ static bool delete_node(struct radix_tree_root *root, parent->slots[node->offset] = NULL; parent->count--; } else { - root_tag_clear_all(root); + /* + * Shouldn't the tags already have all been cleared + * by the caller? + */ + if (!is_idr(root)) + root_tag_clear_all(root); root->rnode = NULL; } @@ -714,6 +795,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, unsigned long maxindex; unsigned int shift, offset = 0; unsigned long max = index | ((1UL << order) - 1); + gfp_t gfp = root_gfp_mask(root); shift = radix_tree_load_root(root, &child, &maxindex); @@ -721,7 +803,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, if (order > 0 && max == ((1UL << order) - 1)) max++; if (max > maxindex) { - int error = radix_tree_extend(root, max, shift); + int error = radix_tree_extend(root, gfp, max, shift); if (error < 0) return error; shift = error; @@ -732,7 +814,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, shift -= RADIX_TREE_MAP_SHIFT; if (child == NULL) { /* Have to add a child node. 
*/ - child = radix_tree_node_alloc(root, node, shift, + child = radix_tree_node_alloc(gfp, node, shift, offset, 0, 0); if (!child) return -ENOMEM; @@ -755,7 +837,6 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, return 0; } -#ifdef CONFIG_RADIX_TREE_MULTIORDER /* * Free any nodes below this node. The tree is presumed to not need * shrinking, and any user data in the tree is presumed to not need a @@ -791,6 +872,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) } } +#ifdef CONFIG_RADIX_TREE_MULTIORDER static inline int insert_entries(struct radix_tree_node *node, void **slot, void *item, unsigned order, bool replace) { @@ -996,69 +1078,70 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) } EXPORT_SYMBOL(radix_tree_lookup); -static inline int slot_count(struct radix_tree_node *node, - void **slot) +static inline void replace_sibling_entries(struct radix_tree_node *node, + void **slot, int count, int exceptional) { - int n = 1; #ifdef CONFIG_RADIX_TREE_MULTIORDER void *ptr = node_to_entry(slot); - unsigned offset = get_slot_offset(node, slot); - int i; + unsigned offset = get_slot_offset(node, slot) + 1; - for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[offset + i] != ptr) + while (offset < RADIX_TREE_MAP_SIZE) { + if (node->slots[offset] != ptr) break; - n++; + if (count < 0) { + node->slots[offset] = NULL; + node->count--; + } + node->exceptional += exceptional; + offset++; } #endif - return n; } -static void replace_slot(struct radix_tree_root *root, - struct radix_tree_node *node, - void **slot, void *item, - bool warn_typeswitch) +static void replace_slot(void **slot, void *item, struct radix_tree_node *node, + int count, int exceptional) { - void *old = rcu_dereference_raw(*slot); - int count, exceptional; - - WARN_ON_ONCE(radix_tree_is_internal_node(item)); - - count = !!item - !!old; - exceptional = !!radix_tree_exceptional_entry(item) - - !!radix_tree_exceptional_entry(old); - - WARN_ON_ONCE(warn_typeswitch && (count || exceptional)); + if (WARN_ON_ONCE(radix_tree_is_internal_node(item))) + return; - if (node) { + if (node && (count || exceptional)) { node->count += count; - if (exceptional) { - exceptional *= slot_count(node, slot); - node->exceptional += exceptional; - } + node->exceptional += exceptional; + replace_sibling_entries(node, slot, count, exceptional); } rcu_assign_pointer(*slot, item); } -static inline void delete_sibling_entries(struct radix_tree_node *node, - void **slot) +static bool node_tag_get(const struct radix_tree_root *root, + const struct radix_tree_node *node, + unsigned int tag, unsigned int offset) { -#ifdef CONFIG_RADIX_TREE_MULTIORDER - bool exceptional = radix_tree_exceptional_entry(*slot); - void *ptr = node_to_entry(slot); - unsigned offset = get_slot_offset(node, slot); - int i; + if (node) + return tag_get(node, tag, offset); + return root_tag_get(root, tag); +} - for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[offset + i] != ptr) - break; - node->slots[offset + i] = NULL; - node->count--; - if (exceptional) - node->exceptional--; +/* + * IDR users want to be able to store NULL in the tree, so if the slot isn't + * free, don't adjust the count, even if it's transitioning between NULL and + * non-NULL. For the IDA, we mark slots as being IDR_FREE while they still + * have empty bits, but it only stores NULL in slots when they're being + * deleted. 
+ */ +static int calculate_count(struct radix_tree_root *root, + struct radix_tree_node *node, void **slot, + void *item, void *old) +{ + if (is_idr(root)) { + unsigned offset = get_slot_offset(node, slot); + bool free = node_tag_get(root, node, IDR_FREE, offset); + if (!free) + return 0; + if (!old) + return 1; } -#endif + return !!item - !!old; } /** @@ -1078,15 +1161,19 @@ void __radix_tree_replace(struct radix_tree_root *root, void **slot, void *item, radix_tree_update_node_t update_node, void *private) { - if (!item) - delete_sibling_entries(node, slot); + void *old = rcu_dereference_raw(*slot); + int exceptional = !!radix_tree_exceptional_entry(item) - + !!radix_tree_exceptional_entry(old); + int count = calculate_count(root, node, slot, item, old); + /* * This function supports replacing exceptional entries and * deleting entries, but that needs accounting against the * node unless the slot is root->rnode. */ - replace_slot(root, node, slot, item, - !node && slot != (void **)&root->rnode); + WARN_ON_ONCE(!node && (slot != (void **)&root->rnode) && + (count || exceptional)); + replace_slot(slot, item, node, count, exceptional); if (!node) return; @@ -1116,7 +1203,7 @@ void __radix_tree_replace(struct radix_tree_root *root, void radix_tree_replace_slot(struct radix_tree_root *root, void **slot, void *item) { - replace_slot(root, NULL, slot, item, true); + __radix_tree_replace(root, NULL, slot, item, NULL, NULL); } /** @@ -1191,6 +1278,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, void **slot; unsigned int offset, end; unsigned n, tag, tags = 0; + gfp_t gfp = root_gfp_mask(root); if (!__radix_tree_lookup(root, index, &parent, &slot)) return -ENOENT; @@ -1228,7 +1316,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, for (;;) { if (node->shift > order) { - child = radix_tree_node_alloc(root, node, + child = radix_tree_node_alloc(gfp, node, node->shift - RADIX_TREE_MAP_SHIFT, offset, 0, 0); if (!child) @@ -1444,8 +1532,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root, radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return 0; - if (node == NULL) - return 0; while (radix_tree_is_internal_node(node)) { unsigned offset; @@ -1453,8 +1539,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root, parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, index); - if (!node) - return 0; if (!tag_get(parent, tag, offset)) return 0; if (node == RADIX_TREE_RETRY) @@ -1481,6 +1565,11 @@ static void set_iter_tags(struct radix_tree_iter *iter, unsigned tag_long = offset / BITS_PER_LONG; unsigned tag_bit = offset % BITS_PER_LONG; + if (!node) { + iter->tags = 1; + return; + } + iter->tags = node->tags[tag][tag_long] >> tag_bit; /* This never happens if RADIX_TREE_TAG_LONGS == 1 */ @@ -1873,13 +1962,18 @@ void __radix_tree_delete_node(struct radix_tree_root *root, static bool __radix_tree_delete(struct radix_tree_root *root, struct radix_tree_node *node, void **slot) { + void *old = rcu_dereference_raw(*slot); + int exceptional = radix_tree_exceptional_entry(old) ? 
-1 : 0; unsigned offset = get_slot_offset(node, slot); int tag; - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - node_tag_clear(root, node, tag, offset); + if (is_idr(root)) + node_tag_set(root, node, IDR_FREE, offset); + else + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) + node_tag_clear(root, node, tag, offset); - replace_slot(root, node, slot, NULL, true); + replace_slot(slot, NULL, node, -1, exceptional); return node && delete_node(root, node, NULL, NULL); } @@ -1916,12 +2010,13 @@ void radix_tree_iter_delete(struct radix_tree_root *root, void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { - struct radix_tree_node *node; + struct radix_tree_node *node = NULL; void **slot; void *entry; entry = __radix_tree_lookup(root, index, &node, &slot); - if (!entry) + if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE, + get_slot_offset(node, slot)))) return NULL; if (item && entry != item) @@ -1957,8 +2052,7 @@ void radix_tree_clear_tags(struct radix_tree_root *root, for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) node_tag_clear(root, node, tag, offset); } else { - /* Clear root node tags */ - root->gfp_mask &= __GFP_BITS_MASK; + root_tag_clear_all(root); } } @@ -1973,6 +2067,111 @@ int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag) } EXPORT_SYMBOL(radix_tree_tagged); +/** + * idr_preload - preload for idr_alloc() + * @gfp_mask: allocation mask to use for preloading + * + * Preallocate memory to use for the next call to idr_alloc(). This function + * returns with preemption disabled. It will be enabled by idr_preload_end(). + */ +void idr_preload(gfp_t gfp_mask) +{ + __radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE); +} +EXPORT_SYMBOL(idr_preload); + +void **idr_get_free(struct radix_tree_root *root, + struct radix_tree_iter *iter, gfp_t gfp, int end) +{ + struct radix_tree_node *node = NULL, *child; + void **slot = (void **)&root->rnode; + unsigned long maxindex, start = iter->next_index; + unsigned long max = end > 0 ? end - 1 : INT_MAX; + unsigned int shift, offset = 0; + + grow: + shift = radix_tree_load_root(root, &child, &maxindex); + if (!radix_tree_tagged(root, IDR_FREE)) + start = max(start, maxindex + 1); + if (start > max) + return ERR_PTR(-ENOSPC); + + if (start > maxindex) { + int error = radix_tree_extend(root, gfp, start, shift); + if (error < 0) + return ERR_PTR(error); + shift = error; + child = rcu_dereference_raw(root->rnode); + } + + while (shift) { + shift -= RADIX_TREE_MAP_SHIFT; + if (child == NULL) { + /* Have to add a child node. 
*/ + child = radix_tree_node_alloc(gfp, node, shift, offset, + 0, 0); + if (!child) + return ERR_PTR(-ENOMEM); + all_tag_set(child, IDR_FREE); + rcu_assign_pointer(*slot, node_to_entry(child)); + if (node) + node->count++; + } else if (!radix_tree_is_internal_node(child)) + break; + + node = entry_to_node(child); + offset = radix_tree_descend(node, &child, start); + if (!tag_get(node, IDR_FREE, offset)) { + offset = radix_tree_find_next_bit(node, IDR_FREE, + offset + 1); + start = next_index(start, node, offset); + if (start > max) + return ERR_PTR(-ENOSPC); + while (offset == RADIX_TREE_MAP_SIZE) { + offset = node->offset + 1; + node = node->parent; + if (!node) + goto grow; + shift = node->shift; + } + child = rcu_dereference_raw(node->slots[offset]); + } + slot = &node->slots[offset]; + } + + iter->index = start; + if (node) + iter->next_index = 1 + min(max, (start | node_maxindex(node))); + else + iter->next_index = 1; + iter->node = node; + __set_iter_shift(iter, shift); + set_iter_tags(iter, node, offset, IDR_FREE); + + return slot; +} + +/** + * idr_destroy - release all internal memory from an IDR + * @idr: idr handle + * + * After this function is called, the IDR is empty, and may be reused or + * the data structure containing it may be freed. + * + * A typical clean-up sequence for objects stored in an idr tree will use + * idr_for_each() to free all objects, if necessary, then idr_destroy() to + * free the memory used to keep track of those objects. + */ +void idr_destroy(struct idr *idr) +{ + struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode); + if (radix_tree_is_internal_node(node)) + radix_tree_free_nodes(node); + idr->idr_rt.rnode = NULL; + root_tag_set(&idr->idr_rt, IDR_FREE); +} +EXPORT_SYMBOL(idr_destroy); + static void radix_tree_node_ctor(void *arg) { diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore index 11d888ca6a92..3b5534b643f0 100644 --- a/tools/testing/radix-tree/.gitignore +++ b/tools/testing/radix-tree/.gitignore @@ -1,2 +1,3 @@ main radix-tree.c +idr.c diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 5274e88cd293..3597a3a9f269 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -2,8 +2,8 @@ CFLAGS += -I. 
-I../../include -g -O2 -Wall -D_LGPL_SOURCE LDFLAGS += -lpthread -lurcu TARGETS = main -OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_bit.o \ - regression1.o regression2.o regression3.o multiorder.o \ +OFILES = main.o radix-tree.o idr.o linux.o test.o tag_check.o find_bit.o \ + regression1.o regression2.o regression3.o multiorder.o idr-test.o \ iteration_check.o benchmark.o ifdef BENCHMARK @@ -23,7 +23,11 @@ vpath %.c ../../lib $(OFILES): *.h */*.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ - ../../../include/linux/radix-tree.h + ../../../include/linux/radix-tree.h \ + ../../../include/linux/idr.h radix-tree.c: ../../../lib/radix-tree.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ + +idr.c: ../../../lib/idr.c + sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c new file mode 100644 index 000000000000..4dffad9284e0 --- /dev/null +++ b/tools/testing/radix-tree/idr-test.c @@ -0,0 +1,342 @@ +/* + * idr-test.c: Test the IDR API + * Copyright (c) 2016 Matthew Wilcox + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include +#include +#include +#include + +#include "test.h" + +#define DUMMY_PTR ((void *)0x12) + +int item_idr_free(int id, void *p, void *data) +{ + struct item *item = p; + assert(item->index == id); + free(p); + + return 0; +} + +void item_idr_remove(struct idr *idr, int id) +{ + struct item *item = idr_find(idr, id); + assert(item->index == id); + idr_remove(idr, id); + free(item); +} + +void idr_alloc_test(void) +{ + unsigned long i; + DEFINE_IDR(idr); + + assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0, 0x4000, GFP_KERNEL) == 0); + assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0x3ffd, 0x4000, GFP_KERNEL) == 0x3ffd); + idr_remove(&idr, 0x3ffd); + idr_remove(&idr, 0); + + for (i = 0x3ffe; i < 0x4003; i++) { + int id; + struct item *item; + + if (i < 0x4000) + item = item_create(i, 0); + else + item = item_create(i - 0x3fff, 0); + + id = idr_alloc_cyclic(&idr, item, 1, 0x4000, GFP_KERNEL); + assert(id == item->index); + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); +} + +void idr_replace_test(void) +{ + DEFINE_IDR(idr); + + idr_alloc(&idr, (void *)-1, 10, 11, GFP_KERNEL); + idr_replace(&idr, &idr, 10); + + idr_destroy(&idr); +} + +/* + * Unlike the radix tree, you can put a NULL pointer -- with care -- into + * the IDR. Some interfaces, like idr_find() do not distinguish between + * "present, value is NULL" and "not present", but that's exactly what some + * users want. 
+ */ +void idr_null_test(void) +{ + int i; + DEFINE_IDR(idr); + + assert(idr_is_empty(&idr)); + + assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0); + assert(!idr_is_empty(&idr)); + idr_remove(&idr, 0); + assert(idr_is_empty(&idr)); + + assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0); + assert(!idr_is_empty(&idr)); + idr_destroy(&idr); + assert(idr_is_empty(&idr)); + + for (i = 0; i < 10; i++) { + assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == i); + } + + assert(idr_replace(&idr, DUMMY_PTR, 3) == NULL); + assert(idr_replace(&idr, DUMMY_PTR, 4) == NULL); + assert(idr_replace(&idr, NULL, 4) == DUMMY_PTR); + assert(idr_replace(&idr, DUMMY_PTR, 11) == ERR_PTR(-ENOENT)); + idr_remove(&idr, 5); + assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 5); + idr_remove(&idr, 5); + + for (i = 0; i < 9; i++) { + idr_remove(&idr, i); + assert(!idr_is_empty(&idr)); + } + idr_remove(&idr, 8); + assert(!idr_is_empty(&idr)); + idr_remove(&idr, 9); + assert(idr_is_empty(&idr)); + + assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0); + assert(idr_replace(&idr, DUMMY_PTR, 3) == ERR_PTR(-ENOENT)); + assert(idr_replace(&idr, DUMMY_PTR, 0) == NULL); + assert(idr_replace(&idr, NULL, 0) == DUMMY_PTR); + + idr_destroy(&idr); + assert(idr_is_empty(&idr)); + + for (i = 1; i < 10; i++) { + assert(idr_alloc(&idr, NULL, 1, 0, GFP_KERNEL) == i); + } + + idr_destroy(&idr); + assert(idr_is_empty(&idr)); +} + +void idr_nowait_test(void) +{ + unsigned int i; + DEFINE_IDR(idr); + + idr_preload(GFP_KERNEL); + + for (i = 0; i < 3; i++) { + struct item *item = item_create(i, 0); + assert(idr_alloc(&idr, item, i, i + 1, GFP_NOWAIT) == i); + } + + idr_preload_end(); + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); +} + +void idr_checks(void) +{ + unsigned long i; + DEFINE_IDR(idr); + + for (i = 0; i < 10000; i++) { + struct item *item = item_create(i, 0); + assert(idr_alloc(&idr, item, 0, 20000, GFP_KERNEL) == i); + } + + assert(idr_alloc(&idr, DUMMY_PTR, 5, 30, GFP_KERNEL) < 0); + + for (i = 0; i < 5000; i++) + item_idr_remove(&idr, i); + + idr_remove(&idr, 3); + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); + + assert(idr_is_empty(&idr)); + + idr_remove(&idr, 3); + idr_remove(&idr, 0); + + for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) { + struct item *item = item_create(i, 0); + assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); + } + assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC); + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); + idr_destroy(&idr); + + assert(idr_is_empty(&idr)); + + for (i = 1; i < 10000; i++) { + struct item *item = item_create(i, 0); + assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i); + } + + idr_for_each(&idr, item_idr_free, &idr); + idr_destroy(&idr); + + idr_replace_test(); + idr_alloc_test(); + idr_null_test(); + idr_nowait_test(); +} + +/* + * Check that we get the correct error when we run out of memory doing + * allocations. To ensure we run out of memory, just "forget" to preload. + * The first test is for not having a bitmap available, and the second test + * is for not being able to allocate a level of the radix tree. + */ +void ida_check_nomem(void) +{ + DEFINE_IDA(ida); + int id, err; + + err = ida_get_new(&ida, &id); + assert(err == -EAGAIN); + err = ida_get_new_above(&ida, 1UL << 30, &id); + assert(err == -EAGAIN); +} + +/* + * Check what happens when we fill a leaf and then delete it. This may + * discover mishandling of IDR_FREE. 
+ */ +void ida_check_leaf(void) +{ + DEFINE_IDA(ida); + int id; + unsigned long i; + + for (i = 0; i < IDA_BITMAP_BITS; i++) { + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new(&ida, &id)); + assert(id == i); + } + + ida_destroy(&ida); + assert(ida_is_empty(&ida)); + + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new(&ida, &id)); + assert(id == 0); + ida_destroy(&ida); + assert(ida_is_empty(&ida)); +} + +/* + * Check allocations up to and slightly above the maximum allowed (2^31-1) ID. + * Allocating up to 2^31-1 should succeed, and then allocating the next one + * should fail. + */ +void ida_check_max(void) +{ + DEFINE_IDA(ida); + int id, err; + unsigned long i, j; + + for (j = 1; j < 65537; j *= 2) { + unsigned long base = (1UL << 31) - j; + for (i = 0; i < j; i++) { + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new_above(&ida, base, &id)); + assert(id == base + i); + } + assert(ida_pre_get(&ida, GFP_KERNEL)); + err = ida_get_new_above(&ida, base, &id); + assert(err == -ENOSPC); + ida_destroy(&ida); + assert(ida_is_empty(&ida)); + rcu_barrier(); + } +} + +void ida_checks(void) +{ + DEFINE_IDA(ida); + int id; + unsigned long i; + + radix_tree_cpu_dead(1); + ida_check_nomem(); + + for (i = 0; i < 10000; i++) { + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new(&ida, &id)); + assert(id == i); + } + + ida_remove(&ida, 20); + ida_remove(&ida, 21); + for (i = 0; i < 3; i++) { + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new(&ida, &id)); + if (i == 2) + assert(id == 10000); + } + + for (i = 0; i < 5000; i++) + ida_remove(&ida, i); + + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new_above(&ida, 5000, &id)); + assert(id == 10001); + + ida_destroy(&ida); + + assert(ida_is_empty(&ida)); + + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new_above(&ida, 1, &id)); + assert(id == 1); + + ida_remove(&ida, id); + assert(ida_is_empty(&ida)); + ida_destroy(&ida); + assert(ida_is_empty(&ida)); + + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new_above(&ida, 1, &id)); + ida_destroy(&ida); + assert(ida_is_empty(&ida)); + + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new_above(&ida, 1, &id)); + assert(id == 1); + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new_above(&ida, 1025, &id)); + assert(id == 1025); + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new_above(&ida, 10000, &id)); + assert(id == 10000); + ida_remove(&ida, 1025); + ida_destroy(&ida); + assert(ida_is_empty(&ida)); + + ida_check_leaf(); + ida_check_max(); + + radix_tree_cpu_dead(1); +} diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h index 701209816a6b..39a0dcb9475a 100644 --- a/tools/testing/radix-tree/linux/gfp.h +++ b/tools/testing/radix-tree/linux/gfp.h @@ -15,10 +15,12 @@ #define __GFP_DIRECT_RECLAIM 0x400000u #define __GFP_KSWAPD_RECLAIM 0x2000000u -#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM) +#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM) + +#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) -#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) -#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { diff --git a/tools/testing/radix-tree/linux/idr.h b/tools/testing/radix-tree/linux/idr.h new file mode 100644 index 
000000000000..4e342f2e37cf --- /dev/null +++ b/tools/testing/radix-tree/linux/idr.h @@ -0,0 +1 @@ +#include "../../../../include/linux/idr.h" diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index dd1d9aefb14f..63fce553781a 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -20,6 +20,7 @@ #define printk printf #define pr_debug printk +#define pr_cont printk #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index f7e9801a6754..ddd90a11db3f 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -314,6 +315,11 @@ static void single_thread_tests(bool long_run) rcu_barrier(); printf("after dynamic_height_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); + idr_checks(); + ida_checks(); + rcu_barrier(); + printf("after idr_checks: %d allocated, preempt %d\n", + nr_allocated, preempt_count); big_gang_check(long_run); rcu_barrier(); printf("after big_gang_check: %d allocated, preempt %d\n", diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 056a23b56467..b30e11d9d271 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -34,6 +34,8 @@ void tag_check(void); void multiorder_checks(void); void iteration_test(unsigned order, unsigned duration); void benchmark(void); +void idr_checks(void); +void ida_checks(void); struct item * item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); -- GitLab From 7ad3d4d85c7af9632055a6ac0aa15b6b6a321c6b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 16 Dec 2016 11:55:56 -0500 Subject: [PATCH 0238/1084] ida: Move ida_bitmap to a percpu variable When we preload the IDA, we allocate an IDA bitmap. Instead of storing that preallocated bitmap in the IDA, we store it in a percpu variable. Generally there are more IDAs in the system than CPUs, so this cuts down on the number of preallocated bitmaps that are unused, and about half of the IDA users did not call ida_destroy() so they were leaking IDA bitmaps. 
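For reference, a minimal sketch of the caller-side pattern this per-cpu
preload assumes (my_alloc_id() and my_lock are placeholder names; the IDA
still relies on the caller's own locking, and the -EAGAIN retry follows the
convention noted in ida_pre_get()):

	static DEFINE_SPINLOCK(my_lock);	/* IDA allocation needs caller locking */

	static int my_alloc_id(struct ida *ida)
	{
		int id, err;

	again:
		if (!ida_pre_get(ida, GFP_KERNEL))
			return -ENOMEM;
		spin_lock(&my_lock);
		err = ida_get_new(ida, &id);
		spin_unlock(&my_lock);
		/* -EAGAIN: another task consumed this CPU's preallocated bitmap */
		if (err == -EAGAIN)
			goto again;
		return err ? err : id;
	}

With the bitmap held in the per-cpu ida_bitmap variable, a preallocation made
by one caller can be consumed by the next allocation on that CPU, for any IDA,
instead of sitting unused in a single struct ida.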
Signed-off-by: Matthew Wilcox --- include/linux/idr.h | 5 +-- lib/idr.c | 39 ++------------------- lib/radix-tree.c | 45 +++++++++++++++++++++++-- tools/testing/radix-tree/linux/kernel.h | 2 -- tools/testing/radix-tree/linux/percpu.h | 5 ++- 5 files changed, 51 insertions(+), 45 deletions(-) diff --git a/include/linux/idr.h b/include/linux/idr.h index f58c0a3addc3..2027c7aba50d 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -14,6 +14,7 @@ #include #include +#include struct idr { struct radix_tree_root idr_rt; @@ -171,9 +172,10 @@ struct ida_bitmap { unsigned long bitmap[IDA_BITMAP_LONGS]; }; +DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap); + struct ida { struct radix_tree_root ida_rt; - struct ida_bitmap *free_bitmap; }; #define IDA_INIT { \ @@ -193,7 +195,6 @@ void ida_simple_remove(struct ida *ida, unsigned int id); static inline void ida_init(struct ida *ida) { INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT); - ida->free_bitmap = NULL; } /** diff --git a/lib/idr.c b/lib/idr.c index b87056e2cc4c..2abd7769c430 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -4,6 +4,7 @@ #include #include +DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); static DEFINE_SPINLOCK(simple_ida_lock); /** @@ -193,38 +194,6 @@ EXPORT_SYMBOL(idr_replace); * limitation, it should be quite straightforward to raise the maximum. */ -/** - * ida_pre_get - reserve resources for ida allocation - * @ida: ida handle - * @gfp: memory allocation flags - * - * This function should be called before calling ida_get_new_above(). If it - * is unable to allocate memory, it will return %0. On success, it returns %1. - */ -int ida_pre_get(struct ida *ida, gfp_t gfp) -{ - struct ida_bitmap *bitmap; - - /* - * This looks weird, but the IDA API has no preload_end() equivalent. - * Instead, ida_get_new() can return -EAGAIN, prompting the caller - * to return to the ida_pre_get() step. - */ - idr_preload(gfp); - idr_preload_end(); - - if (!ida->free_bitmap) { - bitmap = kmalloc(sizeof(struct ida_bitmap), gfp); - if (!bitmap) - return 0; - bitmap = xchg(&ida->free_bitmap, bitmap); - kfree(bitmap); - } - - return 1; -} -EXPORT_SYMBOL(ida_pre_get); - #define IDA_MAX (0x80000000U / IDA_BITMAP_BITS) /** @@ -292,10 +261,9 @@ int ida_get_new_above(struct ida *ida, int start, int *id) new += bit; if (new < 0) return -ENOSPC; - bitmap = ida->free_bitmap; + bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - ida->free_bitmap = NULL; memset(bitmap, 0, sizeof(*bitmap)); __set_bit(bit, bitmap->bitmap); radix_tree_iter_replace(root, &iter, slot, bitmap); @@ -361,9 +329,6 @@ void ida_destroy(struct ida *ida) kfree(bitmap); radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } - - kfree(ida->free_bitmap); - ida->free_bitmap = NULL; } EXPORT_SYMBOL(ida_destroy); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index eaea14b8f2ca..7b9f8515033e 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -69,6 +69,14 @@ static struct kmem_cache *radix_tree_node_cachep; RADIX_TREE_MAP_SHIFT)) #define IDR_PRELOAD_SIZE (IDR_MAX_PATH * 2 - 1) +/* + * The IDA is even shorter since it uses a bitmap at the last level. 
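+ *
+ * As a rough worked example (assuming the usual RADIX_TREE_MAP_SHIFT of 6
+ * and a 128-byte leaf bitmap, i.e. IDA_BITMAP_BITS == 1024):
+ * IDA_INDEX_BITS = 32 - 1 - 10 = 21, IDA_MAX_PATH = DIV_ROUND_UP(21, 6) = 4,
+ * and so IDA_PRELOAD_SIZE = 2 * 4 - 1 = 7 nodes per cpu.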
+ */ +#define IDA_INDEX_BITS (8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS)) +#define IDA_MAX_PATH (DIV_ROUND_UP(IDA_INDEX_BITS, \ + RADIX_TREE_MAP_SHIFT)) +#define IDA_PRELOAD_SIZE (IDA_MAX_PATH * 2 - 1) + /* * Per-cpu pool of preloaded nodes */ @@ -346,9 +354,8 @@ static void dump_ida_node(void *entry, unsigned long index) static void ida_dump(struct ida *ida) { struct radix_tree_root *root = &ida->ida_rt; - pr_debug("ida: %p %p free %d bitmap %p\n", ida, root->rnode, - root->gfp_mask >> ROOT_TAG_SHIFT, - ida->free_bitmap); + pr_debug("ida: %p node %p free %d\n", ida, root->rnode, + root->gfp_mask >> ROOT_TAG_SHIFT); dump_ida_node(root->rnode, 0); } #endif @@ -2080,6 +2087,36 @@ void idr_preload(gfp_t gfp_mask) } EXPORT_SYMBOL(idr_preload); +/** + * ida_pre_get - reserve resources for ida allocation + * @ida: ida handle + * @gfp: memory allocation flags + * + * This function should be called before calling ida_get_new_above(). If it + * is unable to allocate memory, it will return %0. On success, it returns %1. + */ +int ida_pre_get(struct ida *ida, gfp_t gfp) +{ + __radix_tree_preload(gfp, IDA_PRELOAD_SIZE); + /* + * The IDA API has no preload_end() equivalent. Instead, + * ida_get_new() can return -EAGAIN, prompting the caller + * to return to the ida_pre_get() step. + */ + preempt_enable(); + + if (!this_cpu_read(ida_bitmap)) { + struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); + if (!bitmap) + return 0; + bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap); + kfree(bitmap); + } + + return 1; +} +EXPORT_SYMBOL(ida_pre_get); + void **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, int end) { @@ -2219,6 +2256,8 @@ static int radix_tree_cpu_dead(unsigned int cpu) kmem_cache_free(radix_tree_node_cachep, node); rtp->nr--; } + kfree(per_cpu(ida_bitmap, cpu)); + per_cpu(ida_bitmap, cpu) = NULL; return 0; } diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 63fce553781a..677b8c0f60f9 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -24,6 +24,4 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -#define xchg(ptr, x) uatomic_xchg(ptr, x) - #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/percpu.h b/tools/testing/radix-tree/linux/percpu.h index 5837f1d56f17..3ea01a1a88c2 100644 --- a/tools/testing/radix-tree/linux/percpu.h +++ b/tools/testing/radix-tree/linux/percpu.h @@ -1,7 +1,10 @@ - +#define DECLARE_PER_CPU(type, val) extern type val #define DEFINE_PER_CPU(type, val) type val #define __get_cpu_var(var) var #define this_cpu_ptr(var) var +#define this_cpu_read(var) var +#define this_cpu_xchg(var, val) uatomic_xchg(&var, val) +#define this_cpu_cmpxchg(var, old, new) uatomic_cmpxchg(&var, old, new) #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) #define per_cpu(var, cpu) (*per_cpu_ptr(&(var), cpu)) -- GitLab From d37cacc5adace7f3e0824e1f559192ad7299d029 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 17 Dec 2016 08:18:17 -0500 Subject: [PATCH 0239/1084] ida: Use exceptional entries for small IDAs We can use the root entry as a bitmap and save allocating a 128 byte bitmap for an IDA that contains only a few entries (30 on a 32-bit machine, 62 on a 64-bit machine). This costs about 300 bytes of kernel text on x86-64, so as long as 3 IDAs fall into this category, this is a net win for memory consumption. Thanks to Rasmus Villemoes for his work documenting the problem and collecting statistics on IDAs. 
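As a concrete illustration (assuming RADIX_TREE_EXCEPTIONAL_ENTRY == 2 and
RADIX_TREE_EXCEPTIONAL_SHIFT == 2, as in the current radix tree code): an IDA
holding only ID 0 keeps its whole "bitmap" in the root slot as the value 0x6;
the low two bits 0b10 mark the entry as exceptional and bit 2 records that
ID 0 is in use.  Allocating ID 1 as well turns the value into 0xe.  Only when
an ID of 62 or above (30 on 32-bit) is needed does ida_get_new_above() fall
back to allocating a real struct ida_bitmap and shifting the packed bits down
into it.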
Signed-off-by: Matthew Wilcox --- lib/idr.c | 87 +++++++++++++++++++++++++-- lib/radix-tree.c | 8 +++ tools/testing/radix-tree/idr-test.c | 93 ++++++++++++++++++++++++++++- 3 files changed, 181 insertions(+), 7 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index 2abd7769c430..7d25e240bc5a 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -194,6 +194,39 @@ EXPORT_SYMBOL(idr_replace); * limitation, it should be quite straightforward to raise the maximum. */ +/* + * Developer's notes: + * + * The IDA uses the functionality provided by the IDR & radix tree to store + * bitmaps in each entry. The IDR_FREE tag means there is at least one bit + * free, unlike the IDR where it means at least one entry is free. + * + * I considered telling the radix tree that each slot is an order-10 node + * and storing the bit numbers in the radix tree, but the radix tree can't + * allow a single multiorder entry at index 0, which would significantly + * increase memory consumption for the IDA. So instead we divide the index + * by the number of bits in the leaf bitmap before doing a radix tree lookup. + * + * As an optimisation, if there are only a few low bits set in any given + * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional + * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits + * directly in the entry. By being really tricksy, we could store + * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising + * for 0-3 allocated IDs. + * + * We allow the radix tree 'exceptional' count to get out of date. Nothing + * in the IDA nor the radix tree code checks it. If it becomes important + * to maintain an accurate exceptional count, switch the rcu_assign_pointer() + * calls to radix_tree_iter_replace() which will correct the exceptional + * count. + * + * The IDA always requires a lock to alloc/free. If we add a 'test_bit' + * equivalent, it will still need locking. Going to RCU lookup would require + * using RCU to free bitmaps, and that's not trivial without embedding an + * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte + * bitmap, which is excessive. 
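+ *
+ * Example of the index split described above: with a 128-byte leaf bitmap
+ * (IDA_BITMAP_BITS == 1024), ID 5000 lives at radix tree index
+ * 5000 / 1024 = 4, bit 5000 % 1024 = 904 of that leaf's bitmap -- the
+ * calculation both ida_get_new_above() and ida_remove() below perform.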
+ */ + #define IDA_MAX (0x80000000U / IDA_BITMAP_BITS) /** @@ -221,11 +254,12 @@ int ida_get_new_above(struct ida *ida, int start, int *id) struct radix_tree_iter iter; struct ida_bitmap *bitmap; unsigned long index; - unsigned bit; + unsigned bit, ebit; int new; index = start / IDA_BITMAP_BITS; bit = start % IDA_BITMAP_BITS; + ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT; slot = radix_tree_iter_init(&iter, index); for (;;) { @@ -240,10 +274,29 @@ int ida_get_new_above(struct ida *ida, int start, int *id) return PTR_ERR(slot); } } - if (iter.index > index) + if (iter.index > index) { bit = 0; + ebit = RADIX_TREE_EXCEPTIONAL_SHIFT; + } new = iter.index * IDA_BITMAP_BITS; bitmap = rcu_dereference_raw(*slot); + if (radix_tree_exception(bitmap)) { + unsigned long tmp = (unsigned long)bitmap; + ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit); + if (ebit < BITS_PER_LONG) { + tmp |= 1UL << ebit; + rcu_assign_pointer(*slot, (void *)tmp); + *id = new + ebit - RADIX_TREE_EXCEPTIONAL_SHIFT; + return 0; + } + bitmap = this_cpu_xchg(ida_bitmap, NULL); + if (!bitmap) + return -EAGAIN; + memset(bitmap, 0, sizeof(*bitmap)); + bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; + rcu_assign_pointer(*slot, bitmap); + } + if (bitmap) { bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit); @@ -261,6 +314,14 @@ int ida_get_new_above(struct ida *ida, int start, int *id) new += bit; if (new < 0) return -ENOSPC; + if (ebit < BITS_PER_LONG) { + bitmap = (void *)((1UL << ebit) | + RADIX_TREE_EXCEPTIONAL_ENTRY); + radix_tree_iter_replace(root, &iter, slot, + bitmap); + *id = new; + return 0; + } bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; @@ -287,6 +348,7 @@ void ida_remove(struct ida *ida, int id) unsigned long index = id / IDA_BITMAP_BITS; unsigned offset = id % IDA_BITMAP_BITS; struct ida_bitmap *bitmap; + unsigned long *btmp; struct radix_tree_iter iter; void **slot; @@ -295,12 +357,24 @@ void ida_remove(struct ida *ida, int id) goto err; bitmap = rcu_dereference_raw(*slot); - if (!test_bit(offset, bitmap->bitmap)) + if (radix_tree_exception(bitmap)) { + btmp = (unsigned long *)slot; + offset += RADIX_TREE_EXCEPTIONAL_SHIFT; + if (offset >= BITS_PER_LONG) + goto err; + } else { + btmp = bitmap->bitmap; + } + if (!test_bit(offset, btmp)) goto err; - __clear_bit(offset, bitmap->bitmap); + __clear_bit(offset, btmp); radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); - if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) { + if (radix_tree_exception(bitmap)) { + if (rcu_dereference_raw(*slot) == + (void *)RADIX_TREE_EXCEPTIONAL_ENTRY) + radix_tree_iter_delete(&ida->ida_rt, &iter, slot); + } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) { kfree(bitmap); radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } @@ -326,7 +400,8 @@ void ida_destroy(struct ida *ida) radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); - kfree(bitmap); + if (!radix_tree_exception(bitmap)) + kfree(bitmap); radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7b9f8515033e..14130ab197c0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -338,6 +338,14 @@ static void dump_ida_node(void *entry, unsigned long index) for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) dump_ida_node(node->slots[i], index | (i << node->shift)); + } else if (radix_tree_exceptional_entry(entry)) { + pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n", + entry, (int)(index & RADIX_TREE_MAP_MASK), + index * 
IDA_BITMAP_BITS, + index * IDA_BITMAP_BITS + BITS_PER_LONG - + RADIX_TREE_EXCEPTIONAL_SHIFT, + (unsigned long)entry >> + RADIX_TREE_EXCEPTIONAL_SHIFT); } else { struct ida_bitmap *bitmap = entry; diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 4dffad9284e0..59081122c63d 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -11,6 +11,7 @@ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. */ +#include #include #include #include @@ -214,7 +215,7 @@ void ida_check_nomem(void) DEFINE_IDA(ida); int id, err; - err = ida_get_new(&ida, &id); + err = ida_get_new_above(&ida, 256, &id); assert(err == -EAGAIN); err = ida_get_new_above(&ida, 1UL << 30, &id); assert(err == -EAGAIN); @@ -246,6 +247,66 @@ void ida_check_leaf(void) assert(ida_is_empty(&ida)); } +/* + * Check handling of conversions between exceptional entries and full bitmaps. + */ +void ida_check_conv(void) +{ + DEFINE_IDA(ida); + int id; + unsigned long i; + + for (i = 0; i < IDA_BITMAP_BITS * 2; i += IDA_BITMAP_BITS) { + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new_above(&ida, i + 1, &id)); + assert(id == i + 1); + assert(!ida_get_new_above(&ida, i + BITS_PER_LONG, &id)); + assert(id == i + BITS_PER_LONG); + ida_remove(&ida, i + 1); + ida_remove(&ida, i + BITS_PER_LONG); + assert(ida_is_empty(&ida)); + } + + assert(ida_pre_get(&ida, GFP_KERNEL)); + + for (i = 0; i < IDA_BITMAP_BITS * 2; i++) { + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new(&ida, &id)); + assert(id == i); + } + + for (i = IDA_BITMAP_BITS * 2; i > 0; i--) { + ida_remove(&ida, i - 1); + } + assert(ida_is_empty(&ida)); + + for (i = 0; i < IDA_BITMAP_BITS + BITS_PER_LONG - 4; i++) { + assert(ida_pre_get(&ida, GFP_KERNEL)); + assert(!ida_get_new(&ida, &id)); + assert(id == i); + } + + for (i = IDA_BITMAP_BITS + BITS_PER_LONG - 4; i > 0; i--) { + ida_remove(&ida, i - 1); + } + assert(ida_is_empty(&ida)); + + radix_tree_cpu_dead(1); + for (i = 0; i < 1000000; i++) { + int err = ida_get_new(&ida, &id); + if (err == -EAGAIN) { + assert((i % IDA_BITMAP_BITS) == (BITS_PER_LONG - 2)); + assert(ida_pre_get(&ida, GFP_KERNEL)); + err = ida_get_new(&ida, &id); + } else { + assert((i % IDA_BITMAP_BITS) != (BITS_PER_LONG - 2)); + } + assert(!err); + assert(id == i); + } + ida_destroy(&ida); +} + /* * Check allocations up to and slightly above the maximum allowed (2^31-1) ID. 
* Allocating up to 2^31-1 should succeed, and then allocating the next one @@ -273,6 +334,34 @@ void ida_check_max(void) } } +void ida_check_random(void) +{ + DEFINE_IDA(ida); + DECLARE_BITMAP(bitmap, 2048); + int id; + unsigned int i; + time_t s = time(NULL); + + repeat: + memset(bitmap, 0, sizeof(bitmap)); + for (i = 0; i < 100000; i++) { + int i = rand(); + int bit = i & 2047; + if (test_bit(bit, bitmap)) { + __clear_bit(bit, bitmap); + ida_remove(&ida, bit); + } else { + __set_bit(bit, bitmap); + ida_pre_get(&ida, GFP_KERNEL); + assert(!ida_get_new_above(&ida, bit, &id)); + assert(id == bit); + } + } + ida_destroy(&ida); + if (time(NULL) < s + 10) + goto repeat; +} + void ida_checks(void) { DEFINE_IDA(ida); @@ -337,6 +426,8 @@ void ida_checks(void) ida_check_leaf(); ida_check_max(); + ida_check_conv(); + ida_check_random(); radix_tree_cpu_dead(1); } -- GitLab From 8ac04868315c6ffcb2c5a5ad9cd5cec61cad3576 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sun, 18 Dec 2016 22:56:05 -0500 Subject: [PATCH 0240/1084] radix tree test suite: Build separate binaries for some tests To allow developers to run a subset of tests, build separate multiorder and idr-test binaries which will run just the tests in those files. Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- tools/testing/radix-tree/.gitignore | 4 +++- tools/testing/radix-tree/Makefile | 18 ++++++++++++------ tools/testing/radix-tree/idr-test.c | 11 +++++++++++ tools/testing/radix-tree/multiorder.c | 7 +++++++ 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore index 3b5534b643f0..26dedaf57aab 100644 --- a/tools/testing/radix-tree/.gitignore +++ b/tools/testing/radix-tree/.gitignore @@ -1,3 +1,5 @@ +idr.c +idr-test main +multiorder radix-tree.c -idr.c diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 3597a3a9f269..4aba7b75e43a 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,10 +1,10 @@ CFLAGS += -I. 
-I../../include -g -O2 -Wall -D_LGPL_SOURCE LDFLAGS += -lpthread -lurcu -TARGETS = main -OFILES = main.o radix-tree.o idr.o linux.o test.o tag_check.o find_bit.o \ - regression1.o regression2.o regression3.o multiorder.o idr-test.o \ - iteration_check.o benchmark.o +TARGETS = main idr-test multiorder +CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o +OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ + tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o ifdef BENCHMARK CFLAGS += -DBENCHMARK=1 @@ -13,10 +13,16 @@ endif targets: $(TARGETS) main: $(OFILES) - $(CC) $(CFLAGS) $(LDFLAGS) $(OFILES) -o main + $(CC) $(CFLAGS) $(LDFLAGS) $^ -o main + +idr-test: idr-test.o $(CORE_OFILES) + $(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test + +multiorder: multiorder.o $(CORE_OFILES) + $(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder clean: - $(RM) -f $(TARGETS) *.o radix-tree.c + $(RM) $(TARGETS) *.o radix-tree.c idr.c vpath %.c ../../lib diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 59081122c63d..a26098c6123d 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -431,3 +431,14 @@ void ida_checks(void) radix_tree_cpu_dead(1); } + +int __weak main(void) +{ + radix_tree_init(); + idr_checks(); + ida_checks(); + rcu_barrier(); + if (nr_allocated) + printf("nr_allocated = %d\n", nr_allocated); + return 0; +} diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index f79812a5e070..e465a3811b97 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -633,3 +633,10 @@ void multiorder_checks(void) radix_tree_cpu_dead(0); } + +int __weak main(void) +{ + radix_tree_init(); + multiorder_checks(); + return 0; +} -- GitLab From d3e709e63e97e5f3f129b639991cfe266da60bae Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 22 Dec 2016 13:30:22 -0500 Subject: [PATCH 0241/1084] idr: Return the deleted entry from idr_remove It is a relatively common idiom (8 instances) to first look up an IDR entry, and then remove it from the tree if it is found, possibly doing further operations upon the entry afterwards. If we change idr_remove() to return the removed object, all of these users can save themselves a walk of the IDR tree. 
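The conversion is mechanical; as a sketch (idr, lock, id and obj stand in
for each driver's own objects):

	/* before: two walks of the tree */
	spin_lock(&lock);
	obj = idr_find(&idr, id);
	if (obj)
		idr_remove(&idr, id);
	spin_unlock(&lock);

	/* after: a single walk; idr_remove() returns the entry, or NULL */
	spin_lock(&lock);
	obj = idr_remove(&idr, id);
	spin_unlock(&lock);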
Signed-off-by: Matthew Wilcox --- drivers/atm/nicstar.c | 5 ++--- drivers/block/drbd/drbd_main.c | 6 ++---- drivers/firewire/core-cdev.c | 3 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 10 +++------- drivers/net/wireless/marvell/mwifiex/txrx.c | 4 +--- drivers/target/target_core_user.c | 4 +--- include/linux/idr.h | 4 ++-- net/mac80211/status.c | 4 +--- 9 files changed, 15 insertions(+), 29 deletions(-) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index cb28579e8a94..d879f3bca107 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -1980,13 +1980,12 @@ static void dequeue_rx(ns_dev * card, ns_rsqe * rsqe) card->lbfqc = ns_stat_lfbqc_get(stat); id = le32_to_cpu(rsqe->buffer_handle); - skb = idr_find(&card->idr, id); + skb = idr_remove(&card->idr, id); if (!skb) { RXPRINTK(KERN_ERR - "nicstar%d: idr_find() failed!\n", card->index); + "nicstar%d: skb not found!\n", card->index); return; } - idr_remove(&card->idr, id); dma_sync_single_for_cpu(&card->pcidev->dev, NS_PRV_DMA(skb), (NS_PRV_BUFTYPE(skb) == BUF_SM diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 83482721bc01..6bb3b80e7e51 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2915,11 +2915,9 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: for_each_connection(connection, resource) { - peer_device = idr_find(&connection->peer_devices, vnr); - if (peer_device) { - idr_remove(&connection->peer_devices, vnr); + peer_device = idr_remove(&connection->peer_devices, vnr); + if (peer_device) kref_put(&connection->kref, drbd_destroy_connection); - } } for_each_peer_device_safe(peer_device, tmp_peer_device, device) { list_del(&peer_device->peer_devices); diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index aee149bdf4c0..a301fcf46e88 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1307,8 +1307,7 @@ static void iso_resource_work(struct work_struct *work) */ if (r->todo == ISO_RES_REALLOC && !success && !client->in_shutdown && - idr_find(&client->resource_idr, r->resource.handle)) { - idr_remove(&client->resource_idr, r->resource.handle); + idr_remove(&client->resource_idr, r->resource.handle)) { client_put(client); free = true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index c02db01f6583..0218cea6be4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -70,10 +70,10 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) struct amdgpu_bo_list *list; mutex_lock(&fpriv->bo_list_lock); - list = idr_find(&fpriv->bo_list_handles, id); + list = idr_remove(&fpriv->bo_list_handles, id); if (list) { + /* Another user may have a reference to this list still */ mutex_lock(&list->lock); - idr_remove(&fpriv->bo_list_handles, id); mutex_unlock(&list->lock); amdgpu_bo_list_free(list); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 400c66ba4c6b..cf0500671353 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -135,15 +135,11 @@ static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) struct amdgpu_ctx *ctx; mutex_lock(&mgr->lock); - ctx = idr_find(&mgr->ctx_handles, id); - if (ctx) { - 
idr_remove(&mgr->ctx_handles, id); + ctx = idr_remove(&mgr->ctx_handles, id); + if (ctx) kref_put(&ctx->refcount, amdgpu_ctx_do_release); - mutex_unlock(&mgr->lock); - return 0; - } mutex_unlock(&mgr->lock); - return -EINVAL; + return ctx ? 0 : -EINVAL; } static int amdgpu_ctx_query(struct amdgpu_device *adev, diff --git a/drivers/net/wireless/marvell/mwifiex/txrx.c b/drivers/net/wireless/marvell/mwifiex/txrx.c index abdd0cf710bf..fac28bd8fbee 100644 --- a/drivers/net/wireless/marvell/mwifiex/txrx.c +++ b/drivers/net/wireless/marvell/mwifiex/txrx.c @@ -346,9 +346,7 @@ void mwifiex_parse_tx_status_event(struct mwifiex_private *priv, return; spin_lock_irqsave(&priv->ack_status_lock, flags); - ack_skb = idr_find(&priv->ack_status_frames, tx_status->tx_token_id); - if (ack_skb) - idr_remove(&priv->ack_status_frames, tx_status->tx_token_id); + ack_skb = idr_remove(&priv->ack_status_frames, tx_status->tx_token_id); spin_unlock_irqrestore(&priv->ack_status_lock, flags); if (ack_skb) { diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 8041710b6972..18f0ec2e1f9c 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -642,9 +642,7 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD); spin_lock(&udev->commands_lock); - cmd = idr_find(&udev->commands, entry->hdr.cmd_id); - if (cmd) - idr_remove(&udev->commands, cmd->cmd_id); + cmd = idr_remove(&udev->commands, entry->hdr.cmd_id); spin_unlock(&udev->commands_lock); if (!cmd) { diff --git a/include/linux/idr.h b/include/linux/idr.h index 2027c7aba50d..bf70b3ef0a07 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -88,9 +88,9 @@ void *idr_get_next(struct idr *, int *nextid); void *idr_replace(struct idr *, void *, int id); void idr_destroy(struct idr *); -static inline void idr_remove(struct idr *idr, int id) +static inline void *idr_remove(struct idr *idr, int id) { - radix_tree_delete(&idr->idr_rt, id); + return radix_tree_delete_item(&idr->idr_rt, id, NULL); } static inline void idr_init(struct idr *idr) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index ddf71c648cab..43dd3316d8a4 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -462,9 +462,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, unsigned long flags; spin_lock_irqsave(&local->ack_status_lock, flags); - skb = idr_find(&local->ack_status_frames, info->ack_frame_id); - if (skb) - idr_remove(&local->ack_status_frames, info->ack_frame_id); + skb = idr_remove(&local->ack_status_frames, info->ack_frame_id); spin_unlock_irqrestore(&local->ack_status_lock, flags); if (!skb) -- GitLab From 5eeb2d23df29212f901a36dabf16f93d8bd50814 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 24 Dec 2016 07:49:18 -0500 Subject: [PATCH 0242/1084] radix tree test suite: Introduce kmalloc_verbose To help track down where memory leaks may be, add the ability to turn on/off printing allocations, frees and delayed frees. 
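As a usage sketch in the test harness, mirroring the existing __leak_check()
in tag_check.c (tree is a RADIX_TREE(tree, GFP_KERNEL) root; the quoted
messages are the ones added to linux.c and linux/radix-tree.h):

	kmalloc_verbose = 1;
	item_insert(&tree, 1000000);	/* "Allocating %p from slab" for each
					 * radix tree node the insert needs */
	item_delete(&tree, 1000000);	/* "Delaying free of %p to slab" now,
					 * "Freeing %p to slab" once the RCU
					 * callback runs */
	kmalloc_verbose = 0;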
Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/linux.c | 9 +++++++++ tools/testing/radix-tree/linux/radix-tree.h | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 93f06614e91b..3e4f9f36da2e 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -13,6 +13,7 @@ int nr_allocated; int preempt_count; +int kmalloc_verbose; struct kmem_cache { pthread_mutex_t lock; @@ -44,6 +45,8 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) } uatomic_inc(&nr_allocated); + if (kmalloc_verbose) + printf("Allocating %p from slab\n", node); return node; } @@ -51,6 +54,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) { assert(objp); uatomic_dec(&nr_allocated); + if (kmalloc_verbose) + printf("Freeing %p to slab\n", objp); pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs > 10) { memset(objp, POISON_FREE, cachep->size); @@ -68,6 +73,8 @@ void *kmalloc(size_t size, gfp_t gfp) { void *ret = malloc(size); uatomic_inc(&nr_allocated); + if (kmalloc_verbose) + printf("Allocating %p from malloc\n", ret); return ret; } @@ -76,6 +83,8 @@ void kfree(void *p) if (!p) return; uatomic_dec(&nr_allocated); + if (kmalloc_verbose) + printf("Freeing %p to malloc\n", p); free(p); } diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h index ce694ddd4aea..f4d8532e1bef 100644 --- a/tools/testing/radix-tree/linux/radix-tree.h +++ b/tools/testing/radix-tree/linux/radix-tree.h @@ -1 +1,18 @@ +#ifndef _TEST_RADIX_TREE_H +#define _TEST_RADIX_TREE_H #include "../../../../include/linux/radix-tree.h" + +extern int kmalloc_verbose; + +static inline void trace_call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *head)) +{ + if (kmalloc_verbose) + printf("Delaying free of %p to slab\n", (char *)head - + offsetof(struct radix_tree_node, rcu_head)); + call_rcu(head, func); +} +#undef call_rcu +#define call_rcu(x, y) trace_call_rcu(x, y) + +#endif /* _TEST_RADIX_TREE_H */ -- GitLab From 73bc029b76482260a144219786d19951f561716e Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Wed, 4 Jan 2017 11:55:00 -0500 Subject: [PATCH 0243/1084] radix tree test suite: Dial down verbosity with -v Make the output of radix tree test suite less verbose by default and add -v and -vv command line options for increasing level of verbosity. 
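With no flag the output is then essentially limited to the random seed and
the start/finish lines; a single -v adds the per-test banners printed with
printv(1, ...), and -vv restores the old per-check detail printed with
printv(2, ...), for example:

$ ./main -v
$ ./main -vv -s 42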
Signed-off-by: Rehas Sachdeva Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/benchmark.c | 6 +-- tools/testing/radix-tree/iteration_check.c | 2 +- tools/testing/radix-tree/linux.c | 1 + tools/testing/radix-tree/linux/radix-tree.h | 6 +++ tools/testing/radix-tree/main.c | 45 +++++++++++++-------- tools/testing/radix-tree/multiorder.c | 17 ++++---- tools/testing/radix-tree/regression1.c | 4 +- tools/testing/radix-tree/regression2.c | 4 +- tools/testing/radix-tree/regression3.c | 28 ++++++------- tools/testing/radix-tree/tag_check.c | 22 +++++----- 10 files changed, 77 insertions(+), 58 deletions(-) diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index 215ca86c7605..9b09ddfe462f 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -71,7 +71,7 @@ static void benchmark_size(unsigned long size, unsigned long step, int order) tagged = benchmark_iter(&tree, true); normal = benchmark_iter(&tree, false); - printf("Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n", + printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n", size, step, order, tagged, normal); item_kill_tree(&tree); @@ -85,8 +85,8 @@ void benchmark(void) 128, 256, 512, 12345, 0}; int c, s; - printf("starting benchmarks\n"); - printf("RADIX_TREE_MAP_SHIFT = %d\n", RADIX_TREE_MAP_SHIFT); + printv(1, "starting benchmarks\n"); + printv(1, "RADIX_TREE_MAP_SHIFT = %d\n", RADIX_TREE_MAP_SHIFT); for (c = 0; size[c]; c++) for (s = 0; step[s]; s++) diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c index 7572b7ed930e..a92bab513701 100644 --- a/tools/testing/radix-tree/iteration_check.c +++ b/tools/testing/radix-tree/iteration_check.c @@ -177,7 +177,7 @@ void iteration_test(unsigned order, unsigned test_duration) { int i; - printf("Running %siteration tests for %d seconds\n", + printv(1, "Running %siteration tests for %d seconds\n", order > 0 ? "multiorder " : "", test_duration); max_order = order; diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 3e4f9f36da2e..94bcdb992bbf 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -14,6 +14,7 @@ int nr_allocated; int preempt_count; int kmalloc_verbose; +int test_verbose; struct kmem_cache { pthread_mutex_t lock; diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h index f4d8532e1bef..ddd135fa3af7 100644 --- a/tools/testing/radix-tree/linux/radix-tree.h +++ b/tools/testing/radix-tree/linux/radix-tree.h @@ -3,6 +3,7 @@ #include "../../../../include/linux/radix-tree.h" extern int kmalloc_verbose; +extern int test_verbose; static inline void trace_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)) @@ -12,6 +13,11 @@ static inline void trace_call_rcu(struct rcu_head *head, offsetof(struct radix_tree_node, rcu_head)); call_rcu(head, func); } + +#define printv(verbosity_level, fmt, ...) \ + if(test_verbose >= verbosity_level) \ + printf(fmt, ##__VA_ARGS__) + #undef call_rcu #define call_rcu(x, y) trace_call_rcu(x, y) diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index ddd90a11db3f..86de448b699e 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -68,7 +68,7 @@ void big_gang_check(bool long_run) for (i = 0; i < (long_run ? 
1000 : 3); i++) { __big_gang_check(); - printf("%d ", i); + printv(2, "%d ", i); fflush(stdout); } } @@ -129,14 +129,19 @@ void check_copied_tags(struct radix_tree_root *tree, unsigned long start, unsign putchar('.'); */ if (idx[i] < start || idx[i] > end) { if (item_tag_get(tree, idx[i], totag)) { - printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag)); + printv(2, "%lu-%lu: %lu, tags %d-%d\n", start, + end, idx[i], item_tag_get(tree, idx[i], + fromtag), + item_tag_get(tree, idx[i], totag)); } assert(!item_tag_get(tree, idx[i], totag)); continue; } if (item_tag_get(tree, idx[i], fromtag) ^ item_tag_get(tree, idx[i], totag)) { - printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag)); + printv(2, "%lu-%lu: %lu, tags %d-%d\n", start, end, + idx[i], item_tag_get(tree, idx[i], fromtag), + item_tag_get(tree, idx[i], totag)); } assert(!(item_tag_get(tree, idx[i], fromtag) ^ item_tag_get(tree, idx[i], totag))); @@ -238,7 +243,7 @@ static void __locate_check(struct radix_tree_root *tree, unsigned long index, item = item_lookup(tree, index); index2 = find_item(tree, item); if (index != index2) { - printf("index %ld order %d inserted; found %ld\n", + printv(2, "index %ld order %d inserted; found %ld\n", index, order, index2); abort(); } @@ -289,48 +294,48 @@ static void single_thread_tests(bool long_run) { int i; - printf("starting single_thread_tests: %d allocated, preempt %d\n", + printv(1, "starting single_thread_tests: %d allocated, preempt %d\n", nr_allocated, preempt_count); multiorder_checks(); rcu_barrier(); - printf("after multiorder_check: %d allocated, preempt %d\n", + printv(2, "after multiorder_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); locate_check(); rcu_barrier(); - printf("after locate_check: %d allocated, preempt %d\n", + printv(2, "after locate_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); tag_check(); rcu_barrier(); - printf("after tag_check: %d allocated, preempt %d\n", + printv(2, "after tag_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); gang_check(); rcu_barrier(); - printf("after gang_check: %d allocated, preempt %d\n", + printv(2, "after gang_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); add_and_check(); rcu_barrier(); - printf("after add_and_check: %d allocated, preempt %d\n", + printv(2, "after add_and_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); dynamic_height_check(); rcu_barrier(); - printf("after dynamic_height_check: %d allocated, preempt %d\n", + printv(2, "after dynamic_height_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); idr_checks(); ida_checks(); rcu_barrier(); - printf("after idr_checks: %d allocated, preempt %d\n", + printv(2, "after idr_checks: %d allocated, preempt %d\n", nr_allocated, preempt_count); big_gang_check(long_run); rcu_barrier(); - printf("after big_gang_check: %d allocated, preempt %d\n", + printv(2, "after big_gang_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); for (i = 0; i < (long_run ? 
2000 : 3); i++) { copy_tag_check(); - printf("%d ", i); + printv(2, "%d ", i); fflush(stdout); } rcu_barrier(); - printf("after copy_tag_check: %d allocated, preempt %d\n", + printv(2, "after copy_tag_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); } @@ -340,16 +345,20 @@ int main(int argc, char **argv) int opt; unsigned int seed = time(NULL); - while ((opt = getopt(argc, argv, "ls:")) != -1) { + while ((opt = getopt(argc, argv, "ls:v")) != -1) { if (opt == 'l') long_run = true; else if (opt == 's') seed = strtoul(optarg, NULL, 0); + else if (opt == 'v') + test_verbose++; } printf("random seed %u\n", seed); srand(seed); + printf("running tests\n"); + rcu_register_thread(); radix_tree_init(); @@ -366,9 +375,11 @@ int main(int argc, char **argv) benchmark(); rcu_barrier(); - printf("after rcu_barrier: %d allocated, preempt %d\n", + printv(2, "after rcu_barrier: %d allocated, preempt %d\n", nr_allocated, preempt_count); rcu_unregister_thread(); + printf("tests completed\n"); + exit(0); } diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index e465a3811b97..72d80f7059d3 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -30,7 +30,7 @@ static void __multiorder_tag_test(int index, int order) /* our canonical entry */ base = index & ~((1 << order) - 1); - printf("Multiorder tag test with index %d, canonical entry %d\n", + printv(2, "Multiorder tag test with index %d, canonical entry %d\n", index, base); err = item_insert_order(&tree, index, order); @@ -150,7 +150,7 @@ static void multiorder_check(unsigned long index, int order) struct item *item2 = item_create(min, order); RADIX_TREE(tree, GFP_KERNEL); - printf("Multiorder index %ld, order %d\n", index, order); + printv(2, "Multiorder index %ld, order %d\n", index, order); assert(item_insert_order(&tree, index, order) == 0); @@ -188,7 +188,7 @@ static void multiorder_shrink(unsigned long index, int order) RADIX_TREE(tree, GFP_KERNEL); struct radix_tree_node *node; - printf("Multiorder shrink index %ld, order %d\n", index, order); + printv(2, "Multiorder shrink index %ld, order %d\n", index, order); assert(item_insert_order(&tree, 0, order) == 0); @@ -209,7 +209,8 @@ static void multiorder_shrink(unsigned long index, int order) item_check_absent(&tree, i); if (!item_delete(&tree, 0)) { - printf("failed to delete index %ld (order %d)\n", index, order); abort(); + printv(2, "failed to delete index %ld (order %d)\n", index, order); + abort(); } for (i = 0; i < 2*max; i++) @@ -234,7 +235,7 @@ void multiorder_iteration(void) void **slot; int i, j, err; - printf("Multiorder iteration test\n"); + printv(1, "Multiorder iteration test\n"); #define NUM_ENTRIES 11 int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128}; @@ -275,7 +276,7 @@ void multiorder_tagged_iteration(void) void **slot; int i, j; - printf("Multiorder tagged iteration test\n"); + printv(1, "Multiorder tagged iteration test\n"); #define MT_NUM_ENTRIES 9 int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128}; @@ -453,7 +454,7 @@ static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc) { struct radix_tree_preload *rtp = &radix_tree_preloads; if (rtp->nr != 0) - printf("split(%u %u) remaining %u\n", old_order, new_order, + printv(2, "split(%u %u) remaining %u\n", old_order, new_order, rtp->nr); /* * Can't check for equality here as some nodes may have been @@ -461,7 +462,7 @@ static void check_mem(unsigned old_order, unsigned new_order, unsigned 
alloc) * nodes allocated since they should have all been preloaded. */ if (nr_allocated > alloc) - printf("split(%u %u) allocated %u %u\n", old_order, new_order, + printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order, alloc, nr_allocated); } diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c index 0d6813a61b37..bf97742fc18c 100644 --- a/tools/testing/radix-tree/regression1.c +++ b/tools/testing/radix-tree/regression1.c @@ -193,7 +193,7 @@ void regression1_test(void) long arg; /* Regression #1 */ - printf("running regression test 1, should finish in under a minute\n"); + printv(1, "running regression test 1, should finish in under a minute\n"); nr_threads = 2; pthread_barrier_init(&worker_barrier, NULL, nr_threads); @@ -216,5 +216,5 @@ void regression1_test(void) free(threads); - printf("regression test 1, done\n"); + printv(1, "regression test 1, done\n"); } diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c index a41325d7a170..a24d1beec7c8 100644 --- a/tools/testing/radix-tree/regression2.c +++ b/tools/testing/radix-tree/regression2.c @@ -80,7 +80,7 @@ void regression2_test(void) unsigned long int start, end; struct page *pages[1]; - printf("running regression test 2 (should take milliseconds)\n"); + printv(1, "running regression test 2 (should take milliseconds)\n"); /* 0. */ for (i = 0; i <= max_slots - 1; i++) { p = page_alloc(); @@ -116,5 +116,5 @@ void regression2_test(void) /* We remove all the remained nodes */ radix_tree_delete(&mt_tree, max_slots); - printf("regression test 2, done\n"); + printv(1, "regression test 2, done\n"); } diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c index b594841fae85..670c3d2ae7b1 100644 --- a/tools/testing/radix-tree/regression3.c +++ b/tools/testing/radix-tree/regression3.c @@ -34,21 +34,21 @@ void regression3_test(void) void **slot; bool first; - printf("running regression test 3 (should take milliseconds)\n"); + printv(1, "running regression test 3 (should take milliseconds)\n"); radix_tree_insert(&root, 0, ptr0); radix_tree_tag_set(&root, 0, 0); first = true; radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) { - printf("tagged %ld %p\n", iter.index, *slot); + printv(2, "tagged %ld %p\n", iter.index, *slot); if (first) { radix_tree_insert(&root, 1, ptr); radix_tree_tag_set(&root, 1, 0); first = false; } if (radix_tree_deref_retry(*slot)) { - printf("retry at %ld\n", iter.index); + printv(2, "retry at %ld\n", iter.index); slot = radix_tree_iter_retry(&iter); continue; } @@ -57,13 +57,13 @@ void regression3_test(void) first = true; radix_tree_for_each_slot(slot, &root, &iter, 0) { - printf("slot %ld %p\n", iter.index, *slot); + printv(2, "slot %ld %p\n", iter.index, *slot); if (first) { radix_tree_insert(&root, 1, ptr); first = false; } if (radix_tree_deref_retry(*slot)) { - printk("retry at %ld\n", iter.index); + printv(2, "retry at %ld\n", iter.index); slot = radix_tree_iter_retry(&iter); continue; } @@ -72,30 +72,30 @@ void regression3_test(void) first = true; radix_tree_for_each_contig(slot, &root, &iter, 0) { - printk("contig %ld %p\n", iter.index, *slot); + printv(2, "contig %ld %p\n", iter.index, *slot); if (first) { radix_tree_insert(&root, 1, ptr); first = false; } if (radix_tree_deref_retry(*slot)) { - printk("retry at %ld\n", iter.index); + printv(2, "retry at %ld\n", iter.index); slot = radix_tree_iter_retry(&iter); continue; } } radix_tree_for_each_slot(slot, &root, &iter, 0) { - printf("slot 
%ld %p\n", iter.index, *slot); + printv(2, "slot %ld %p\n", iter.index, *slot); if (!iter.index) { - printf("next at %ld\n", iter.index); + printv(2, "next at %ld\n", iter.index); slot = radix_tree_iter_resume(slot, &iter); } } radix_tree_for_each_contig(slot, &root, &iter, 0) { - printf("contig %ld %p\n", iter.index, *slot); + printv(2, "contig %ld %p\n", iter.index, *slot); if (!iter.index) { - printf("next at %ld\n", iter.index); + printv(2, "next at %ld\n", iter.index); slot = radix_tree_iter_resume(slot, &iter); } } @@ -103,9 +103,9 @@ void regression3_test(void) radix_tree_tag_set(&root, 0, 0); radix_tree_tag_set(&root, 1, 0); radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) { - printf("tagged %ld %p\n", iter.index, *slot); + printv(2, "tagged %ld %p\n", iter.index, *slot); if (!iter.index) { - printf("next at %ld\n", iter.index); + printv(2, "next at %ld\n", iter.index); slot = radix_tree_iter_resume(slot, &iter); } } @@ -113,5 +113,5 @@ void regression3_test(void) radix_tree_delete(&root, 0); radix_tree_delete(&root, 1); - printf("regression test 3 passed\n"); + printv(1, "regression test 3 passed\n"); } diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index fd98c132207a..d4ff00989245 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -49,10 +49,10 @@ void simple_checks(void) } verify_tag_consistency(&tree, 0); verify_tag_consistency(&tree, 1); - printf("before item_kill_tree: %d allocated\n", nr_allocated); + printv(2, "before item_kill_tree: %d allocated\n", nr_allocated); item_kill_tree(&tree); rcu_barrier(); - printf("after item_kill_tree: %d allocated\n", nr_allocated); + printv(2, "after item_kill_tree: %d allocated\n", nr_allocated); } /* @@ -257,7 +257,7 @@ static void do_thrash(struct radix_tree_root *tree, char *thrash_state, int tag) gang_check(tree, thrash_state, tag); - printf("%d(%d) %d(%d) %d(%d) %d(%d) / " + printv(2, "%d(%d) %d(%d) %d(%d) %d(%d) / " "%d(%d) present, %d(%d) tagged\n", insert_chunk, nr_inserted, delete_chunk, nr_deleted, @@ -296,13 +296,13 @@ static void __leak_check(void) { RADIX_TREE(tree, GFP_KERNEL); - printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated); + printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated); item_insert(&tree, 1000000); - printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated); + printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated); item_delete(&tree, 1000000); - printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated); + printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated); item_kill_tree(&tree); - printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated); + printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated); } static void single_check(void) @@ -336,15 +336,15 @@ void tag_check(void) extend_checks(); contract_checks(); rcu_barrier(); - printf("after extend_checks: %d allocated\n", nr_allocated); + printv(2, "after extend_checks: %d allocated\n", nr_allocated); __leak_check(); leak_check(); rcu_barrier(); - printf("after leak_check: %d allocated\n", nr_allocated); + printv(2, "after leak_check: %d allocated\n", nr_allocated); simple_checks(); rcu_barrier(); - printf("after simple_checks: %d allocated\n", nr_allocated); + printv(2, "after simple_checks: %d allocated\n", nr_allocated); thrash_tags(); rcu_barrier(); - printf("after thrash_tags: %d allocated\n", nr_allocated); + printv(2, "after thrash_tags: %d allocated\n", nr_allocated); } -- GitLab From 1293d5c5f54d5118fbb34fc94e01ba02fcd25fc1 Mon Sep 17 00:00:00 
2001 From: Matthew Wilcox Date: Mon, 16 Jan 2017 16:41:29 -0500 Subject: [PATCH 0244/1084] radix-tree: Chain preallocated nodes through ->parent Chaining through the ->private_data member means we have to zero ->private_data after removing preallocated nodes from the list. We're about to initialise ->parent anyway, so we can avoid zeroing it. Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 9 ++++----- tools/testing/radix-tree/linux.c | 6 +++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 14130ab197c0..66c71312c381 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -82,7 +82,7 @@ static struct kmem_cache *radix_tree_node_cachep; */ struct radix_tree_preload { unsigned nr; - /* nodes->private_data points to next preallocated node */ + /* nodes->parent points to next preallocated node */ struct radix_tree_node *nodes; }; static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; @@ -405,8 +405,7 @@ radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr) { ret = rtp->nodes; - rtp->nodes = ret->private_data; - ret->private_data = NULL; + rtp->nodes = ret->parent; rtp->nr--; } /* @@ -483,7 +482,7 @@ static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) preempt_disable(); rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr < nr) { - node->private_data = rtp->nodes; + node->parent = rtp->nodes; rtp->nodes = node; rtp->nr++; } else { @@ -2260,7 +2259,7 @@ static int radix_tree_cpu_dead(unsigned int cpu) rtp = &per_cpu(radix_tree_preloads, cpu); while (rtp->nr) { node = rtp->nodes; - rtp->nodes = node->private_data; + rtp->nodes = node->parent; kmem_cache_free(radix_tree_node_cachep, node); rtp->nr--; } diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 94bcdb992bbf..cf48c8473f48 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -35,9 +35,9 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) if (cachep->nr_objs) { cachep->nr_objs--; node = cachep->objs; - cachep->objs = node->private_data; + cachep->objs = node->parent; pthread_mutex_unlock(&cachep->lock); - node->private_data = NULL; + node->parent = NULL; } else { pthread_mutex_unlock(&cachep->lock); node = malloc(cachep->size); @@ -64,7 +64,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) } else { struct radix_tree_node *node = objp; cachep->nr_objs++; - node->private_data = cachep->objs; + node->parent = cachep->objs; cachep->objs = node; } pthread_mutex_unlock(&cachep->lock); -- GitLab From d58275bc96ae933b1b3888af80920dd6b020c505 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 16 Jan 2017 17:10:21 -0500 Subject: [PATCH 0245/1084] radix-tree: Store a pointer to the root in each node Instead of having this mysterious private_data in each radix_tree_node, store a pointer to the root, which can be useful for debugging. This also relieves the mm code from the duty of updating it. 
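As an illustration only (this helper is not part of the patch), storing the root in every node lets a tree user that embeds the root in a larger structure walk back to the owner with container_of(); the struct and field names below are the page-cache ones touched by this patch:

static struct address_space *node_to_mapping(struct radix_tree_node *node)
{
	/* page_tree is the radix_tree_root embedded in struct address_space,
	 * so the root pointer recorded in each node leads back to it. */
	return container_of(node->root, struct address_space, page_tree);
}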
Acked-by: Johannes Weiner Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 2 +- lib/radix-tree.c | 14 ++++++++------ mm/workingset.c | 6 ++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 2ba0c1f46c84..d250059bbfa4 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -96,7 +96,7 @@ struct radix_tree_node { unsigned char count; /* Total entry count */ unsigned char exceptional; /* Exceptional entry count */ struct radix_tree_node *parent; /* Used when ascending tree */ - void *private_data; /* For tree user */ + struct radix_tree_root *root; /* The tree we belong to */ union { struct list_head private_list; /* For tree user */ struct rcu_head rcu_head; /* Used when freeing node */ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 66c71312c381..dcb9a2329e65 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -374,6 +374,7 @@ static void ida_dump(struct ida *ida) */ static struct radix_tree_node * radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, + struct radix_tree_root *root, unsigned int shift, unsigned int offset, unsigned int count, unsigned int exceptional) { @@ -419,11 +420,12 @@ radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, out: BUG_ON(radix_tree_is_internal_node(ret)); if (ret) { - ret->parent = parent; ret->shift = shift; ret->offset = offset; ret->count = count; ret->exceptional = exceptional; + ret->parent = parent; + ret->root = root; } return ret; } @@ -631,7 +633,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, do { struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL, - shift, 0, 1, 0); + root, shift, 0, 1, 0); if (!node) return -ENOMEM; @@ -828,7 +830,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, shift -= RADIX_TREE_MAP_SHIFT; if (child == NULL) { /* Have to add a child node. */ - child = radix_tree_node_alloc(gfp, node, shift, + child = radix_tree_node_alloc(gfp, node, root, shift, offset, 0, 0); if (!child) return -ENOMEM; @@ -1330,7 +1332,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, for (;;) { if (node->shift > order) { - child = radix_tree_node_alloc(gfp, node, + child = radix_tree_node_alloc(gfp, node, root, node->shift - RADIX_TREE_MAP_SHIFT, offset, 0, 0); if (!child) @@ -2152,8 +2154,8 @@ void **idr_get_free(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; if (child == NULL) { /* Have to add a child node. */ - child = radix_tree_node_alloc(gfp, node, shift, offset, - 0, 0); + child = radix_tree_node_alloc(gfp, node, root, shift, + offset, 0, 0); if (!child) return ERR_PTR(-ENOMEM); all_tag_set(child, IDR_FREE); diff --git a/mm/workingset.c b/mm/workingset.c index abb58ffa3c64..80c913c89f11 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -354,10 +354,8 @@ void workingset_update_node(struct radix_tree_node *node, void *private) * as node->private_list is protected by &mapping->tree_lock. 
*/ if (node->count && node->count == node->exceptional) { - if (list_empty(&node->private_list)) { - node->private_data = mapping; + if (list_empty(&node->private_list)) list_lru_add(&shadow_nodes, &node->private_list); - } } else { if (!list_empty(&node->private_list)) list_lru_del(&shadow_nodes, &node->private_list); @@ -435,7 +433,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, */ node = container_of(item, struct radix_tree_node, private_list); - mapping = node->private_data; + mapping = container_of(node->root, struct address_space, page_tree); /* Coming from the list, invert the lock order */ if (!spin_trylock(&mapping->tree_lock)) { -- GitLab From f7137f79c57f228321dde2ab4586015504feaaac Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 30 Jan 2017 16:22:30 -0500 Subject: [PATCH 0246/1084] radix_tree_iter_resume: Fix out of bounds error The address sanitizer occasionally finds an out of bounds error while running the test-suite. It turned out to be a read of the pointer immediately next to the tree root, but this out of bounds error could have occurred elsewhere. This happens because radix_tree_iter_resume() dereferences 'slot' before checking whether we've come to the end of the chunk. We can just delete this line; the value was never used. Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index dcb9a2329e65..c1c079ffadcd 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1685,7 +1685,6 @@ void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter) slot++; iter->index = __radix_tree_iter_add(iter, 1); - node = rcu_dereference_raw(*slot); skip_siblings(&node, slot, iter); iter->next_index = iter->index; iter->tags = 0; -- GitLab From 829f83d3653ef7105187b0110887dbfd46a30ce4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 28 Jan 2017 10:03:48 -0500 Subject: [PATCH 0247/1084] radix tree test suite: Enable address sanitizer I was looking for a memory scribble and instead found a pile of memory leaks. Ensure no more occur in future. Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- tools/testing/radix-tree/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 4aba7b75e43a..ecea846e7660 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,5 +1,5 @@ -CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE +CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address LDFLAGS += -lpthread -lurcu TARGETS = main idr-test multiorder CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o -- GitLab From 18d0c57394e42ff536e5fdc776b6b217fdd9889c Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sun, 29 Jan 2017 01:48:34 -0500 Subject: [PATCH 0248/1084] radix tree test suite: Fix leaky tests If item_insert() or item_insert_order() failed to insert an item, they would leak the item they had just created. This was causing runaway memory consumption while running the iteration_check testcase, which proves that Ross has too much memory in his workstation ;-) Make sure to free the item on error. Found with -fsanitize=address. 
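Condensed, the fixed insert path looks like this (a restatement of the hunk that follows, with the ownership rule spelled out in a comment):

int item_insert_order(struct radix_tree_root *root, unsigned long index,
		unsigned order)
{
	struct item *item = item_create(index, order);
	int err = __item_insert(root, item);

	/* On failure the tree never took the pointer, so free it here
	 * instead of leaking it. */
	if (err)
		free(item);
	return err;
}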
Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- tools/testing/radix-tree/test.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index e5726e373646..1a257d738a1e 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -29,15 +29,28 @@ int __item_insert(struct radix_tree_root *root, struct item *item) return __radix_tree_insert(root, item->index, item->order, item); } -int item_insert(struct radix_tree_root *root, unsigned long index) +struct item *item_create(unsigned long index, unsigned int order) { - return __item_insert(root, item_create(index, 0)); + struct item *ret = malloc(sizeof(*ret)); + + ret->index = index; + ret->order = order; + return ret; } int item_insert_order(struct radix_tree_root *root, unsigned long index, unsigned order) { - return __item_insert(root, item_create(index, order)); + struct item *item = item_create(index, order); + int err = __item_insert(root, item); + if (err) + free(item); + return err; +} + +int item_insert(struct radix_tree_root *root, unsigned long index) +{ + return item_insert_order(root, index, 0); } void item_sanity(struct item *item, unsigned long index) @@ -61,15 +74,6 @@ int item_delete(struct radix_tree_root *root, unsigned long index) return 0; } -struct item *item_create(unsigned long index, unsigned int order) -{ - struct item *ret = malloc(sizeof(*ret)); - - ret->index = index; - ret->order = order; - return ret; -} - void item_check_present(struct radix_tree_root *root, unsigned long index) { struct item *item; -- GitLab From 6da0396cac7692b6667c09382a746593fff90e6d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sun, 29 Jan 2017 01:52:55 -0500 Subject: [PATCH 0249/1084] radix tree test suite: Fix leaks in regression2.c None of the malloc'ed data structures were ever being freed. Found with -fsanitize=address. Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- tools/testing/radix-tree/regression2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c index a24d1beec7c8..42dd2a33ed24 100644 --- a/tools/testing/radix-tree/regression2.c +++ b/tools/testing/radix-tree/regression2.c @@ -103,7 +103,7 @@ void regression2_test(void) /* 4. */ for (i = max_slots - 1; i >= 0; i--) - radix_tree_delete(&mt_tree, i); + free(radix_tree_delete(&mt_tree, i)); /* 5. */ // NOTE: start should not be 0 because radix_tree_gang_lookup_tag_slot @@ -114,7 +114,9 @@ void regression2_test(void) PAGECACHE_TAG_TOWRITE); /* We remove all the remained nodes */ - radix_tree_delete(&mt_tree, max_slots); + free(radix_tree_delete(&mt_tree, max_slots)); + + BUG_ON(!radix_tree_empty(&mt_tree)); printv(1, "regression test 2, done\n"); } -- GitLab From 3b7869c31f9358a63e502c8c5c7664daf1c6d8b0 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sun, 29 Jan 2017 02:00:31 -0500 Subject: [PATCH 0250/1084] radix tree test suite: Fix split/join memory leaks The last of the memory leaks in the test suite was a couple of places in the split/join testing where I forgot to free the element being removed from the tree. 
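The shape of the fix, shown here as a fragment mirroring the join2 hunk below: the test's items are plain malloc()ed objects, so whatever a join displaces must be freed by the test itself.

	/* Look up the entry the join is about to replace and free it;
	 * the tree only drops the pointer, it never frees user data. */
	item2 = radix_tree_lookup(&tree, 0);
	free(item2);

	radix_tree_join(&tree, 0, order1, item1);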
Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- tools/testing/radix-tree/multiorder.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 72d80f7059d3..06c71178d07d 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -356,6 +356,10 @@ void multiorder_tagged_iteration(void) item_kill_tree(&tree); } +/* + * Basic join checks: make sure we can't find an entry in the tree after + * a larger entry has replaced it + */ static void multiorder_join1(unsigned long index, unsigned order1, unsigned order2) { @@ -374,6 +378,10 @@ static void multiorder_join1(unsigned long index, item_kill_tree(&tree); } +/* + * Check that the accounting of exceptional entries is handled correctly + * by joining an exceptional entry to a normal pointer. + */ static void multiorder_join2(unsigned order1, unsigned order2) { RADIX_TREE(tree, GFP_KERNEL); @@ -387,6 +395,9 @@ static void multiorder_join2(unsigned order1, unsigned order2) assert(item2 == (void *)0x12UL); assert(node->exceptional == 1); + item2 = radix_tree_lookup(&tree, 0); + free(item2); + radix_tree_join(&tree, 0, order1, item1); item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); assert(item2 == item1); @@ -472,6 +483,7 @@ static void __multiorder_split(int old_order, int new_order) void **slot; struct radix_tree_iter iter; unsigned alloc; + struct item *item; radix_tree_preload(GFP_KERNEL); assert(item_insert_order(&tree, 0, old_order) == 0); @@ -480,7 +492,7 @@ static void __multiorder_split(int old_order, int new_order) /* Wipe out the preloaded cache or it'll confuse check_mem() */ radix_tree_cpu_dead(0); - radix_tree_tag_set(&tree, 0, 2); + item = radix_tree_tag_set(&tree, 0, 2); radix_tree_split_preload(old_order, new_order, GFP_KERNEL); alloc = nr_allocated; @@ -493,6 +505,7 @@ static void __multiorder_split(int old_order, int new_order) radix_tree_preload_end(); item_kill_tree(&tree); + free(item); } static void __multiorder_split2(int old_order, int new_order) -- GitLab From c0cdbf819cd76d977486b4634ea1b4ede2434413 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sun, 29 Jan 2017 02:27:24 -0500 Subject: [PATCH 0251/1084] radix tree test suite: Run iteration tests for longer If the -l flag is set, run the tests for 100 seconds each instead of the normal 10 seconds. Signed-off-by: Matthew Wilcox Reviewed-by: Rehas Sachdeva --- tools/testing/radix-tree/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 86de448b699e..b829127d5670 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -365,8 +365,8 @@ int main(int argc, char **argv) regression1_test(); regression2_test(); regression3_test(); - iteration_test(0, 10); - iteration_test(7, 20); + iteration_test(0, 10 + 90 * long_run); + iteration_test(7, 10 + 90 * long_run); single_thread_tests(long_run); /* Free any remaining preallocated nodes */ -- GitLab From 12320d0ff1c9d5582f5c35e4bb8b9c70c475fd71 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 13 Feb 2017 15:22:48 -0500 Subject: [PATCH 0252/1084] radix-tree: Add rcu_dereference and rcu_assign_pointer calls Some of these have been missing for many years. Others were recently introduced by me. Fortunately, we have tools that help us find such things. 
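The rule being applied throughout is the usual RCU publish/consume pairing; a hypothetical helper (names are illustrative, not taken from this patch) shows both halves:

static void *slot_replace(void __rcu **slot, void *item)
{
	/* Writer-side load: the tree lock is held, so the raw accessor
	 * is sufficient; concurrent readers use rcu_dereference(). */
	void *old = rcu_dereference_raw(*slot);

	/* Publish: the release semantics of rcu_assign_pointer() order
	 * the initialisation of 'item' before the pointer becomes
	 * visible to readers. */
	rcu_assign_pointer(*slot, item);
	return old;
}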
Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 2 +- lib/radix-tree.c | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index d250059bbfa4..0b502de7d23a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -561,7 +561,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return NULL; found: - if (unlikely(radix_tree_is_internal_node(*slot))) + if (unlikely(radix_tree_is_internal_node(rcu_dereference_raw(*slot)))) return __radix_tree_next_slot(slot, iter, flags); return slot; } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c1c079ffadcd..723bebe40eef 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -627,7 +627,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, while (index > shift_maxindex(maxshift)) maxshift += RADIX_TREE_MAP_SHIFT; - slot = root->rnode; + slot = rcu_dereference_raw(root->rnode); if (!slot && (!is_idr(root) || root_tag_get(root, IDR_FREE))) goto out; @@ -678,7 +678,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, bool shrunk = false; for (;;) { - struct radix_tree_node *node = root->rnode; + struct radix_tree_node *node = rcu_dereference_raw(root->rnode); struct radix_tree_node *child; if (!radix_tree_is_internal_node(node)) @@ -692,7 +692,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, */ if (node->count != 1) break; - child = node->slots[0]; + child = rcu_dereference_raw(node->slots[0]); if (!child) break; if (!radix_tree_is_internal_node(child) && node->shift) @@ -755,7 +755,8 @@ static bool delete_node(struct radix_tree_root *root, struct radix_tree_node *parent; if (node->count) { - if (node == entry_to_node(root->rnode)) + if (node_to_entry(node) == + rcu_dereference_raw(root->rnode)) deleted |= radix_tree_shrink(root, update_node, private); return deleted; @@ -823,7 +824,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, if (error < 0) return error; shift = error; - child = root->rnode; + child = rcu_dereference_raw(root->rnode); } while (shift > order) { @@ -868,7 +869,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) struct radix_tree_node *child = entry_to_node(node); for (;;) { - void *entry = child->slots[offset]; + void *entry = rcu_dereference_raw(child->slots[offset]); if (radix_tree_is_internal_node(entry) && !is_sibling_entry(child, entry)) { child = entry_to_node(entry); @@ -925,7 +926,7 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot, } for (i = 0; i < n; i++) { - struct radix_tree_node *old = slot[i]; + struct radix_tree_node *old = rcu_dereference_raw(slot[i]); if (i) { rcu_assign_pointer(slot[i], child); for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) @@ -1102,7 +1103,7 @@ static inline void replace_sibling_entries(struct radix_tree_node *node, unsigned offset = get_slot_offset(node, slot) + 1; while (offset < RADIX_TREE_MAP_SIZE) { - if (node->slots[offset] != ptr) + if (rcu_dereference_raw(node->slots[offset]) != ptr) break; if (count < 0) { node->slots[offset] = NULL; @@ -1308,7 +1309,8 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, tags |= 1 << tag; for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { - if (!is_sibling_entry(parent, parent->slots[end])) + if (!is_sibling_entry(parent, + rcu_dereference_raw(parent->slots[end]))) break; for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) if 
(tags & (1 << tag)) @@ -1339,7 +1341,8 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, goto nomem; if (node != parent) { node->count++; - node->slots[offset] = node_to_entry(child); + rcu_assign_pointer(node->slots[offset], + node_to_entry(child)); for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) if (tags & (1 << tag)) tag_set(node, tag, offset); @@ -1755,7 +1758,8 @@ void **radix_tree_next_chunk(const struct radix_tree_root *root, offset + 1); else while (++offset < RADIX_TREE_MAP_SIZE) { - void *slot = node->slots[offset]; + void *slot = rcu_dereference_raw( + node->slots[offset]); if (is_sibling_entry(node, slot)) continue; if (slot) -- GitLab From d7b627277b57370223d682cede979a279284b12a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 13 Feb 2017 15:58:24 -0500 Subject: [PATCH 0253/1084] radix-tree: Fix __rcu annotations Many places were missing __rcu annotations. A few places needed a few lines of explanation about why it was safe to not use RCU accessors. Add a custom CFLAGS setting to the Makefile to ensure that new patches don't miss RCU annotations. Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 110 ++++++++++++++++---------------- lib/Makefile | 2 + lib/radix-tree.c | 125 ++++++++++++++++++++----------------- 3 files changed, 122 insertions(+), 115 deletions(-) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0b502de7d23a..3e5735064b71 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -221,10 +221,8 @@ static inline unsigned int iter_shift(const struct radix_tree_iter *iter) */ /** - * radix_tree_deref_slot - dereference a slot - * @pslot: pointer to slot, returned by radix_tree_lookup_slot - * Returns: item that was stored in that slot with any direct pointer flag - * removed. + * radix_tree_deref_slot - dereference a slot + * @slot: slot pointer, returned by radix_tree_lookup_slot * * For use with radix_tree_lookup_slot(). Caller must hold tree at least read * locked across slot lookup and dereference. Not required if write lock is @@ -232,26 +230,27 @@ static inline unsigned int iter_shift(const struct radix_tree_iter *iter) * * radix_tree_deref_retry must be used to confirm validity of the pointer if * only the read lock is held. + * + * Return: entry stored in that slot. */ -static inline void *radix_tree_deref_slot(void **pslot) +static inline void *radix_tree_deref_slot(void __rcu **slot) { - return rcu_dereference(*pslot); + return rcu_dereference(*slot); } /** - * radix_tree_deref_slot_protected - dereference a slot without RCU lock but with tree lock held - * @pslot: pointer to slot, returned by radix_tree_lookup_slot - * Returns: item that was stored in that slot with any direct pointer flag - * removed. - * - * Similar to radix_tree_deref_slot but only used during migration when a pages - * mapping is being moved. The caller does not hold the RCU read lock but it - * must hold the tree lock to prevent parallel updates. + * radix_tree_deref_slot_protected - dereference a slot with tree lock held + * @slot: slot pointer, returned by radix_tree_lookup_slot + * + * Similar to radix_tree_deref_slot. The caller does not hold the RCU read + * lock but it must hold the tree lock to prevent parallel updates. + * + * Return: entry stored in that slot. 
*/ -static inline void *radix_tree_deref_slot_protected(void **pslot, +static inline void *radix_tree_deref_slot_protected(void __rcu **slot, spinlock_t *treelock) { - return rcu_dereference_protected(*pslot, lockdep_is_held(treelock)); + return rcu_dereference_protected(*slot, lockdep_is_held(treelock)); } /** @@ -287,9 +286,9 @@ static inline int radix_tree_exception(void *arg) return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); } -int __radix_tree_create(struct radix_tree_root *root, unsigned long index, +int __radix_tree_create(struct radix_tree_root *, unsigned long index, unsigned order, struct radix_tree_node **nodep, - void ***slotp); + void __rcu ***slotp); int __radix_tree_insert(struct radix_tree_root *, unsigned long index, unsigned order, void *); static inline int radix_tree_insert(struct radix_tree_root *root, @@ -298,42 +297,41 @@ static inline int radix_tree_insert(struct radix_tree_root *root, return __radix_tree_insert(root, index, 0, entry); } void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, - struct radix_tree_node **nodep, void ***slotp); + struct radix_tree_node **nodep, void __rcu ***slotp); void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); -void **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long); +void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *, + unsigned long index); typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *); -void __radix_tree_replace(struct radix_tree_root *root, - struct radix_tree_node *node, - void **slot, void *item, +void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *, + void __rcu **slot, void *entry, radix_tree_update_node_t update_node, void *private); void radix_tree_iter_replace(struct radix_tree_root *, - const struct radix_tree_iter *, void **slot, void *item); -void radix_tree_replace_slot(struct radix_tree_root *root, - void **slot, void *item); -void __radix_tree_delete_node(struct radix_tree_root *root, - struct radix_tree_node *node, + const struct radix_tree_iter *, void __rcu **slot, void *entry); +void radix_tree_replace_slot(struct radix_tree_root *, + void __rcu **slot, void *entry); +void __radix_tree_delete_node(struct radix_tree_root *, + struct radix_tree_node *, radix_tree_update_node_t update_node, void *private); void radix_tree_iter_delete(struct radix_tree_root *, - struct radix_tree_iter *iter, void **slot); + struct radix_tree_iter *iter, void __rcu **slot); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); -void radix_tree_clear_tags(struct radix_tree_root *root, - struct radix_tree_node *node, - void **slot); +void radix_tree_clear_tags(struct radix_tree_root *, struct radix_tree_node *, + void __rcu **slot); unsigned int radix_tree_gang_lookup(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items); unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *, - void ***results, unsigned long *indices, + void __rcu ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); void radix_tree_init(void); -void *radix_tree_tag_set(struct radix_tree_root *root, +void *radix_tree_tag_set(struct radix_tree_root *, unsigned long index, unsigned int tag); 
-void *radix_tree_tag_clear(struct radix_tree_root *root, +void *radix_tree_tag_clear(struct radix_tree_root *, unsigned long index, unsigned int tag); int radix_tree_tag_get(const struct radix_tree_root *, unsigned long index, unsigned int tag); @@ -341,15 +339,13 @@ void radix_tree_iter_tag_set(struct radix_tree_root *, const struct radix_tree_iter *iter, unsigned int tag); void radix_tree_iter_tag_clear(struct radix_tree_root *, const struct radix_tree_iter *iter, unsigned int tag); -unsigned int -radix_tree_gang_lookup_tag(const struct radix_tree_root *, void **results, - unsigned long first_index, unsigned int max_items, - unsigned int tag); -unsigned int -radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, void ***results, - unsigned long first_index, unsigned int max_items, - unsigned int tag); -int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag); +unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *, + void **results, unsigned long first_index, + unsigned int max_items, unsigned int tag); +unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *, + void __rcu ***results, unsigned long first_index, + unsigned int max_items, unsigned int tag); +int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag); static inline void radix_tree_preload_end(void) { @@ -361,7 +357,7 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index, unsigned new_order); int radix_tree_join(struct radix_tree_root *, unsigned long index, unsigned new_order, void *); -void **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *, +void __rcu **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *, gfp_t, int end); enum { @@ -377,7 +373,7 @@ enum { * @start: iteration starting index * Returns: NULL */ -static __always_inline void ** +static __always_inline void __rcu ** radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) { /* @@ -406,7 +402,7 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start) * Also it fills @iter with data about chunk: position in the tree (index), * its end (next_index), and constructs a bit mask for tagged iterating (tags). */ -void **radix_tree_next_chunk(const struct radix_tree_root *, +void __rcu **radix_tree_next_chunk(const struct radix_tree_root *, struct radix_tree_iter *iter, unsigned flags); /** @@ -419,7 +415,8 @@ void **radix_tree_next_chunk(const struct radix_tree_root *, * containing it and updates @iter to describe the entry. If @index is not * present, it returns NULL. */ -static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root, +static inline void __rcu ** +radix_tree_iter_lookup(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned long index) { radix_tree_iter_init(iter, index); @@ -436,7 +433,8 @@ static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root, * which is at least @index. If @index is larger than any present entry, this * function returns NULL. The @iter is updated to describe the entry found. */ -static inline void **radix_tree_iter_find(const struct radix_tree_root *root, +static inline void __rcu ** +radix_tree_iter_find(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned long index) { radix_tree_iter_init(iter, index); @@ -453,7 +451,7 @@ static inline void **radix_tree_iter_find(const struct radix_tree_root *root, * and continue the iteration. 
*/ static inline __must_check -void **radix_tree_iter_retry(struct radix_tree_iter *iter) +void __rcu **radix_tree_iter_retry(struct radix_tree_iter *iter) { iter->next_index = iter->index; iter->tags = 0; @@ -476,7 +474,7 @@ __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) * have been invalidated by an insertion or deletion. Call this function * before releasing the lock to continue the iteration from the next index. */ -void **__must_check radix_tree_iter_resume(void **slot, +void __rcu **__must_check radix_tree_iter_resume(void __rcu **slot, struct radix_tree_iter *iter); /** @@ -492,11 +490,11 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) } #ifdef CONFIG_RADIX_TREE_MULTIORDER -void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, - unsigned flags); +void __rcu **__radix_tree_next_slot(void __rcu **slot, + struct radix_tree_iter *iter, unsigned flags); #else /* Can't happen without sibling entries, but the compiler can't tell that */ -static inline void ** __radix_tree_next_slot(void **slot, +static inline void __rcu **__radix_tree_next_slot(void __rcu **slot, struct radix_tree_iter *iter, unsigned flags) { return slot; @@ -522,8 +520,8 @@ static inline void ** __radix_tree_next_slot(void **slot, * b) we are doing non-tagged iteration, and iter->index and iter->next_index * have been set up so that radix_tree_chunk_size() returns 1 or 0. */ -static __always_inline void ** -radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) +static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot, + struct radix_tree_iter *iter, unsigned flags) { if (flags & RADIX_TREE_ITER_TAGGED) { iter->tags >>= 1; diff --git a/lib/Makefile b/lib/Makefile index bc4073a8cd08..2fc096985b21 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -24,6 +24,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o +CFLAGS_radix-tree.o += -DCONFIG_SPARSE_RCU_POINTER + lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-$(CONFIG_HAS_DMA) += dma-noop.o diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 723bebe40eef..9c0fa4df736b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -104,7 +104,7 @@ static inline void *node_to_entry(void *ptr) static inline bool is_sibling_entry(const struct radix_tree_node *parent, void *node) { - void **ptr = node; + void __rcu **ptr = node; return (parent->slots <= ptr) && (ptr < parent->slots + RADIX_TREE_MAP_SIZE); } @@ -116,8 +116,8 @@ bool is_sibling_entry(const struct radix_tree_node *parent, void *node) } #endif -static inline -unsigned long get_slot_offset(const struct radix_tree_node *parent, void **slot) +static inline unsigned long +get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) { return slot - parent->slots; } @@ -126,12 +126,13 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent, struct radix_tree_node **nodep, unsigned long index) { unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; - void **entry = rcu_dereference_raw(parent->slots[offset]); + void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); #ifdef CONFIG_RADIX_TREE_MULTIORDER if (radix_tree_is_internal_node(entry)) { if (is_sibling_entry(parent, entry)) { - void **sibentry = (void **) entry_to_node(entry); + void __rcu **sibentry; + sibentry = (void __rcu **) entry_to_node(entry); offset = 
get_slot_offset(parent, sibentry); entry = rcu_dereference_raw(*sibentry); } @@ -618,7 +619,7 @@ static unsigned radix_tree_load_root(const struct radix_tree_root *root, static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, unsigned long index, unsigned int shift) { - struct radix_tree_node *slot; + void *entry; unsigned int maxshift; int tag; @@ -627,8 +628,8 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, while (index > shift_maxindex(maxshift)) maxshift += RADIX_TREE_MAP_SHIFT; - slot = rcu_dereference_raw(root->rnode); - if (!slot && (!is_idr(root) || root_tag_get(root, IDR_FREE))) + entry = rcu_dereference_raw(root->rnode); + if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) goto out; do { @@ -652,15 +653,19 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, } BUG_ON(shift > BITS_PER_LONG); - if (radix_tree_is_internal_node(slot)) { - entry_to_node(slot)->parent = node; - } else if (radix_tree_exceptional_entry(slot)) { + if (radix_tree_is_internal_node(entry)) { + entry_to_node(entry)->parent = node; + } else if (radix_tree_exceptional_entry(entry)) { /* Moving an exceptional root->rnode to a node */ node->exceptional = 1; } - node->slots[0] = slot; - slot = node_to_entry(node); - rcu_assign_pointer(root->rnode, slot); + /* + * entry was already in the radix tree, so we do not need + * rcu_assign_pointer here + */ + node->slots[0] = (void __rcu *)entry; + entry = node_to_entry(node); + rcu_assign_pointer(root->rnode, entry); shift += RADIX_TREE_MAP_SHIFT; } while (shift <= maxshift); out: @@ -708,7 +713,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, * (node->slots[0]), it will be safe to dereference the new * one (root->rnode) as far as dependent read barriers go. 
*/ - root->rnode = child; + root->rnode = (void __rcu *)child; if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) root_tag_clear(root, IDR_FREE); @@ -732,7 +737,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, */ node->count = 0; if (!radix_tree_is_internal_node(child)) { - node->slots[0] = RADIX_TREE_RETRY; + node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; if (update_node) update_node(node, private); } @@ -805,10 +810,10 @@ static bool delete_node(struct radix_tree_root *root, */ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, unsigned order, struct radix_tree_node **nodep, - void ***slotp) + void __rcu ***slotp) { struct radix_tree_node *node = NULL, *child; - void **slot = (void **)&root->rnode; + void __rcu **slot = (void __rcu **)&root->rnode; unsigned long maxindex; unsigned int shift, offset = 0; unsigned long max = index | ((1UL << order) - 1); @@ -890,8 +895,8 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) } #ifdef CONFIG_RADIX_TREE_MULTIORDER -static inline int insert_entries(struct radix_tree_node *node, void **slot, - void *item, unsigned order, bool replace) +static inline int insert_entries(struct radix_tree_node *node, + void __rcu **slot, void *item, unsigned order, bool replace) { struct radix_tree_node *child; unsigned i, n, tag, offset, tags = 0; @@ -953,8 +958,8 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot, return n; } #else -static inline int insert_entries(struct radix_tree_node *node, void **slot, - void *item, unsigned order, bool replace) +static inline int insert_entries(struct radix_tree_node *node, + void __rcu **slot, void *item, unsigned order, bool replace) { if (*slot) return -EEXIST; @@ -981,7 +986,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, unsigned order, void *item) { struct radix_tree_node *node; - void **slot; + void __rcu **slot; int error; BUG_ON(radix_tree_is_internal_node(item)); @@ -1023,15 +1028,15 @@ EXPORT_SYMBOL(__radix_tree_insert); */ void *__radix_tree_lookup(const struct radix_tree_root *root, unsigned long index, struct radix_tree_node **nodep, - void ***slotp) + void __rcu ***slotp) { struct radix_tree_node *node, *parent; unsigned long maxindex; - void **slot; + void __rcu **slot; restart: parent = NULL; - slot = (void **)&root->rnode; + slot = (void __rcu **)&root->rnode; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return NULL; @@ -1066,10 +1071,10 @@ void *__radix_tree_lookup(const struct radix_tree_root *root, * exclusive from other writers. Any dereference of the slot must be done * using radix_tree_deref_slot. 
*/ -void **radix_tree_lookup_slot(const struct radix_tree_root *root, +void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root, unsigned long index) { - void **slot; + void __rcu **slot; if (!__radix_tree_lookup(root, index, NULL, &slot)) return NULL; @@ -1096,7 +1101,7 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) EXPORT_SYMBOL(radix_tree_lookup); static inline void replace_sibling_entries(struct radix_tree_node *node, - void **slot, int count, int exceptional) + void __rcu **slot, int count, int exceptional) { #ifdef CONFIG_RADIX_TREE_MULTIORDER void *ptr = node_to_entry(slot); @@ -1115,8 +1120,8 @@ static inline void replace_sibling_entries(struct radix_tree_node *node, #endif } -static void replace_slot(void **slot, void *item, struct radix_tree_node *node, - int count, int exceptional) +static void replace_slot(void __rcu **slot, void *item, + struct radix_tree_node *node, int count, int exceptional) { if (WARN_ON_ONCE(radix_tree_is_internal_node(item))) return; @@ -1147,7 +1152,7 @@ static bool node_tag_get(const struct radix_tree_root *root, * deleted. */ static int calculate_count(struct radix_tree_root *root, - struct radix_tree_node *node, void **slot, + struct radix_tree_node *node, void __rcu **slot, void *item, void *old) { if (is_idr(root)) { @@ -1175,7 +1180,7 @@ static int calculate_count(struct radix_tree_root *root, */ void __radix_tree_replace(struct radix_tree_root *root, struct radix_tree_node *node, - void **slot, void *item, + void __rcu **slot, void *item, radix_tree_update_node_t update_node, void *private) { void *old = rcu_dereference_raw(*slot); @@ -1188,7 +1193,7 @@ void __radix_tree_replace(struct radix_tree_root *root, * deleting entries, but that needs accounting against the * node unless the slot is root->rnode. */ - WARN_ON_ONCE(!node && (slot != (void **)&root->rnode) && + WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) && (count || exceptional)); replace_slot(slot, item, node, count, exceptional); @@ -1218,7 +1223,7 @@ void __radix_tree_replace(struct radix_tree_root *root, * radix_tree_iter_replace(). */ void radix_tree_replace_slot(struct radix_tree_root *root, - void **slot, void *item) + void __rcu **slot, void *item) { __radix_tree_replace(root, NULL, slot, item, NULL, NULL); } @@ -1233,7 +1238,8 @@ void radix_tree_replace_slot(struct radix_tree_root *root, * Caller must hold tree write locked across split and replacement. 
*/ void radix_tree_iter_replace(struct radix_tree_root *root, - const struct radix_tree_iter *iter, void **slot, void *item) + const struct radix_tree_iter *iter, + void __rcu **slot, void *item) { __radix_tree_replace(root, iter->node, slot, item, NULL, NULL); } @@ -1257,7 +1263,7 @@ int radix_tree_join(struct radix_tree_root *root, unsigned long index, unsigned order, void *item) { struct radix_tree_node *node; - void **slot; + void __rcu **slot; int error; BUG_ON(radix_tree_is_internal_node(item)); @@ -1292,7 +1298,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, unsigned order) { struct radix_tree_node *parent, *node, *child; - void **slot; + void __rcu **slot; unsigned int offset, end; unsigned n, tag, tags = 0; gfp_t gfp = root_gfp_mask(root); @@ -1603,8 +1609,8 @@ static void set_iter_tags(struct radix_tree_iter *iter, } #ifdef CONFIG_RADIX_TREE_MULTIORDER -static void **skip_siblings(struct radix_tree_node **nodep, - void **slot, struct radix_tree_iter *iter) +static void __rcu **skip_siblings(struct radix_tree_node **nodep, + void __rcu **slot, struct radix_tree_iter *iter) { void *sib = node_to_entry(slot - 1); @@ -1621,8 +1627,8 @@ static void **skip_siblings(struct radix_tree_node **nodep, return NULL; } -void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, - unsigned flags) +void __rcu **__radix_tree_next_slot(void __rcu **slot, + struct radix_tree_iter *iter, unsigned flags) { unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; struct radix_tree_node *node = rcu_dereference_raw(*slot); @@ -1675,14 +1681,15 @@ void ** __radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, } EXPORT_SYMBOL(__radix_tree_next_slot); #else -static void **skip_siblings(struct radix_tree_node **nodep, - void **slot, struct radix_tree_iter *iter) +static void __rcu **skip_siblings(struct radix_tree_node **nodep, + void __rcu **slot, struct radix_tree_iter *iter) { return slot; } #endif -void **radix_tree_iter_resume(void **slot, struct radix_tree_iter *iter) +void __rcu **radix_tree_iter_resume(void __rcu **slot, + struct radix_tree_iter *iter) { struct radix_tree_node *node; @@ -1703,7 +1710,7 @@ EXPORT_SYMBOL(radix_tree_iter_resume); * @flags: RADIX_TREE_ITER_* flags and tag index * Returns: pointer to chunk first slot, or NULL if iteration is over */ -void **radix_tree_next_chunk(const struct radix_tree_root *root, +void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags) { unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; @@ -1740,7 +1747,7 @@ void **radix_tree_next_chunk(const struct radix_tree_root *root, iter->tags = 1; iter->node = NULL; __set_iter_shift(iter, 0); - return (void **)&root->rnode; + return (void __rcu **)&root->rnode; } do { @@ -1819,7 +1826,7 @@ radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items) { struct radix_tree_iter iter; - void **slot; + void __rcu **slot; unsigned int ret = 0; if (unlikely(!max_items)) @@ -1861,11 +1868,11 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); */ unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *root, - void ***results, unsigned long *indices, + void __rcu ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items) { struct radix_tree_iter iter; - void **slot; + void __rcu **slot; unsigned int ret = 0; if (unlikely(!max_items)) @@ -1902,7 +1909,7 @@ radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void 
**results, unsigned int tag) { struct radix_tree_iter iter; - void **slot; + void __rcu **slot; unsigned int ret = 0; if (unlikely(!max_items)) @@ -1939,11 +1946,11 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag); */ unsigned int radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, - void ***results, unsigned long first_index, + void __rcu ***results, unsigned long first_index, unsigned int max_items, unsigned int tag) { struct radix_tree_iter iter; - void **slot; + void __rcu **slot; unsigned int ret = 0; if (unlikely(!max_items)) @@ -1979,7 +1986,7 @@ void __radix_tree_delete_node(struct radix_tree_root *root, } static bool __radix_tree_delete(struct radix_tree_root *root, - struct radix_tree_node *node, void **slot) + struct radix_tree_node *node, void __rcu **slot) { void *old = rcu_dereference_raw(*slot); int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0; @@ -2009,7 +2016,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, * which can access this tree. */ void radix_tree_iter_delete(struct radix_tree_root *root, - struct radix_tree_iter *iter, void **slot) + struct radix_tree_iter *iter, void __rcu **slot) { if (__radix_tree_delete(root, iter->node, slot)) iter->index = iter->next_index; @@ -2030,7 +2037,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node = NULL; - void **slot; + void __rcu **slot; void *entry; entry = __radix_tree_lookup(root, index, &node, &slot); @@ -2064,7 +2071,7 @@ EXPORT_SYMBOL(radix_tree_delete); void radix_tree_clear_tags(struct radix_tree_root *root, struct radix_tree_node *node, - void **slot) + void __rcu **slot) { if (node) { unsigned int tag, offset = get_slot_offset(node, slot); @@ -2129,11 +2136,11 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) } EXPORT_SYMBOL(ida_pre_get); -void **idr_get_free(struct radix_tree_root *root, +void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, int end) { struct radix_tree_node *node = NULL, *child; - void **slot = (void **)&root->rnode; + void __rcu **slot = (void __rcu **)&root->rnode; unsigned long maxindex, start = iter->next_index; unsigned long max = end > 0 ? end - 1 : INT_MAX; unsigned int shift, offset = 0; -- GitLab From 7e73eb0b2df5e8d7bd00a3c5980ab86619699963 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 13 Feb 2017 16:03:55 -0500 Subject: [PATCH 0254/1084] idr: Add missing __rcu annotations Where we use the radix tree iteration macros, we need to annotate 'slot' with __rcu. Make sure we don't forget any new places in the future with the same CFLAGS check used for radix-tree.c. 
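The annotation pattern for the iteration macros is uniform; a minimal sketch of what sparse now expects (mirroring the idr_for_each() hunk below):

	struct radix_tree_iter iter;
	void __rcu **slot;

	radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
		/* The slot holds an RCU-protected pointer: read it through
		 * an rcu accessor rather than a plain dereference. */
		void *entry = rcu_dereference_raw(*slot);
		/* ... use entry ... */
	}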
Signed-off-by: Matthew Wilcox --- lib/Makefile | 1 + lib/idr.c | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 2fc096985b21..43a80ec3bd10 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -25,6 +25,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o CFLAGS_radix-tree.o += -DCONFIG_SPARSE_RCU_POINTER +CFLAGS_idr.o += -DCONFIG_SPARSE_RCU_POINTER lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/idr.c b/lib/idr.c index 7d25e240bc5a..b13682bb0a1c 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -28,7 +28,7 @@ static DEFINE_SPINLOCK(simple_ida_lock); */ int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { - void **slot; + void __rcu **slot; struct radix_tree_iter iter; if (WARN_ON_ONCE(start < 0)) @@ -98,7 +98,7 @@ int idr_for_each(const struct idr *idr, int (*fn)(int id, void *p, void *data), void *data) { struct radix_tree_iter iter; - void **slot; + void __rcu **slot; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { int ret = fn(iter.index, rcu_dereference_raw(*slot), data); @@ -123,7 +123,7 @@ EXPORT_SYMBOL(idr_for_each); void *idr_get_next(struct idr *idr, int *nextid) { struct radix_tree_iter iter; - void **slot; + void __rcu **slot; slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); if (!slot) @@ -151,7 +151,7 @@ EXPORT_SYMBOL(idr_get_next); void *idr_replace(struct idr *idr, void *ptr, int id) { struct radix_tree_node *node; - void **slot = NULL; + void __rcu **slot = NULL; void *entry; if (WARN_ON_ONCE(id < 0)) @@ -250,7 +250,7 @@ EXPORT_SYMBOL(idr_replace); int ida_get_new_above(struct ida *ida, int start, int *id) { struct radix_tree_root *root = &ida->ida_rt; - void **slot; + void __rcu **slot; struct radix_tree_iter iter; struct ida_bitmap *bitmap; unsigned long index; @@ -350,7 +350,7 @@ void ida_remove(struct ida *ida, int id) struct ida_bitmap *bitmap; unsigned long *btmp; struct radix_tree_iter iter; - void **slot; + void __rcu **slot; slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index); if (!slot) @@ -396,7 +396,7 @@ EXPORT_SYMBOL(ida_remove); void ida_destroy(struct ida *ida) { struct radix_tree_iter iter; - void **slot; + void __rcu **slot; radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); -- GitLab From c6ce3e2fe3dacda5e8afb0036c814ae9c3fee9b9 Mon Sep 17 00:00:00 2001 From: Rehas Sachdeva Date: Mon, 13 Feb 2017 16:16:03 -0500 Subject: [PATCH 0255/1084] radix tree test suite: Add config option for map shift Add config option "SHIFT=" to Makefile for building test suite with any value of RADIX_TREE_MAP_SHIFT between 3 and 7 inclusive. 
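For reference, the generated header is tiny; with the default SHIFT it contains a single define (the value tracks the SHIFT make variable):

/* generated/map-shift.h -- written by the 'mapshift' Makefile target */
#define RADIX_TREE_MAP_SHIFT 3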
Signed-off-by: Rehas Sachdeva [mawilcox@microsoft.com: .gitignore, quieten grep, remove on clean] Signed-off-by: Matthew Wilcox --- tools/testing/radix-tree/.gitignore | 1 + tools/testing/radix-tree/Makefile | 18 +++++++++++++----- tools/testing/radix-tree/linux/kernel.h | 6 ------ tools/testing/radix-tree/linux/radix-tree.h | 2 ++ 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore index 26dedaf57aab..d4706c0ffceb 100644 --- a/tools/testing/radix-tree/.gitignore +++ b/tools/testing/radix-tree/.gitignore @@ -1,3 +1,4 @@ +generated/map-shift.h idr.c idr-test main diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index ecea846e7660..f11315bedefc 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -6,11 +6,11 @@ CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o -ifdef BENCHMARK - CFLAGS += -DBENCHMARK=1 +ifndef SHIFT + SHIFT=3 endif -targets: $(TARGETS) +targets: mapshift $(TARGETS) main: $(OFILES) $(CC) $(CFLAGS) $(LDFLAGS) $^ -o main @@ -22,11 +22,11 @@ multiorder: multiorder.o $(CORE_OFILES) $(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder clean: - $(RM) $(TARGETS) *.o radix-tree.c idr.c + $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h vpath %.c ../../lib -$(OFILES): *.h */*.h \ +$(OFILES): *.h */*.h generated/map-shift.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ ../../../include/linux/radix-tree.h \ @@ -37,3 +37,11 @@ radix-tree.c: ../../../lib/radix-tree.c idr.c: ../../../lib/idr.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ + +.PHONY: mapshift + +mapshift: + @if ! grep -qw $(SHIFT) generated/map-shift.h; then \ + echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ + generated/map-shift.h; \ + fi diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 677b8c0f60f9..b21a77fddcf7 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -12,12 +12,6 @@ #include #include "../../../include/linux/kconfig.h" -#ifdef BENCHMARK -#define RADIX_TREE_MAP_SHIFT 6 -#else -#define RADIX_TREE_MAP_SHIFT 3 -#endif - #define printk printf #define pr_debug printk #define pr_cont printk diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h index ddd135fa3af7..bf1bb231f9b5 100644 --- a/tools/testing/radix-tree/linux/radix-tree.h +++ b/tools/testing/radix-tree/linux/radix-tree.h @@ -1,5 +1,7 @@ #ifndef _TEST_RADIX_TREE_H #define _TEST_RADIX_TREE_H + +#include "generated/map-shift.h" #include "../../../../include/linux/radix-tree.h" extern int kmalloc_verbose; -- GitLab From cc7639ce18b950086c7b06e00350652db3732183 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Thu, 2 Feb 2017 10:33:42 +0530 Subject: [PATCH 0256/1084] powerpc/xmon: Update ppc-dis/opc.c and ppc.h Upgrade ppc-opc.c, ppc-dis.c and ppc.h to the versions belonging to the following binutils commit: 65b650b4c7463f4508bed523c24ab0031a5ae5cd * ppc-dis.c (print_insn_powerpc): Don't skip all operands after setting skip_optional. That is the last version of those files that were licensed under GPLv2. This leaves the code in a state that does not compile, because the binutils code needs to be tweaked to work in the kernel. 
We don't fix that in this commit, because we want to import more binutils changes in subsequent commits. So for now we mark XMON_DISASSEMBLY as BROKEN, so it can't be built. Signed-off-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig.debug | 1 + arch/powerpc/xmon/ppc-dis.c | 272 +++++++++++--- arch/powerpc/xmon/ppc-opc.c | 728 ++++++++++++------------------------ arch/powerpc/xmon/ppc.h | 62 +-- 4 files changed, 493 insertions(+), 570 deletions(-) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index c86df246339e..73a3264cf052 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -115,6 +115,7 @@ config XMON_DEFAULT config XMON_DISASSEMBLY bool "Include disassembly support in xmon" depends on XMON + depends on BROKEN default y help Include support for disassembling in xmon. You probably want diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c index ee9891734149..2545a36f21a5 100644 --- a/arch/powerpc/xmon/ppc-dis.c +++ b/arch/powerpc/xmon/ppc-dis.c @@ -1,5 +1,5 @@ /* ppc-dis.c -- Disassemble PowerPC instructions - Copyright 1994, 1995, 2000, 2001, 2002, 2003, 2004, 2005, 2006 + Copyright 1994, 1995, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. Written by Ian Lance Taylor, Cygnus Support @@ -19,34 +19,193 @@ You should have received a copy of the GNU General Public License along with this file; see the file COPYING. If not, write to the Free Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -#include -#include -#include "nonstdio.h" -#include "ansidecl.h" -#include "ppc.h" +#include +#include "sysdep.h" #include "dis-asm.h" +#include "opcode/ppc.h" -/* Print a PowerPC or POWER instruction. */ +/* This file provides several disassembler functions, all of which use + the disassembler interface defined in dis-asm.h. Several functions + are provided because this file handles disassembly for the PowerPC + in both big and little endian mode and also for the POWER (RS/6000) + chip. */ + +static int print_insn_powerpc (bfd_vma, struct disassemble_info *, int, int); + +/* Determine which set of machines to disassemble for. PPC403/601 or + BookE. For convenience, also disassemble instructions supported + by the AltiVec vector unit. 
*/ + +static int +powerpc_dialect (struct disassemble_info *info) +{ + int dialect = PPC_OPCODE_PPC; + + if (BFD_DEFAULT_TARGET_SIZE == 64) + dialect |= PPC_OPCODE_64; + + if (info->disassembler_options + && strstr (info->disassembler_options, "booke") != NULL) + dialect |= PPC_OPCODE_BOOKE | PPC_OPCODE_BOOKE64; + else if ((info->mach == bfd_mach_ppc_e500) + || (info->disassembler_options + && strstr (info->disassembler_options, "e500") != NULL)) + dialect |= (PPC_OPCODE_BOOKE + | PPC_OPCODE_SPE | PPC_OPCODE_ISEL + | PPC_OPCODE_EFS | PPC_OPCODE_BRLOCK + | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK + | PPC_OPCODE_RFMCI); + else if (info->disassembler_options + && strstr (info->disassembler_options, "efs") != NULL) + dialect |= PPC_OPCODE_EFS; + else if (info->disassembler_options + && strstr (info->disassembler_options, "e300") != NULL) + dialect |= PPC_OPCODE_E300 | PPC_OPCODE_CLASSIC | PPC_OPCODE_COMMON; + else if (info->disassembler_options + && strstr (info->disassembler_options, "440") != NULL) + dialect |= PPC_OPCODE_BOOKE | PPC_OPCODE_32 + | PPC_OPCODE_440 | PPC_OPCODE_ISEL | PPC_OPCODE_RFMCI; + else + dialect |= (PPC_OPCODE_403 | PPC_OPCODE_601 | PPC_OPCODE_CLASSIC + | PPC_OPCODE_COMMON | PPC_OPCODE_ALTIVEC); + + if (info->disassembler_options + && strstr (info->disassembler_options, "power4") != NULL) + dialect |= PPC_OPCODE_POWER4; + + if (info->disassembler_options + && strstr (info->disassembler_options, "power5") != NULL) + dialect |= PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5; + + if (info->disassembler_options + && strstr (info->disassembler_options, "cell") != NULL) + dialect |= PPC_OPCODE_POWER4 | PPC_OPCODE_CELL | PPC_OPCODE_ALTIVEC; + + if (info->disassembler_options + && strstr (info->disassembler_options, "power6") != NULL) + dialect |= PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_ALTIVEC; + + if (info->disassembler_options + && strstr (info->disassembler_options, "any") != NULL) + dialect |= PPC_OPCODE_ANY; + + if (info->disassembler_options) + { + if (strstr (info->disassembler_options, "32") != NULL) + dialect &= ~PPC_OPCODE_64; + else if (strstr (info->disassembler_options, "64") != NULL) + dialect |= PPC_OPCODE_64; + } + + info->private_data = (char *) 0 + dialect; + return dialect; +} + +/* Print a big endian PowerPC instruction. */ + +int +print_insn_big_powerpc (bfd_vma memaddr, struct disassemble_info *info) +{ + int dialect = (char *) info->private_data - (char *) 0; + return print_insn_powerpc (memaddr, info, 1, dialect); +} + +/* Print a little endian PowerPC instruction. */ int -print_insn_powerpc (unsigned long insn, unsigned long memaddr) +print_insn_little_powerpc (bfd_vma memaddr, struct disassemble_info *info) +{ + int dialect = (char *) info->private_data - (char *) 0; + return print_insn_powerpc (memaddr, info, 0, dialect); +} + +/* Print a POWER (RS/6000) instruction. */ + +int +print_insn_rs6000 (bfd_vma memaddr, struct disassemble_info *info) +{ + return print_insn_powerpc (memaddr, info, 1, PPC_OPCODE_POWER); +} + +/* Extract the operand value from the PowerPC or POWER instruction. */ + +static long +operand_value_powerpc (const struct powerpc_operand *operand, + unsigned long insn, int dialect) +{ + long value; + int invalid; + /* Extract the value from the instruction. 
*/ + if (operand->extract) + value = (*operand->extract) (insn, dialect, &invalid); + else + { + value = (insn >> operand->shift) & operand->bitm; + if ((operand->flags & PPC_OPERAND_SIGNED) != 0) + { + /* BITM is always some number of zeros followed by some + number of ones, followed by some numer of zeros. */ + unsigned long top = operand->bitm; + /* top & -top gives the rightmost 1 bit, so this + fills in any trailing zeros. */ + top |= (top & -top) - 1; + top &= ~(top >> 1); + value = (value ^ top) - top; + } + } + + return value; +} + +/* Determine whether the optional operand(s) should be printed. */ + +static int +skip_optional_operands (const unsigned char *opindex, + unsigned long insn, int dialect) +{ + const struct powerpc_operand *operand; + + for (; *opindex != 0; opindex++) + { + operand = &powerpc_operands[*opindex]; + if ((operand->flags & PPC_OPERAND_NEXT) != 0 + || ((operand->flags & PPC_OPERAND_OPTIONAL) != 0 + && operand_value_powerpc (operand, insn, dialect) != 0)) + return 0; + } + + return 1; +} + +/* Print a PowerPC or POWER instruction. */ + +static int +print_insn_powerpc (bfd_vma memaddr, + struct disassemble_info *info, + int bigendian, + int dialect) { + bfd_byte buffer[4]; + int status; + unsigned long insn; const struct powerpc_opcode *opcode; const struct powerpc_opcode *opcode_end; unsigned long op; - int dialect; - - dialect = PPC_OPCODE_PPC | PPC_OPCODE_CLASSIC | PPC_OPCODE_COMMON - | PPC_OPCODE_64 | PPC_OPCODE_POWER4 | PPC_OPCODE_ALTIVEC; - if (cpu_has_feature(CPU_FTRS_POWER5)) - dialect |= PPC_OPCODE_POWER5; + if (dialect == 0) + dialect = powerpc_dialect (info); - if (cpu_has_feature(CPU_FTRS_CELL)) - dialect |= PPC_OPCODE_CELL | PPC_OPCODE_ALTIVEC; + status = (*info->read_memory_func) (memaddr, buffer, 4, info); + if (status != 0) + { + (*info->memory_error_func) (status, memaddr, info); + return -1; + } - if (cpu_has_feature(CPU_FTRS_POWER6)) - dialect |= PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_ALTIVEC; + if (bigendian) + insn = bfd_getb32 (buffer); + else + insn = bfd_getl32 (buffer); /* Get the major opcode of the instruction. */ op = PPC_OP (insn); @@ -63,6 +222,7 @@ print_insn_powerpc (unsigned long insn, unsigned long memaddr) int invalid; int need_comma; int need_paren; + int skip_optional; table_op = PPC_OP (opcode->opcode); if (op < table_op) @@ -88,13 +248,15 @@ print_insn_powerpc (unsigned long insn, unsigned long memaddr) continue; /* The instruction is valid. */ - printf("%s", opcode->name); if (opcode->operands[0] != 0) - printf("\t"); + (*info->fprintf_func) (info->stream, "%-7s ", opcode->name); + else + (*info->fprintf_func) (info->stream, "%s", opcode->name); /* Now extract and print the operands. */ need_comma = 0; need_paren = 0; + skip_optional = -1; for (opindex = opcode->operands; *opindex != 0; opindex++) { long value; @@ -107,49 +269,44 @@ print_insn_powerpc (unsigned long insn, unsigned long memaddr) if ((operand->flags & PPC_OPERAND_FAKE) != 0) continue; - /* Extract the value from the instruction. */ - if (operand->extract) - value = (*operand->extract) (insn, dialect, &invalid); - else + /* If all of the optional operands have the value zero, + then don't print any of them. 
*/ + if ((operand->flags & PPC_OPERAND_OPTIONAL) != 0) { - value = (insn >> operand->shift) & ((1 << operand->bits) - 1); - if ((operand->flags & PPC_OPERAND_SIGNED) != 0 - && (value & (1 << (operand->bits - 1))) != 0) - value -= 1 << operand->bits; + if (skip_optional < 0) + skip_optional = skip_optional_operands (opindex, insn, + dialect); + if (skip_optional) + continue; } - /* If the operand is optional, and the value is zero, don't - print anything. */ - if ((operand->flags & PPC_OPERAND_OPTIONAL) != 0 - && (operand->flags & PPC_OPERAND_NEXT) == 0 - && value == 0) - continue; + value = operand_value_powerpc (operand, insn, dialect); if (need_comma) { - printf(","); + (*info->fprintf_func) (info->stream, ","); need_comma = 0; } /* Print the operand as directed by the flags. */ if ((operand->flags & PPC_OPERAND_GPR) != 0 || ((operand->flags & PPC_OPERAND_GPR_0) != 0 && value != 0)) - printf("r%ld", value); + (*info->fprintf_func) (info->stream, "r%ld", value); else if ((operand->flags & PPC_OPERAND_FPR) != 0) - printf("f%ld", value); + (*info->fprintf_func) (info->stream, "f%ld", value); else if ((operand->flags & PPC_OPERAND_VR) != 0) - printf("v%ld", value); + (*info->fprintf_func) (info->stream, "v%ld", value); else if ((operand->flags & PPC_OPERAND_RELATIVE) != 0) - print_address (memaddr + value); + (*info->print_address_func) (memaddr + value, info); else if ((operand->flags & PPC_OPERAND_ABSOLUTE) != 0) - print_address (value & 0xffffffff); + (*info->print_address_func) ((bfd_vma) value & 0xffffffff, info); else if ((operand->flags & PPC_OPERAND_CR) == 0 || (dialect & PPC_OPCODE_PPC) == 0) - printf("%ld", value); + (*info->fprintf_func) (info->stream, "%ld", value); else { - if (operand->bits == 3) - printf("cr%ld", value); + if (operand->bitm == 7) + (*info->fprintf_func) (info->stream, "cr%ld", value); else { static const char *cbnames[4] = { "lt", "gt", "eq", "so" }; @@ -158,15 +315,15 @@ print_insn_powerpc (unsigned long insn, unsigned long memaddr) cr = value >> 2; if (cr != 0) - printf("4*cr%d+", cr); + (*info->fprintf_func) (info->stream, "4*cr%d+", cr); cc = value & 3; - printf("%s", cbnames[cc]); + (*info->fprintf_func) (info->stream, "%s", cbnames[cc]); } } if (need_paren) { - printf(")"); + (*info->fprintf_func) (info->stream, ")"); need_paren = 0; } @@ -174,7 +331,7 @@ print_insn_powerpc (unsigned long insn, unsigned long memaddr) need_comma = 1; else { - printf("("); + (*info->fprintf_func) (info->stream, "("); need_paren = 1; } } @@ -190,7 +347,26 @@ print_insn_powerpc (unsigned long insn, unsigned long memaddr) } /* We could not find a match. 
*/ - printf(".long 0x%lx", insn); + (*info->fprintf_func) (info->stream, ".long 0x%lx", insn); return 4; } + +void +print_ppc_disassembler_options (FILE *stream) +{ + fprintf (stream, "\n\ +The following PPC specific disassembler options are supported for use with\n\ +the -M switch:\n"); + + fprintf (stream, " booke|booke32|booke64 Disassemble the BookE instructions\n"); + fprintf (stream, " e300 Disassemble the e300 instructions\n"); + fprintf (stream, " e500|e500x2 Disassemble the e500 instructions\n"); + fprintf (stream, " 440 Disassemble the 440 instructions\n"); + fprintf (stream, " efs Disassemble the EFS instructions\n"); + fprintf (stream, " power4 Disassemble the Power4 instructions\n"); + fprintf (stream, " power5 Disassemble the Power5 instructions\n"); + fprintf (stream, " power6 Disassemble the Power6 instructions\n"); + fprintf (stream, " 32 Do not disassemble 64-bit instructions\n"); + fprintf (stream, " 64 Allow disassembly of 64-bit instructions\n"); +} diff --git a/arch/powerpc/xmon/ppc-opc.c b/arch/powerpc/xmon/ppc-opc.c index 6845e91ba04a..5995f81de9ff 100644 --- a/arch/powerpc/xmon/ppc-opc.c +++ b/arch/powerpc/xmon/ppc-opc.c @@ -1,6 +1,6 @@ /* ppc-opc.c -- PowerPC opcode list Copyright 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, - 2005 Free Software Foundation, Inc. + 2005, 2006, 2007 Free Software Foundation, Inc. Written by Ian Lance Taylor, Cygnus Support This file is part of GDB, GAS, and the GNU binutils. @@ -20,14 +20,10 @@ Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -#include -#include -#include -#include "nonstdio.h" -#include "ppc.h" - -#define ATTRIBUTE_UNUSED -#define _(x) x +#include +#include "sysdep.h" +#include "opcode/ppc.h" +#include "opintl.h" /* This file holds the PowerPC opcode table. The opcode table includes almost all of the extended instruction mnemonics. 
This @@ -46,8 +42,6 @@ static unsigned long insert_bat (unsigned long, long, int, const char **); static long extract_bat (unsigned long, int, int *); static unsigned long insert_bba (unsigned long, long, int, const char **); static long extract_bba (unsigned long, int, int *); -static unsigned long insert_bd (unsigned long, long, int, const char **); -static long extract_bd (unsigned long, int, int *); static unsigned long insert_bdm (unsigned long, long, int, const char **); static long extract_bdm (unsigned long, int, int *); static unsigned long insert_bdp (unsigned long, long, int, const char **); @@ -56,23 +50,12 @@ static unsigned long insert_bo (unsigned long, long, int, const char **); static long extract_bo (unsigned long, int, int *); static unsigned long insert_boe (unsigned long, long, int, const char **); static long extract_boe (unsigned long, int, int *); -static unsigned long insert_dq (unsigned long, long, int, const char **); -static long extract_dq (unsigned long, int, int *); -static unsigned long insert_ds (unsigned long, long, int, const char **); -static long extract_ds (unsigned long, int, int *); -static unsigned long insert_de (unsigned long, long, int, const char **); -static long extract_de (unsigned long, int, int *); -static unsigned long insert_des (unsigned long, long, int, const char **); -static long extract_des (unsigned long, int, int *); static unsigned long insert_fxm (unsigned long, long, int, const char **); static long extract_fxm (unsigned long, int, int *); -static unsigned long insert_li (unsigned long, long, int, const char **); -static long extract_li (unsigned long, int, int *); static unsigned long insert_mbe (unsigned long, long, int, const char **); static long extract_mbe (unsigned long, int, int *); static unsigned long insert_mb6 (unsigned long, long, int, const char **); static long extract_mb6 (unsigned long, int, int *); -static unsigned long insert_nb (unsigned long, long, int, const char **); static long extract_nb (unsigned long, int, int *); static unsigned long insert_nsi (unsigned long, long, int, const char **); static long extract_nsi (unsigned long, int, int *); @@ -82,8 +65,6 @@ static unsigned long insert_raq (unsigned long, long, int, const char **); static unsigned long insert_ras (unsigned long, long, int, const char **); static unsigned long insert_rbs (unsigned long, long, int, const char **); static long extract_rbs (unsigned long, int, int *); -static unsigned long insert_rsq (unsigned long, long, int, const char **); -static unsigned long insert_rtq (unsigned long, long, int, const char **); static unsigned long insert_sh6 (unsigned long, long, int, const char **); static long extract_sh6 (unsigned long, int, int *); static unsigned long insert_spr (unsigned long, long, int, const char **); @@ -92,16 +73,10 @@ static unsigned long insert_sprg (unsigned long, long, int, const char **); static long extract_sprg (unsigned long, int, int *); static unsigned long insert_tbr (unsigned long, long, int, const char **); static long extract_tbr (unsigned long, int, int *); -static unsigned long insert_ev2 (unsigned long, long, int, const char **); -static long extract_ev2 (unsigned long, int, int *); -static unsigned long insert_ev4 (unsigned long, long, int, const char **); -static long extract_ev4 (unsigned long, int, int *); -static unsigned long insert_ev8 (unsigned long, long, int, const char **); -static long extract_ev8 (unsigned long, int, int *); /* The operands table. 
- The fields are bits, shift, insert, extract, flags. + The fields are bitm, shift, insert, extract, flags. We used to put parens around the various additions, like the one for BA just below. However, that caused trouble with feeble @@ -119,302 +94,298 @@ const struct powerpc_operand powerpc_operands[] = /* The BA field in an XL form instruction. */ #define BA UNUSED + 1 -#define BA_MASK (0x1f << 16) - { 5, 16, NULL, NULL, PPC_OPERAND_CR }, + /* The BI field in a B form or XL form instruction. */ +#define BI BA +#define BI_MASK (0x1f << 16) + { 0x1f, 16, NULL, NULL, PPC_OPERAND_CR }, /* The BA field in an XL form instruction when it must be the same as the BT field in the same instruction. */ #define BAT BA + 1 - { 5, 16, insert_bat, extract_bat, PPC_OPERAND_FAKE }, + { 0x1f, 16, insert_bat, extract_bat, PPC_OPERAND_FAKE }, /* The BB field in an XL form instruction. */ #define BB BAT + 1 #define BB_MASK (0x1f << 11) - { 5, 11, NULL, NULL, PPC_OPERAND_CR }, + { 0x1f, 11, NULL, NULL, PPC_OPERAND_CR }, /* The BB field in an XL form instruction when it must be the same as the BA field in the same instruction. */ #define BBA BB + 1 - { 5, 11, insert_bba, extract_bba, PPC_OPERAND_FAKE }, + { 0x1f, 11, insert_bba, extract_bba, PPC_OPERAND_FAKE }, /* The BD field in a B form instruction. The lower two bits are forced to zero. */ #define BD BBA + 1 - { 16, 0, insert_bd, extract_bd, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + { 0xfffc, 0, NULL, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, /* The BD field in a B form instruction when absolute addressing is used. */ #define BDA BD + 1 - { 16, 0, insert_bd, extract_bd, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + { 0xfffc, 0, NULL, NULL, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, /* The BD field in a B form instruction when the - modifier is used. This sets the y bit of the BO field appropriately. */ #define BDM BDA + 1 - { 16, 0, insert_bdm, extract_bdm, + { 0xfffc, 0, insert_bdm, extract_bdm, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, /* The BD field in a B form instruction when the - modifier is used and absolute address is used. */ #define BDMA BDM + 1 - { 16, 0, insert_bdm, extract_bdm, + { 0xfffc, 0, insert_bdm, extract_bdm, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, /* The BD field in a B form instruction when the + modifier is used. This sets the y bit of the BO field appropriately. */ #define BDP BDMA + 1 - { 16, 0, insert_bdp, extract_bdp, + { 0xfffc, 0, insert_bdp, extract_bdp, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, /* The BD field in a B form instruction when the + modifier is used and absolute addressing is used. */ #define BDPA BDP + 1 - { 16, 0, insert_bdp, extract_bdp, + { 0xfffc, 0, insert_bdp, extract_bdp, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, /* The BF field in an X or XL form instruction. */ #define BF BDPA + 1 - { 3, 23, NULL, NULL, PPC_OPERAND_CR }, + /* The CRFD field in an X form instruction. */ +#define CRFD BF + { 0x7, 23, NULL, NULL, PPC_OPERAND_CR }, + + /* The BF field in an X or XL form instruction. */ +#define BFF BF + 1 + { 0x7, 23, NULL, NULL, 0 }, /* An optional BF field. This is used for comparison instructions, in which an omitted BF field is taken as zero. */ -#define OBF BF + 1 - { 3, 23, NULL, NULL, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, +#define OBF BFF + 1 + { 0x7, 23, NULL, NULL, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, /* The BFA field in an X or XL form instruction. 
*/ #define BFA OBF + 1 - { 3, 18, NULL, NULL, PPC_OPERAND_CR }, - - /* The BI field in a B form or XL form instruction. */ -#define BI BFA + 1 -#define BI_MASK (0x1f << 16) - { 5, 16, NULL, NULL, PPC_OPERAND_CR }, + { 0x7, 18, NULL, NULL, PPC_OPERAND_CR }, /* The BO field in a B form instruction. Certain values are illegal. */ -#define BO BI + 1 +#define BO BFA + 1 #define BO_MASK (0x1f << 21) - { 5, 21, insert_bo, extract_bo, 0 }, + { 0x1f, 21, insert_bo, extract_bo, 0 }, /* The BO field in a B form instruction when the + or - modifier is used. This is like the BO field, but it must be even. */ #define BOE BO + 1 - { 5, 21, insert_boe, extract_boe, 0 }, + { 0x1e, 21, insert_boe, extract_boe, 0 }, #define BH BOE + 1 - { 2, 11, NULL, NULL, PPC_OPERAND_OPTIONAL }, + { 0x3, 11, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The BT field in an X or XL form instruction. */ #define BT BH + 1 - { 5, 21, NULL, NULL, PPC_OPERAND_CR }, + { 0x1f, 21, NULL, NULL, PPC_OPERAND_CR }, /* The condition register number portion of the BI field in a B form or XL form instruction. This is used for the extended conditional branch mnemonics, which set the lower two bits of the BI field. This field is optional. */ #define CR BT + 1 - { 3, 18, NULL, NULL, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, + { 0x7, 18, NULL, NULL, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, /* The CRB field in an X form instruction. */ #define CRB CR + 1 - { 5, 6, NULL, NULL, 0 }, - - /* The CRFD field in an X form instruction. */ -#define CRFD CRB + 1 - { 3, 23, NULL, NULL, PPC_OPERAND_CR }, + /* The MB field in an M form instruction. */ +#define MB CRB +#define MB_MASK (0x1f << 6) + { 0x1f, 6, NULL, NULL, 0 }, /* The CRFS field in an X form instruction. */ -#define CRFS CRFD + 1 - { 3, 0, NULL, NULL, PPC_OPERAND_CR }, +#define CRFS CRB + 1 + { 0x7, 0, NULL, NULL, PPC_OPERAND_CR }, /* The CT field in an X form instruction. */ #define CT CRFS + 1 - { 5, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, + /* The MO field in an mbar instruction. */ +#define MO CT + { 0x1f, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The D field in a D form instruction. This is a displacement off a register, and implies that the next operand is a register in parentheses. */ #define D CT + 1 - { 16, 0, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + { 0xffff, 0, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, /* The DE field in a DE form instruction. This is like D, but is 12 bits only. */ #define DE D + 1 - { 14, 0, insert_de, extract_de, PPC_OPERAND_PARENS }, + { 0xfff, 4, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, /* The DES field in a DES form instruction. This is like DS, but is 14 bits only (12 stored.) */ #define DES DE + 1 - { 14, 0, insert_des, extract_des, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + { 0x3ffc, 2, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, /* The DQ field in a DQ form instruction. This is like D, but the lower four bits are forced to zero. */ #define DQ DES + 1 - { 16, 0, insert_dq, extract_dq, - PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DQ }, + { 0xfff0, 0, NULL, NULL, + PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DQ }, /* The DS field in a DS form instruction. This is like D, but the lower two bits are forced to zero. */ #define DS DQ + 1 - { 16, 0, insert_ds, extract_ds, - PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DS }, + { 0xfffc, 0, NULL, NULL, + PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DS }, /* The E field in a wrteei instruction. 
*/ #define E DS + 1 - { 1, 15, NULL, NULL, 0 }, + { 0x1, 15, NULL, NULL, 0 }, /* The FL1 field in a POWER SC form instruction. */ #define FL1 E + 1 - { 4, 12, NULL, NULL, 0 }, + /* The U field in an X form instruction. */ +#define U FL1 + { 0xf, 12, NULL, NULL, 0 }, /* The FL2 field in a POWER SC form instruction. */ #define FL2 FL1 + 1 - { 3, 2, NULL, NULL, 0 }, + { 0x7, 2, NULL, NULL, 0 }, /* The FLM field in an XFL form instruction. */ #define FLM FL2 + 1 - { 8, 17, NULL, NULL, 0 }, + { 0xff, 17, NULL, NULL, 0 }, /* The FRA field in an X or A form instruction. */ #define FRA FLM + 1 #define FRA_MASK (0x1f << 16) - { 5, 16, NULL, NULL, PPC_OPERAND_FPR }, + { 0x1f, 16, NULL, NULL, PPC_OPERAND_FPR }, /* The FRB field in an X or A form instruction. */ #define FRB FRA + 1 #define FRB_MASK (0x1f << 11) - { 5, 11, NULL, NULL, PPC_OPERAND_FPR }, + { 0x1f, 11, NULL, NULL, PPC_OPERAND_FPR }, /* The FRC field in an A form instruction. */ #define FRC FRB + 1 #define FRC_MASK (0x1f << 6) - { 5, 6, NULL, NULL, PPC_OPERAND_FPR }, + { 0x1f, 6, NULL, NULL, PPC_OPERAND_FPR }, /* The FRS field in an X form instruction or the FRT field in a D, X or A form instruction. */ #define FRS FRC + 1 #define FRT FRS - { 5, 21, NULL, NULL, PPC_OPERAND_FPR }, + { 0x1f, 21, NULL, NULL, PPC_OPERAND_FPR }, /* The FXM field in an XFX instruction. */ #define FXM FRS + 1 -#define FXM_MASK (0xff << 12) - { 8, 12, insert_fxm, extract_fxm, 0 }, + { 0xff, 12, insert_fxm, extract_fxm, 0 }, /* Power4 version for mfcr. */ #define FXM4 FXM + 1 - { 8, 12, insert_fxm, extract_fxm, PPC_OPERAND_OPTIONAL }, + { 0xff, 12, insert_fxm, extract_fxm, PPC_OPERAND_OPTIONAL }, /* The L field in a D or X form instruction. */ #define L FXM4 + 1 - { 1, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, + { 0x1, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The LEV field in a POWER SVC form instruction. */ #define SVC_LEV L + 1 - { 7, 5, NULL, NULL, 0 }, + { 0x7f, 5, NULL, NULL, 0 }, /* The LEV field in an SC form instruction. */ #define LEV SVC_LEV + 1 - { 7, 5, NULL, NULL, PPC_OPERAND_OPTIONAL }, + { 0x7f, 5, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The LI field in an I form instruction. The lower two bits are forced to zero. */ #define LI LEV + 1 - { 26, 0, insert_li, extract_li, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + { 0x3fffffc, 0, NULL, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, /* The LI field in an I form instruction when used as an absolute address. */ #define LIA LI + 1 - { 26, 0, insert_li, extract_li, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + { 0x3fffffc, 0, NULL, NULL, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, /* The LS field in an X (sync) form instruction. */ #define LS LIA + 1 - { 2, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, - - /* The MB field in an M form instruction. */ -#define MB LS + 1 -#define MB_MASK (0x1f << 6) - { 5, 6, NULL, NULL, 0 }, + { 0x3, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The ME field in an M form instruction. */ -#define ME MB + 1 +#define ME LS + 1 #define ME_MASK (0x1f << 1) - { 5, 1, NULL, NULL, 0 }, + { 0x1f, 1, NULL, NULL, 0 }, /* The MB and ME fields in an M form instruction expressed a single operand which is a bitmask indicating which bits to select. This is a two operand form using PPC_OPERAND_NEXT. See the description in opcode/ppc.h for what this means. 
*/ #define MBE ME + 1 - { 5, 6, NULL, NULL, PPC_OPERAND_OPTIONAL | PPC_OPERAND_NEXT }, - { 32, 0, insert_mbe, extract_mbe, 0 }, + { 0x1f, 6, NULL, NULL, PPC_OPERAND_OPTIONAL | PPC_OPERAND_NEXT }, + { -1, 0, insert_mbe, extract_mbe, 0 }, /* The MB or ME field in an MD or MDS form instruction. The high bit is wrapped to the low end. */ #define MB6 MBE + 2 #define ME6 MB6 #define MB6_MASK (0x3f << 5) - { 6, 5, insert_mb6, extract_mb6, 0 }, - - /* The MO field in an mbar instruction. */ -#define MO MB6 + 1 - { 5, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, + { 0x3f, 5, insert_mb6, extract_mb6, 0 }, /* The NB field in an X form instruction. The value 32 is stored as 0. */ -#define NB MO + 1 - { 6, 11, insert_nb, extract_nb, 0 }, +#define NB MB6 + 1 + { 0x1f, 11, NULL, extract_nb, PPC_OPERAND_PLUS1 }, /* The NSI field in a D form instruction. This is the same as the SI field, only negated. */ #define NSI NB + 1 - { 16, 0, insert_nsi, extract_nsi, + { 0xffff, 0, insert_nsi, extract_nsi, PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED }, /* The RA field in an D, DS, DQ, X, XO, M, or MDS form instruction. */ #define RA NSI + 1 #define RA_MASK (0x1f << 16) - { 5, 16, NULL, NULL, PPC_OPERAND_GPR }, + { 0x1f, 16, NULL, NULL, PPC_OPERAND_GPR }, /* As above, but 0 in the RA field means zero, not r0. */ #define RA0 RA + 1 - { 5, 16, NULL, NULL, PPC_OPERAND_GPR_0 }, + { 0x1f, 16, NULL, NULL, PPC_OPERAND_GPR_0 }, /* The RA field in the DQ form lq instruction, which has special value restrictions. */ #define RAQ RA0 + 1 - { 5, 16, insert_raq, NULL, PPC_OPERAND_GPR_0 }, + { 0x1f, 16, insert_raq, NULL, PPC_OPERAND_GPR_0 }, /* The RA field in a D or X form instruction which is an updating load, which means that the RA field may not be zero and may not equal the RT field. */ #define RAL RAQ + 1 - { 5, 16, insert_ral, NULL, PPC_OPERAND_GPR_0 }, + { 0x1f, 16, insert_ral, NULL, PPC_OPERAND_GPR_0 }, /* The RA field in an lmw instruction, which has special value restrictions. */ #define RAM RAL + 1 - { 5, 16, insert_ram, NULL, PPC_OPERAND_GPR_0 }, + { 0x1f, 16, insert_ram, NULL, PPC_OPERAND_GPR_0 }, /* The RA field in a D or X form instruction which is an updating store or an updating floating point load, which means that the RA field may not be zero. */ #define RAS RAM + 1 - { 5, 16, insert_ras, NULL, PPC_OPERAND_GPR_0 }, + { 0x1f, 16, insert_ras, NULL, PPC_OPERAND_GPR_0 }, /* The RA field of the tlbwe instruction, which is optional. */ #define RAOPT RAS + 1 - { 5, 16, NULL, NULL, PPC_OPERAND_GPR | PPC_OPERAND_OPTIONAL }, + { 0x1f, 16, NULL, NULL, PPC_OPERAND_GPR | PPC_OPERAND_OPTIONAL }, /* The RB field in an X, XO, M, or MDS form instruction. */ #define RB RAOPT + 1 #define RB_MASK (0x1f << 11) - { 5, 11, NULL, NULL, PPC_OPERAND_GPR }, + { 0x1f, 11, NULL, NULL, PPC_OPERAND_GPR }, /* The RB field in an X form instruction when it must be the same as the RS field in the instruction. This is used for extended mnemonics like mr. */ #define RBS RB + 1 - { 5, 1, insert_rbs, extract_rbs, PPC_OPERAND_FAKE }, + { 0x1f, 11, insert_rbs, extract_rbs, PPC_OPERAND_FAKE }, /* The RS field in a D, DS, X, XFX, XS, M, MD or MDS form instruction or the RT field in a D, DS, X, XFX or XO form @@ -422,188 +393,168 @@ const struct powerpc_operand powerpc_operands[] = #define RS RBS + 1 #define RT RS #define RT_MASK (0x1f << 21) - { 5, 21, NULL, NULL, PPC_OPERAND_GPR }, + { 0x1f, 21, NULL, NULL, PPC_OPERAND_GPR }, - /* The RS field of the DS form stq instruction, which has special - value restrictions. 
*/ + /* The RS and RT fields of the DS form stq instruction, which have + special value restrictions. */ #define RSQ RS + 1 - { 5, 21, insert_rsq, NULL, PPC_OPERAND_GPR_0 }, - - /* The RT field of the DQ form lq instruction, which has special - value restrictions. */ -#define RTQ RSQ + 1 - { 5, 21, insert_rtq, NULL, PPC_OPERAND_GPR_0 }, +#define RTQ RSQ + { 0x1e, 21, NULL, NULL, PPC_OPERAND_GPR_0 }, /* The RS field of the tlbwe instruction, which is optional. */ -#define RSO RTQ + 1 +#define RSO RSQ + 1 #define RTO RSO - { 5, 21, NULL, NULL, PPC_OPERAND_GPR | PPC_OPERAND_OPTIONAL }, + { 0x1f, 21, NULL, NULL, PPC_OPERAND_GPR | PPC_OPERAND_OPTIONAL }, /* The SH field in an X or M form instruction. */ #define SH RSO + 1 #define SH_MASK (0x1f << 11) - { 5, 11, NULL, NULL, 0 }, + /* The other UIMM field in a EVX form instruction. */ +#define EVUIMM SH + { 0x1f, 11, NULL, NULL, 0 }, /* The SH field in an MD form instruction. This is split. */ #define SH6 SH + 1 #define SH6_MASK ((0x1f << 11) | (1 << 1)) - { 6, 1, insert_sh6, extract_sh6, 0 }, + { 0x3f, -1, insert_sh6, extract_sh6, 0 }, /* The SH field of the tlbwe instruction, which is optional. */ #define SHO SH6 + 1 - { 5, 11,NULL, NULL, PPC_OPERAND_OPTIONAL }, + { 0x1f, 11, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The SI field in a D form instruction. */ #define SI SHO + 1 - { 16, 0, NULL, NULL, PPC_OPERAND_SIGNED }, + { 0xffff, 0, NULL, NULL, PPC_OPERAND_SIGNED }, /* The SI field in a D form instruction when we accept a wide range of positive values. */ #define SISIGNOPT SI + 1 - { 16, 0, NULL, NULL, PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT }, + { 0xffff, 0, NULL, NULL, PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT }, /* The SPR field in an XFX form instruction. This is flipped--the lower 5 bits are stored in the upper 5 and vice- versa. */ #define SPR SISIGNOPT + 1 #define PMR SPR #define SPR_MASK (0x3ff << 11) - { 10, 11, insert_spr, extract_spr, 0 }, + { 0x3ff, 11, insert_spr, extract_spr, 0 }, /* The BAT index number in an XFX form m[ft]ibat[lu] instruction. */ #define SPRBAT SPR + 1 #define SPRBAT_MASK (0x3 << 17) - { 2, 17, NULL, NULL, 0 }, + { 0x3, 17, NULL, NULL, 0 }, /* The SPRG register number in an XFX form m[ft]sprg instruction. */ #define SPRG SPRBAT + 1 - { 5, 16, insert_sprg, extract_sprg, 0 }, + { 0x1f, 16, insert_sprg, extract_sprg, 0 }, /* The SR field in an X form instruction. */ #define SR SPRG + 1 - { 4, 16, NULL, NULL, 0 }, + { 0xf, 16, NULL, NULL, 0 }, /* The STRM field in an X AltiVec form instruction. */ #define STRM SR + 1 -#define STRM_MASK (0x3 << 21) - { 2, 21, NULL, NULL, 0 }, + { 0x3, 21, NULL, NULL, 0 }, /* The SV field in a POWER SC form instruction. */ #define SV STRM + 1 - { 14, 2, NULL, NULL, 0 }, + { 0x3fff, 2, NULL, NULL, 0 }, /* The TBR field in an XFX form instruction. This is like the SPR field, but it is optional. */ #define TBR SV + 1 - { 10, 11, insert_tbr, extract_tbr, PPC_OPERAND_OPTIONAL }, + { 0x3ff, 11, insert_tbr, extract_tbr, PPC_OPERAND_OPTIONAL }, /* The TO field in a D or X form instruction. */ #define TO TBR + 1 #define TO_MASK (0x1f << 21) - { 5, 21, NULL, NULL, 0 }, - - /* The U field in an X form instruction. */ -#define U TO + 1 - { 4, 12, NULL, NULL, 0 }, + { 0x1f, 21, NULL, NULL, 0 }, /* The UI field in a D form instruction. */ -#define UI U + 1 - { 16, 0, NULL, NULL, 0 }, +#define UI TO + 1 + { 0xffff, 0, NULL, NULL, 0 }, /* The VA field in a VA, VX or VXR form instruction. 
*/ #define VA UI + 1 -#define VA_MASK (0x1f << 16) - { 5, 16, NULL, NULL, PPC_OPERAND_VR }, + { 0x1f, 16, NULL, NULL, PPC_OPERAND_VR }, /* The VB field in a VA, VX or VXR form instruction. */ #define VB VA + 1 -#define VB_MASK (0x1f << 11) - { 5, 11, NULL, NULL, PPC_OPERAND_VR }, + { 0x1f, 11, NULL, NULL, PPC_OPERAND_VR }, /* The VC field in a VA form instruction. */ #define VC VB + 1 -#define VC_MASK (0x1f << 6) - { 5, 6, NULL, NULL, PPC_OPERAND_VR }, + { 0x1f, 6, NULL, NULL, PPC_OPERAND_VR }, /* The VD or VS field in a VA, VX, VXR or X form instruction. */ #define VD VC + 1 #define VS VD -#define VD_MASK (0x1f << 21) - { 5, 21, NULL, NULL, PPC_OPERAND_VR }, + { 0x1f, 21, NULL, NULL, PPC_OPERAND_VR }, /* The SIMM field in a VX form instruction. */ #define SIMM VD + 1 - { 5, 16, NULL, NULL, PPC_OPERAND_SIGNED}, + { 0x1f, 16, NULL, NULL, PPC_OPERAND_SIGNED}, - /* The UIMM field in a VX form instruction. */ + /* The UIMM field in a VX form instruction, and TE in Z form. */ #define UIMM SIMM + 1 - { 5, 16, NULL, NULL, 0 }, +#define TE UIMM + { 0x1f, 16, NULL, NULL, 0 }, /* The SHB field in a VA form instruction. */ #define SHB UIMM + 1 - { 4, 6, NULL, NULL, 0 }, - - /* The other UIMM field in a EVX form instruction. */ -#define EVUIMM SHB + 1 - { 5, 11, NULL, NULL, 0 }, + { 0xf, 6, NULL, NULL, 0 }, /* The other UIMM field in a half word EVX form instruction. */ -#define EVUIMM_2 EVUIMM + 1 - { 32, 11, insert_ev2, extract_ev2, PPC_OPERAND_PARENS }, +#define EVUIMM_2 SHB + 1 + { 0x3e, 10, NULL, NULL, PPC_OPERAND_PARENS }, /* The other UIMM field in a word EVX form instruction. */ #define EVUIMM_4 EVUIMM_2 + 1 - { 32, 11, insert_ev4, extract_ev4, PPC_OPERAND_PARENS }, + { 0x7c, 9, NULL, NULL, PPC_OPERAND_PARENS }, /* The other UIMM field in a double EVX form instruction. */ #define EVUIMM_8 EVUIMM_4 + 1 - { 32, 11, insert_ev8, extract_ev8, PPC_OPERAND_PARENS }, + { 0xf8, 8, NULL, NULL, PPC_OPERAND_PARENS }, /* The WS field. */ #define WS EVUIMM_8 + 1 -#define WS_MASK (0x7 << 11) - { 3, 11, NULL, NULL, 0 }, - - /* The L field in an mtmsrd or A form instruction. */ -#define MTMSRD_L WS + 1 -#define A_L MTMSRD_L - { 1, 16, NULL, NULL, PPC_OPERAND_OPTIONAL }, - - /* The DCM field in a Z form instruction. */ -#define DCM MTMSRD_L + 1 - { 6, 16, NULL, NULL, 0 }, + { 0x7, 11, NULL, NULL, 0 }, - /* Likewise, the DGM field in a Z form instruction. */ -#define DGM DCM + 1 - { 6, 16, NULL, NULL, 0 }, + /* The L field in an mtmsrd or A form instruction or W in an X form. */ +#define A_L WS + 1 +#define W A_L + { 0x1, 16, NULL, NULL, PPC_OPERAND_OPTIONAL }, -#define TE DGM + 1 - { 5, 11, NULL, NULL, 0 }, - -#define RMC TE + 1 - { 2, 21, NULL, NULL, 0 }, +#define RMC A_L + 1 + { 0x3, 9, NULL, NULL, 0 }, #define R RMC + 1 - { 1, 15, NULL, NULL, 0 }, + { 0x1, 16, NULL, NULL, 0 }, #define SP R + 1 - { 2, 11, NULL, NULL, 0 }, + { 0x3, 19, NULL, NULL, 0 }, #define S SP + 1 - { 1, 11, NULL, NULL, 0 }, + { 0x1, 20, NULL, NULL, 0 }, /* SH field starting at bit position 16. */ #define SH16 S + 1 - { 6, 10, NULL, NULL, 0 }, - - /* The L field in an X form with the RT field fixed instruction. */ -#define XRT_L SH16 + 1 - { 2, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, + /* The DCM and DGM fields in a Z form instruction. */ +#define DCM SH16 +#define DGM DCM + { 0x3f, 10, NULL, NULL, 0 }, /* The EH field in larx instruction. */ -#define EH XRT_L + 1 - { 1, 0, NULL, NULL, PPC_OPERAND_OPTIONAL }, +#define EH SH16 + 1 + { 0x1, 0, NULL, NULL, PPC_OPERAND_OPTIONAL }, + + /* The L field in an mtfsf or XFL form instruction. 
*/ +#define XFL_L EH + 1 + { 0x1, 25, NULL, NULL, PPC_OPERAND_OPTIONAL}, }; +const unsigned int num_powerpc_operands = (sizeof (powerpc_operands) + / sizeof (powerpc_operands[0])); + /* The functions used to insert and extract complicated operands. */ /* The BA field in an XL form instruction when it must be the same as @@ -656,26 +607,6 @@ extract_bba (unsigned long insn, return 0; } -/* The BD field in a B form instruction. The lower two bits are - forced to zero. */ - -static unsigned long -insert_bd (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg ATTRIBUTE_UNUSED) -{ - return insn | (value & 0xfffc); -} - -static long -extract_bd (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) -{ - return ((insn & 0xfffc) ^ 0x8000) - 0x8000; -} - /* The BD field in a B form instruction when the - modifier is used. This modifier means that the branch is not expected to be taken. For chips built to versions of the architecture prior to version 2 @@ -687,7 +618,11 @@ extract_bd (unsigned long insn, the "y" bit. "at" == 00 => no hint, "at" == 01 => unpredictable, "at" == 10 => not taken, "at" == 11 => taken. The "t" bit is 00001 in BO field, the "a" bit is 00010 for branch on CR(BI) and 01000 - for branch on CTR. We only handle the taken/not-taken hint here. */ + for branch on CTR. We only handle the taken/not-taken hint here. + Note that we don't relax the conditions tested here when + disassembling with -Many because insns using extract_bdm and + extract_bdp always occur in pairs. One or the other will always + be valid. */ static unsigned long insert_bdm (unsigned long insn, @@ -778,10 +713,11 @@ extract_bdp (unsigned long insn, /* Check for legal values of a BO field. */ static int -valid_bo (long value, int dialect) +valid_bo (long value, int dialect, int extract) { if ((dialect & PPC_OPCODE_POWER4) == 0) { + int valid; /* Certain encodings have bits that are required to be zero. These are (z must be zero, y may be anything): 001zy @@ -794,36 +730,43 @@ valid_bo (long value, int dialect) { default: case 0: - return 1; + valid = 1; + break; case 0x4: - return (value & 0x2) == 0; + valid = (value & 0x2) == 0; + break; case 0x10: - return (value & 0x8) == 0; + valid = (value & 0x8) == 0; + break; case 0x14: - return value == 0x14; + valid = value == 0x14; + break; } + /* When disassembling with -Many, accept power4 encodings too. */ + if (valid + || (dialect & PPC_OPCODE_ANY) == 0 + || !extract) + return valid; } + + /* Certain encodings have bits that are required to be zero. + These are (z must be zero, a & t may be anything): + 0000z + 0001z + 0100z + 0101z + 001at + 011at + 1a00t + 1a01t + 1z1zz + */ + if ((value & 0x14) == 0) + return (value & 0x1) == 0; + else if ((value & 0x14) == 0x14) + return value == 0x14; else - { - /* Certain encodings have bits that are required to be zero. - These are (z must be zero, a & t may be anything): - 0000z - 0001z - 0100z - 0101z - 001at - 011at - 1a00t - 1a01t - 1z1zz - */ - if ((value & 0x14) == 0) - return (value & 0x1) == 0; - else if ((value & 0x14) == 0x14) - return value == 0x14; - else - return 1; - } + return 1; } /* The BO field in a B form instruction. 
Warn about attempts to set @@ -835,7 +778,7 @@ insert_bo (unsigned long insn, int dialect, const char **errmsg) { - if (!valid_bo (value, dialect)) + if (!valid_bo (value, dialect, 0)) *errmsg = _("invalid conditional option"); return insn | ((value & 0x1f) << 21); } @@ -848,7 +791,7 @@ extract_bo (unsigned long insn, long value; value = (insn >> 21) & 0x1f; - if (!valid_bo (value, dialect)) + if (!valid_bo (value, dialect, 1)) *invalid = 1; return value; } @@ -863,7 +806,7 @@ insert_boe (unsigned long insn, int dialect, const char **errmsg) { - if (!valid_bo (value, dialect)) + if (!valid_bo (value, dialect, 0)) *errmsg = _("invalid conditional option"); else if ((value & 1) != 0) *errmsg = _("attempt to set y bit when using + or - modifier"); @@ -879,162 +822,11 @@ extract_boe (unsigned long insn, long value; value = (insn >> 21) & 0x1f; - if (!valid_bo (value, dialect)) + if (!valid_bo (value, dialect, 1)) *invalid = 1; return value & 0x1e; } -/* The DQ field in a DQ form instruction. This is like D, but the - lower four bits are forced to zero. */ - -static unsigned long -insert_dq (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if ((value & 0xf) != 0) - *errmsg = _("offset not a multiple of 16"); - return insn | (value & 0xfff0); -} - -static long -extract_dq (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) -{ - return ((insn & 0xfff0) ^ 0x8000) - 0x8000; -} - -static unsigned long -insert_ev2 (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if ((value & 1) != 0) - *errmsg = _("offset not a multiple of 2"); - if ((value > 62) != 0) - *errmsg = _("offset greater than 62"); - return insn | ((value & 0x3e) << 10); -} - -static long -extract_ev2 (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) -{ - return (insn >> 10) & 0x3e; -} - -static unsigned long -insert_ev4 (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if ((value & 3) != 0) - *errmsg = _("offset not a multiple of 4"); - if ((value > 124) != 0) - *errmsg = _("offset greater than 124"); - return insn | ((value & 0x7c) << 9); -} - -static long -extract_ev4 (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) -{ - return (insn >> 9) & 0x7c; -} - -static unsigned long -insert_ev8 (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if ((value & 7) != 0) - *errmsg = _("offset not a multiple of 8"); - if ((value > 248) != 0) - *errmsg = _("offset greater than 248"); - return insn | ((value & 0xf8) << 8); -} - -static long -extract_ev8 (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) -{ - return (insn >> 8) & 0xf8; -} - -/* The DS field in a DS form instruction. This is like D, but the - lower two bits are forced to zero. */ - -static unsigned long -insert_ds (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if ((value & 3) != 0) - *errmsg = _("offset not a multiple of 4"); - return insn | (value & 0xfffc); -} - -static long -extract_ds (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) -{ - return ((insn & 0xfffc) ^ 0x8000) - 0x8000; -} - -/* The DE field in a DE form instruction. 
*/ - -static unsigned long -insert_de (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if (value > 2047 || value < -2048) - *errmsg = _("offset not between -2048 and 2047"); - return insn | ((value << 4) & 0xfff0); -} - -static long -extract_de (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) -{ - return (insn & 0xfff0) >> 4; -} - -/* The DES field in a DES form instruction. */ - -static unsigned long -insert_des (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if (value > 8191 || value < -8192) - *errmsg = _("offset not between -8192 and 8191"); - else if ((value & 3) != 0) - *errmsg = _("offset not a multiple of 4"); - return insn | ((value << 2) & 0xfff0); -} - -static long -extract_des (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) -{ - return (((insn >> 2) & 0x3ffc) ^ 0x2000) - 0x2000; -} - /* FXM mask in mfcr and mtcrf instructions. */ static unsigned long @@ -1107,28 +899,6 @@ extract_fxm (unsigned long insn, return mask; } -/* The LI field in an I form instruction. The lower two bits are - forced to zero. */ - -static unsigned long -insert_li (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if ((value & 3) != 0) - *errmsg = _("ignoring least significant bits in branch offset"); - return insn | (value & 0x3fffffc); -} - -static long -extract_li (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) -{ - return ((insn & 0x3fffffc) ^ 0x2000000) - 0x2000000; -} - /* The MB and ME fields in an M form instruction expressed as a single operand which is itself a bitmask. The extraction function always marks it as invalid, since we never want to recognize an @@ -1240,19 +1010,6 @@ extract_mb6 (unsigned long insn, /* The NB field in an X form instruction. The value 32 is stored as 0. */ -static unsigned long -insert_nb (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if (value < 0 || value > 32) - *errmsg = _("value out of range"); - if (value == 32) - value = 0; - return insn | ((value & 0x1f) << 11); -} - static long extract_nb (unsigned long insn, int dialect ATTRIBUTE_UNUSED, @@ -1375,34 +1132,6 @@ extract_rbs (unsigned long insn, return 0; } -/* The RT field of the DQ form lq instruction, which has special - value restrictions. */ - -static unsigned long -insert_rtq (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if ((value & 1) != 0) - *errmsg = _("target register operand must be even"); - return insn | ((value & 0x1f) << 21); -} - -/* The RS field of the DS form stq instruction, which has special - value restrictions. */ - -static unsigned long -insert_rsq (unsigned long insn, - long value ATTRIBUTE_UNUSED, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg) -{ - if ((value & 1) != 0) - *errmsg = _("source register operand must be even"); - return insn | ((value & 0x1f) << 21); -} - /* The SH field in an MD form instruction. This is split. */ static unsigned long @@ -1675,10 +1404,14 @@ extract_tbr (unsigned long insn, /* The mask for a Z form instruction. */ #define Z_MASK ZRC (0x3f, 0x1ff, 1) +#define Z2_MASK ZRC (0x3f, 0xff, 1) /* An X_MASK with the RA field fixed. */ #define XRA_MASK (X_MASK | RA_MASK) +/* An XRA_MASK with the W field clear. */ +#define XWRA_MASK (XRA_MASK & ~((unsigned long) 1 << 16)) + /* An X_MASK with the RB field fixed. 
*/ #define XRB_MASK (X_MASK | RB_MASK) @@ -1733,7 +1466,7 @@ extract_tbr (unsigned long insn, /* An XFL form instruction. */ #define XFL(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1) | (((unsigned long)(rc)) & 1)) -#define XFL_MASK (XFL (0x3f, 0x3ff, 1) | (((unsigned long)1) << 25) | (((unsigned long)1) << 16)) +#define XFL_MASK XFL (0x3f, 0x3ff, 1) /* An X form isel instruction. */ #define XISEL(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1)) @@ -1809,7 +1542,7 @@ extract_tbr (unsigned long insn, /* An XFX form instruction with the SPR field filled in except for the SPRG field. */ -#define XSPRG_MASK (XSPR_MASK & ~(0x17 << 16)) +#define XSPRG_MASK (XSPR_MASK & ~(0x1f << 16)) /* An X form instruction with everything filled in except the E field. */ #define XE_MASK (0xffff7fff) @@ -3240,8 +2973,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "bcctrl", XLLK(19,528,1), XLBH_MASK, PPCCOM, { BO, BI, BH } }, { "bcc", XLLK(19,528,0), XLBB_MASK, PWRCOM, { BO, BI } }, { "bccl", XLLK(19,528,1), XLBB_MASK, PWRCOM, { BO, BI } }, -{ "bcctre", XLLK(19,529,0), XLYBB_MASK, BOOKE64, { BO, BI } }, -{ "bcctrel", XLLK(19,529,1), XLYBB_MASK, BOOKE64, { BO, BI } }, +{ "bcctre", XLLK(19,529,0), XLBB_MASK, BOOKE64, { BO, BI } }, +{ "bcctrel", XLLK(19,529,1), XLBB_MASK, BOOKE64, { BO, BI } }, { "rlwimi", M(20,0), M_MASK, PPCCOM, { RA,RS,SH,MBE,ME } }, { "rlimi", M(20,0), M_MASK, PWRCOM, { RA,RS,SH,MBE,ME } }, @@ -3486,7 +3219,7 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "ldarx", X(31,84), XEH_MASK, PPC64, { RT, RA0, RB, EH } }, { "dcbfl", XOPL(31,86,1), XRT_MASK, POWER5, { RA, RB } }, -{ "dcbf", X(31,86), XLRT_MASK, PPC, { RA, RB, XRT_L } }, +{ "dcbf", X(31,86), XLRT_MASK, PPC, { RA, RB, L } }, { "lbzx", X(31,87), X_MASK, COM, { RT, RA0, RB } }, @@ -3575,7 +3308,7 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "dcbtls", X(31,166), X_MASK, PPCCHLK, { CT, RA, RB }}, { "dcbtlse", X(31,174), X_MASK, PPCCHLK64, { CT, RA, RB }}, -{ "mtmsrd", X(31,178), XRLARB_MASK, PPC64, { RS, MTMSRD_L } }, +{ "mtmsrd", X(31,178), XRLARB_MASK, PPC64, { RS, A_L } }, { "stdux", X(31,181), X_MASK, PPC64, { RS, RAS, RB } }, @@ -4026,6 +3759,13 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "sthuxe", X(31,447), X_MASK, BOOKE64, { RS, RAS, RB } }, +{ "cctpl", 0x7c210b78, 0xffffffff, CELL, { 0 }}, +{ "cctpm", 0x7c421378, 0xffffffff, CELL, { 0 }}, +{ "cctph", 0x7c631b78, 0xffffffff, CELL, { 0 }}, +{ "db8cyc", 0x7f9ce378, 0xffffffff, CELL, { 0 }}, +{ "db10cyc", 0x7fbdeb78, 0xffffffff, CELL, { 0 }}, +{ "db12cyc", 0x7fdef378, 0xffffffff, CELL, { 0 }}, +{ "db16cyc", 0x7ffffb78, 0xffffffff, CELL, { 0 }}, { "mr", XRC(31,444,0), X_MASK, COM, { RA, RS, RBS } }, { "or", XRC(31,444,0), X_MASK, COM, { RA, RS, RB } }, { "mr.", XRC(31,444,1), X_MASK, COM, { RA, RS, RBS } }, @@ -4446,8 +4186,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "tlbsx", XRC(31,914,0), X_MASK, PPC403|BOOKE, { RTO, RA, RB } }, { "tlbsx.", XRC(31,914,1), X_MASK, PPC403|BOOKE, { RTO, RA, RB } }, -{ "tlbsxe", XRC(31,915,0), X_MASK, BOOKE64, { RA, RB } }, -{ "tlbsxe.", XRC(31,915,1), X_MASK, BOOKE64, { RA, RB } }, +{ "tlbsxe", XRC(31,915,0), X_MASK, BOOKE64, { RTO, RA, RB } }, +{ "tlbsxe.", XRC(31,915,1), X_MASK, BOOKE64, { RTO, RA, RB } }, { "slbmfee", X(31,915), XRA_MASK, PPC64, { RT, RB } }, @@ -4624,8 +4364,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "dadd", XRC(59,2,0), X_MASK, POWER6, { FRT, FRA, FRB } }, { "dadd.", XRC(59,2,1), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "dqua", ZRC(59,3,0), 
Z_MASK, POWER6, { FRT, FRA, FRB, RMC } }, -{ "dqua.", ZRC(59,3,1), Z_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{ "dqua", ZRC(59,3,0), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{ "dqua.", ZRC(59,3,1), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, { "fdivs", A(59,18,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, { "fdivs.", A(59,18,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, @@ -4663,20 +4403,20 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "dmul", XRC(59,34,0), X_MASK, POWER6, { FRT, FRA, FRB } }, { "dmul.", XRC(59,34,1), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "drrnd", ZRC(59,35,0), Z_MASK, POWER6, { FRT, FRA, FRB, RMC } }, -{ "drrnd.", ZRC(59,35,1), Z_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{ "drrnd", ZRC(59,35,0), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{ "drrnd.", ZRC(59,35,1), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, { "dscli", ZRC(59,66,0), Z_MASK, POWER6, { FRT, FRA, SH16 } }, { "dscli.", ZRC(59,66,1), Z_MASK, POWER6, { FRT, FRA, SH16 } }, -{ "dquai", ZRC(59,67,0), Z_MASK, POWER6, { TE, FRT, FRB, RMC } }, -{ "dquai.", ZRC(59,67,1), Z_MASK, POWER6, { TE, FRT, FRB, RMC } }, +{ "dquai", ZRC(59,67,0), Z2_MASK, POWER6, { TE, FRT, FRB, RMC } }, +{ "dquai.", ZRC(59,67,1), Z2_MASK, POWER6, { TE, FRT, FRB, RMC } }, { "dscri", ZRC(59,98,0), Z_MASK, POWER6, { FRT, FRA, SH16 } }, { "dscri.", ZRC(59,98,1), Z_MASK, POWER6, { FRT, FRA, SH16 } }, -{ "drintx", ZRC(59,99,0), Z_MASK, POWER6, { R, FRT, FRB, RMC } }, -{ "drintx.", ZRC(59,99,1), Z_MASK, POWER6, { R, FRT, FRB, RMC } }, +{ "drintx", ZRC(59,99,0), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, +{ "drintx.", ZRC(59,99,1), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, { "dcmpo", X(59,130), X_MASK, POWER6, { BF, FRA, FRB } }, @@ -4684,8 +4424,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "dtstdc", Z(59,194), Z_MASK, POWER6, { BF, FRA, DCM } }, { "dtstdg", Z(59,226), Z_MASK, POWER6, { BF, FRA, DGM } }, -{ "drintn", ZRC(59,227,0), Z_MASK, POWER6, { R, FRT, FRB, RMC } }, -{ "drintn.", ZRC(59,227,1), Z_MASK, POWER6, { R, FRT, FRB, RMC } }, +{ "drintn", ZRC(59,227,0), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, +{ "drintn.", ZRC(59,227,1), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, { "dctdp", XRC(59,258,0), X_MASK, POWER6, { FRT, FRB } }, { "dctdp.", XRC(59,258,1), X_MASK, POWER6, { FRT, FRB } }, @@ -4751,8 +4491,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "daddq", XRC(63,2,0), X_MASK, POWER6, { FRT, FRA, FRB } }, { "daddq.", XRC(63,2,1), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "dquaq", ZRC(63,3,0), Z_MASK, POWER6, { FRT, FRA, FRB, RMC } }, -{ "dquaq.", ZRC(63,3,1), Z_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{ "dquaq", ZRC(63,3,0), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{ "dquaq.", ZRC(63,3,1), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, { "fcpsgn", XRC(63,8,0), X_MASK, POWER6, { FRT, FRA, FRB } }, { "fcpsgn.", XRC(63,8,1), X_MASK, POWER6, { FRT, FRA, FRB } }, @@ -4827,8 +4567,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "dmulq", XRC(63,34,0), X_MASK, POWER6, { FRT, FRA, FRB } }, { "dmulq.", XRC(63,34,1), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "drrndq", ZRC(63,35,0), Z_MASK, POWER6, { FRT, FRA, FRB, RMC } }, -{ "drrndq.", ZRC(63,35,1), Z_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{ "drrndq", ZRC(63,35,0), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{ "drrndq.", ZRC(63,35,1), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, { "mtfsb1", XRC(63,38,0), XRARB_MASK, COM, { BT } }, { "mtfsb1.", XRC(63,38,1), XRARB_MASK, COM, { BT } }, @@ -4841,8 +4581,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "dscliq", ZRC(63,66,0), 
Z_MASK, POWER6, { FRT, FRA, SH16 } }, { "dscliq.", ZRC(63,66,1), Z_MASK, POWER6, { FRT, FRA, SH16 } }, -{ "dquaiq", ZRC(63,67,0), Z_MASK, POWER6, { TE, FRT, FRB, RMC } }, -{ "dquaiq.", ZRC(63,67,1), Z_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{ "dquaiq", ZRC(63,67,0), Z2_MASK, POWER6, { TE, FRT, FRB, RMC } }, +{ "dquaiq.", ZRC(63,67,1), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, { "mtfsb0", XRC(63,70,0), XRARB_MASK, COM, { BT } }, { "mtfsb0.", XRC(63,70,1), XRARB_MASK, COM, { BT } }, @@ -4853,13 +4593,13 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "dscriq", ZRC(63,98,0), Z_MASK, POWER6, { FRT, FRA, SH16 } }, { "dscriq.", ZRC(63,98,1), Z_MASK, POWER6, { FRT, FRA, SH16 } }, -{ "drintxq", ZRC(63,99,0), Z_MASK, POWER6, { R, FRT, FRB, RMC } }, -{ "drintxq.",ZRC(63,99,1), Z_MASK, POWER6, { R, FRT, FRB, RMC } }, +{ "drintxq", ZRC(63,99,0), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, +{ "drintxq.",ZRC(63,99,1), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, { "dcmpoq", X(63,130), X_MASK, POWER6, { BF, FRA, FRB } }, -{ "mtfsfi", XRC(63,134,0), XRA_MASK|(3<<21)|(1<<11), COM, { BF, U } }, -{ "mtfsfi.", XRC(63,134,1), XRA_MASK|(3<<21)|(1<<11), COM, { BF, U } }, +{ "mtfsfi", XRC(63,134,0), XWRA_MASK|(3<<21)|(1<<11), COM, { BFF, U, W } }, +{ "mtfsfi.", XRC(63,134,1), XWRA_MASK|(3<<21)|(1<<11), COM, { BFF, U, W } }, { "fnabs", XRC(63,136,0), XRA_MASK, COM, { FRT, FRB } }, { "fnabs.", XRC(63,136,1), XRA_MASK, COM, { FRT, FRB } }, @@ -4868,8 +4608,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "dtstdcq", Z(63,194), Z_MASK, POWER6, { BF, FRA, DCM } }, { "dtstdgq", Z(63,226), Z_MASK, POWER6, { BF, FRA, DGM } }, -{ "drintnq", ZRC(63,227,0), Z_MASK, POWER6, { R, FRT, FRB, RMC } }, -{ "drintnq.",ZRC(63,227,1), Z_MASK, POWER6, { R, FRT, FRB, RMC } }, +{ "drintnq", ZRC(63,227,0), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, +{ "drintnq.",ZRC(63,227,1), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, { "dctqpq", XRC(63,258,0), X_MASK, POWER6, { FRT, FRB } }, { "dctqpq.", XRC(63,258,1), X_MASK, POWER6, { FRT, FRB } }, @@ -4908,8 +4648,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "dtstsfq", X(63,674), X_MASK, POWER6, { BF, FRA, FRB } }, -{ "mtfsf", XFL(63,711,0), XFL_MASK, COM, { FLM, FRB } }, -{ "mtfsf.", XFL(63,711,1), XFL_MASK, COM, { FLM, FRB } }, +{ "mtfsf", XFL(63,711,0), XFL_MASK, COM, { FLM, FRB, XFL_L, W } }, +{ "mtfsf.", XFL(63,711,1), XFL_MASK, COM, { FLM, FRB, XFL_L, W } }, { "drdpq", XRC(63,770,0), X_MASK, POWER6, { FRT, FRB } }, { "drdpq.", XRC(63,770,1), X_MASK, POWER6, { FRT, FRB } }, @@ -4934,7 +4674,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { }; -const int powerpc_num_opcodes = ARRAY_SIZE(powerpc_opcodes); +const int powerpc_num_opcodes = + sizeof (powerpc_opcodes) / sizeof (powerpc_opcodes[0]); /* The macro table. This is only used by the assembler. */ @@ -4990,4 +4731,5 @@ const struct powerpc_macro powerpc_macros[] = { { "clrlslwi.",4, PPCCOM, "rlwinm. %0,%1,%3,(%2)-(%3),31-(%3)" }, }; -const int powerpc_num_macros = ARRAY_SIZE(powerpc_macros); +const int powerpc_num_macros = + sizeof (powerpc_macros) / sizeof (powerpc_macros[0]); diff --git a/arch/powerpc/xmon/ppc.h b/arch/powerpc/xmon/ppc.h index 110df96354b4..6771856fd5f8 100644 --- a/arch/powerpc/xmon/ppc.h +++ b/arch/powerpc/xmon/ppc.h @@ -1,6 +1,6 @@ /* ppc.h -- Header file for PowerPC opcode table - Copyright 1994, 1995, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. + Copyright 1994, 1995, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 Free Software Foundation, Inc. 
Written by Ian Lance Taylor, Cygnus Support This file is part of GDB, GAS, and the GNU binutils. @@ -153,20 +153,21 @@ extern const int powerpc_num_opcodes; struct powerpc_operand { - /* The number of bits in the operand. */ - int bits; + /* A bitmask of bits in the operand. */ + unsigned int bitm; - /* How far the operand is left shifted in the instruction. */ + /* How far the operand is left shifted in the instruction. + -1 to indicate that BITM and SHIFT cannot be used to determine + where the operand goes in the insn. */ int shift; /* Insertion function. This is used by the assembler. To insert an operand value into an instruction, check this field. If it is NULL, execute - i |= (op & ((1 << o->bits) - 1)) << o->shift; + i |= (op & o->bitm) << o->shift; (i is the instruction which we are filling in, o is a pointer to - this structure, and op is the opcode value; this assumes twos - complement arithmetic). + this structure, and op is the operand value). If this field is not NULL, then simply call it with the instruction and the operand value. It will return the new value @@ -182,12 +183,11 @@ struct powerpc_operand extract this operand type from an instruction, check this field. If it is NULL, compute - op = ((i) >> o->shift) & ((1 << o->bits) - 1); - if ((o->flags & PPC_OPERAND_SIGNED) != 0 - && (op & (1 << (o->bits - 1))) != 0) - op -= 1 << o->bits; + op = (i >> o->shift) & o->bitm; + if ((o->flags & PPC_OPERAND_SIGNED) != 0) + sign_extend (op); (i is the instruction, o is a pointer to this structure, and op - is the result; this assumes twos complement arithmetic). + is the result). If this field is not NULL, then simply call it with the instruction value. It will return the value of the operand. If @@ -205,17 +205,18 @@ struct powerpc_operand the operands field of the powerpc_opcodes table. */ extern const struct powerpc_operand powerpc_operands[]; +extern const unsigned int num_powerpc_operands; /* Values defined for the flags field of a struct powerpc_operand. */ /* This operand takes signed values. */ -#define PPC_OPERAND_SIGNED (01) +#define PPC_OPERAND_SIGNED (0x1) /* This operand takes signed values, but also accepts a full positive range of values when running in 32 bit mode. That is, if bits is 16, it takes any value from -0x8000 to 0xffff. In 64 bit mode, this flag is ignored. */ -#define PPC_OPERAND_SIGNOPT (02) +#define PPC_OPERAND_SIGNOPT (0x2) /* This operand does not actually exist in the assembler input. This is used to support extended mnemonics such as mr, for which two @@ -223,14 +224,14 @@ extern const struct powerpc_operand powerpc_operands[]; insert function with any op value. The disassembler should call the extract function, ignore the return value, and check the value placed in the valid argument. */ -#define PPC_OPERAND_FAKE (04) +#define PPC_OPERAND_FAKE (0x4) /* The next operand should be wrapped in parentheses rather than separated from this one by a comma. This is used for the load and store instructions which want their operands to look like reg,displacement(reg) */ -#define PPC_OPERAND_PARENS (010) +#define PPC_OPERAND_PARENS (0x8) /* This operand may use the symbolic names for the CR fields, which are @@ -239,26 +240,26 @@ extern const struct powerpc_operand powerpc_operands[]; cr4 4 cr5 5 cr6 6 cr7 7 These may be combined arithmetically, as in cr2*4+gt. These are only supported on the PowerPC, not the POWER. */ -#define PPC_OPERAND_CR (020) +#define PPC_OPERAND_CR (0x10) /* This operand names a register. 
The disassembler uses this to print register names with a leading 'r'. */ -#define PPC_OPERAND_GPR (040) +#define PPC_OPERAND_GPR (0x20) /* Like PPC_OPERAND_GPR, but don't print a leading 'r' for r0. */ -#define PPC_OPERAND_GPR_0 (0100) +#define PPC_OPERAND_GPR_0 (0x40) /* This operand names a floating point register. The disassembler prints these with a leading 'f'. */ -#define PPC_OPERAND_FPR (0200) +#define PPC_OPERAND_FPR (0x80) /* This operand is a relative branch displacement. The disassembler prints these symbolically if possible. */ -#define PPC_OPERAND_RELATIVE (0400) +#define PPC_OPERAND_RELATIVE (0x100) /* This operand is an absolute branch address. The disassembler prints these symbolically if possible. */ -#define PPC_OPERAND_ABSOLUTE (01000) +#define PPC_OPERAND_ABSOLUTE (0x200) /* This operand is optional, and is zero if omitted. This is used for example, in the optional BF field in the comparison instructions. The @@ -266,7 +267,7 @@ extern const struct powerpc_operand powerpc_operands[]; and the number of operands remaining for the opcode, and decide whether this operand is present or not. The disassembler should print this operand out only if it is not zero. */ -#define PPC_OPERAND_OPTIONAL (02000) +#define PPC_OPERAND_OPTIONAL (0x400) /* This flag is only used with PPC_OPERAND_OPTIONAL. If this operand is omitted, then for the next operand use this operand value plus @@ -274,24 +275,27 @@ extern const struct powerpc_operand powerpc_operands[]; hack is needed because the Power rotate instructions can take either 4 or 5 operands. The disassembler should print this operand out regardless of the PPC_OPERAND_OPTIONAL field. */ -#define PPC_OPERAND_NEXT (04000) +#define PPC_OPERAND_NEXT (0x800) /* This operand should be regarded as a negative number for the purposes of overflow checking (i.e., the normal most negative number is disallowed and one more than the normal most positive number is allowed). This flag will only be set for a signed operand. */ -#define PPC_OPERAND_NEGATIVE (010000) +#define PPC_OPERAND_NEGATIVE (0x1000) /* This operand names a vector unit register. The disassembler prints these with a leading 'v'. */ -#define PPC_OPERAND_VR (020000) +#define PPC_OPERAND_VR (0x2000) /* This operand is for the DS field in a DS form instruction. */ -#define PPC_OPERAND_DS (040000) +#define PPC_OPERAND_DS (0x4000) /* This operand is for the DQ field in a DQ form instruction. */ -#define PPC_OPERAND_DQ (0100000) +#define PPC_OPERAND_DQ (0x8000) + +/* Valid range of operand is 0..n rather than 0..n-1. */ +#define PPC_OPERAND_PLUS1 (0x10000) /* The POWER and PowerPC assemblers use a few macros. We keep them with the operands table for simplicity. The macro table is an -- GitLab From 08d96e0b127e07c3b90e10f1939caf70b456793e Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Thu, 2 Feb 2017 10:33:43 +0530 Subject: [PATCH 0257/1084] powerpc/xmon: Apply binutils changes to upgrade disassembly The following commit-ids from the binutils project were applied on the xmon branch and relicensed with the permission of the authors under GPLv2 for the following files: ppc-opc.c ppc-dis.c ppc.h Working off of binutils commit 65b650b4c746 we have now moved up to binutils commit a5721ba270dd. Some commit logs have been taken verbatim, some are summarized for ease of understanding. Here is a summary of the commits: 33e8d5ac613d PPC7450 New. (powerpc_opcodes): Use it in dcba. 
c3d65c1ced61 New opcodes and mask 8dbcd839b1bb Instruction Sorting 91eb7075e370 (powerpc_opcodes): Fix the first two operands of dquaiq. 548b1dcfcbab ppc-opc.c (powerpc_opcodes): Remove the dcffix and dcffix. 930bb4cfae30 Support optional L form mtmsr. de866fccd87d (powerpc_opcodes): Order and format. 19a6653ce8c6 ppc e500mc support fa452fa6833c (ppc_cpu_t): New typedef. c8187e1509b2 (parse_cpu): Handle -m464. 081ba1b3c08b Define. (PPC_OPERAND_FSL, PPC_OPERAND_FCR, PPC_OPERAND_UDI) 9b4e57660d38 Rename altivec_or_spe to retain_flags. Handle -mvsx and -mpower7. 899d85beadd0 (powerpc_opcodes): Enable rfci, mfpmr, mtpmr for e300. e1c93c699b7d (extract_sprg): Correct operand range check. 2f3bb96af796 (powerpc_init_dialect): Do not set PPC_OPCODE_BOOKE 1cb0a7674666 (ppc_setup_opcodes): Remove PPC_OPCODE_NOPOWER4 test 21169fcfadfa (print_insn_powerpc): Skip insn if it is deprecated 80890a619b85 ("dcbt", "dcbtst") 0e55be1624c2 ("lfdepx", "stfdepx") 066be9f7bd8e (parse_cpu): Extend -mpower7 to accept power7 and isel instructions. c72ab5f2c55d (powerpc_opcodes): Reorder the opcode table so that instructions 69fe9ce501f5 (ppc_parse_cpu): New function. (powerpc_init_dialect) e401b04ca7cd (powerpc_opcodes) <"dcbzl">: Merge the POWER4 and E500MC entries. 70dc4e324b9a (powerpc_init_dialect): Do not choose a default dialect due to -many/-Many. 858d7a6db20b (powerpc_opcodes) <"tlbilxlpid", "tlbilxpid", "tlbilxva", "tlbilx" bdc7fcfe59f1 (powerpc_macros ): Allow n+b of 64 e0d602ecffb0 (md_show_usage): Document -mpcca2 b961e85b6ebe (ppc_cpu_t): Typedef to uint64_t 8765b5569284 (powerpc_opcodes): Remove support for the the "lxsdux", "lxvd2ux" 634b50f2a623 Rename "ppca2" to "a2" 9fe54b1ca1c0 (md_show_usage): Document -m476 0dc9305793c8 Add bfd_mach_ppc_e500mc64 ce3d2015b21b Define. bfd/ * archures.c (bfd_mach_ppc_titan) cdc51b0748c4 Add -mpwr4, -mpwr5, -mpwr5x, -mpwr6 and -mpwr7 63d0fa4e9e57 Add PPC_OPCODE_E500MC for "e500mc64" cee62821d472 New Define. ("dccci"): Enable for PPCA2 85d4ac0b3c0b Correct wclr encoding. 51b5d4a8c5e5 (powerpc_opcodes): Enable divdeu, devweu, divde, divwe, divdeuo e01d869a3be2 (md_assemble): Emit APUinfo section for PPC_OPCODE_E500 09a8ad8d8f56 (powerpc_opcodes): Revert deprecation of mfocrf, mtcrf and mtocrf on EFS. f2bae120dcef (PPC_OPCODE_COMMON): Expand comment. 81a0b7e2ae09 (PPCPWR2): Add PPC_OPCODE_COMMON. (powerpc_opcodes): Add "subc" bdc70b4a03fd (PPC_OPCODE_32, PPC_OPCODE_BOOKE64, PPC_OPCODE_CLASSIC) 7102e95e4943 (ppc_set_cpu): Cast PPC_OPCODE_xxx to ppc_cpu_t before inverting f383de6633cb (powerpc_opcodes) [lswx,lswi,stswx,stswi]: Deprecate on E500 and E500MC 6b069ee70de3 Remove PPC_OPCODE_PPCPS 2f7f77101279 (powerpc_opcodes): Enable icswx for POWER7 989993d80a97 (insert_nbi, insert_rbx, FRAp, FRBp, FRSp, FRTp, NBI, RAX, RBX) a08fc94222d1 : New extended mnemonics 588925d06545 : Use PPC_OPERAND_GPR 8baf7b78b5d9 <"lswx">: Use RAX for the second and RBX for the third operand e67ed0e885d6 Changed opcode for vabsdub, vabsduh, vabsduw, mviwsplt fb048c26f19f (UIMM4, UIMM3, UIMM2, VXVA_MASK, VXVB_MASK, VXVAVB_MASK, VXVDVA_MASK 382c72e90441 (VXASHB_MASK): New define c7a5aa9c64fc (ppc_opts) : Use PPC_OPCODE_ALTIVEC2 ab4437c3224f : Fix opcode spelling 62082a42b9cd "lfdp" and "stfdp" use DS offset. 776fc41826bb (ppc_parse_cpu): Update prototype 943d398f4c52 (insert_sci8, extract_sci8): Rewrite. 5817ffd1f81c New define (PPC_OPCODE_HTM/POWER8) 9f0682fe89d9 (extract_vlesi): Properly sign extend c0637f3af686 (powerpc_init_dialect): Set default dialect to power8. 
58ae08f29af8 (powerpc_opcodes): Add tdui, twui, tdu, twu, tui, tu 4f6ffcd38d90 (powerpc_init_dialect): Use ppc_parse_cpu() to set dialect 4b95cf5c0c75 Update copyright years a47622ac1bad Allow both signed and unsigned fields in PowerPC cmpli insn 12e87fac5c76 ppc: enable msgclr and msgsnd on Power8 8514e4db84cc Don't deprecate powerpc mftb insn db76a70026ab Power4 should treat mftb as extended mfspr mnemonic b90efa5b79ac ChangeLog rotatation and copyright year update c4e676f19656 powerpc: Add slbfee. instruction 27c49e9a8fc0 powerpc: Only initialise opcode indices once 4fff86c517ab DCBT_EO): New define 4bc0608a8b69 Fix some PPC assembler errors dc302c00611b Add hwsync extended mnemonic 99a2c5612124 Remove unused MTMSRD_L macro and re-add accidentally deleted comment 11a0cf2ec0ed Allow for optional operands with non-zero default values 7b9341139a69 PPC sync instruction accepts invalid and incompatible operands ef5a96d564a2 Remove ppc860, ppc750cl, ppc7450 insns from common ppc 43e65147c07b Remove trailing spaces in opcodes 6dca4fd141fd Add dscr and ctrl SPR mnemonics b6518b387185 Fix compile time warnings generated when compiling with clang 36f7a9411dcd Patches for illegal ppc 500 instructions a680de9a980e Add assembler, disassembler and linker support for power9 dd2887fc3de4 Reorder some power9 insns b817670b52b7 Enable 2 operand form of powerpc mfcr with -many 6f2750feaf28 Copyright update for binutils afa8d4054b8e Delete opcodes that have been removed from ISA 3.0 1178da445ad5 Accept valid one byte signed and unsigned values for the IMM8 operand e43de63c8fd1 Fix powerpc subis range 514e58b72633 Correct "Fix powerpc subis range" 19dfcc89e8d9 Add support for new POWER ISA 3.0 instructions 1fe0971e41a4 add more extern C 026122a67044 Re-add support for lbarx, lharx, stbcx. and sthcx. insns back to the E6500 cpu 14b57c7c6a53 PowerPC VLE 6fd3a02da554 Add support for yet some more new ISA 3.0 instructions dfdaec14b0db Fix some PowerPC VLE BFD issues and add some PowerPC VLE instructions fd486b633e87 Modify POWER9 support to match final ISA 3.0 documentation a5721ba270dd Disallow 3-operand cmp[l][i] for ppc64 This updates the disassembly capabilities to add support for newer processors. Signed-off-by: Balbir Singh [mpe: Reformat commit list for brevity] Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/ppc-dis.c | 710 ++- arch/powerpc/xmon/ppc-opc.c | 8591 +++++++++++++++++++++++------------ arch/powerpc/xmon/ppc.h | 222 +- 3 files changed, 6310 insertions(+), 3213 deletions(-) diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c index 2545a36f21a5..c382d13c88a3 100644 --- a/arch/powerpc/xmon/ppc-dis.c +++ b/arch/powerpc/xmon/ppc-dis.c @@ -1,6 +1,5 @@ /* ppc-dis.c -- Disassemble PowerPC instructions - Copyright 1994, 1995, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 - Free Software Foundation, Inc. + Copyright (C) 1994-2016 Free Software Foundation, Inc. Written by Ian Lance Taylor, Cygnus Support This file is part of GDB, GAS, and the GNU binutils. @@ -19,9 +18,12 @@ You should have received a copy of the GNU General Public License along with this file; see the file COPYING. If not, write to the Free Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
*/ -#include #include "sysdep.h" +#include #include "dis-asm.h" +#include "elf-bfd.h" +#include "elf/ppc.h" +#include "opintl.h" #include "opcode/ppc.h" /* This file provides several disassembler functions, all of which use @@ -29,76 +31,380 @@ Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, US are provided because this file handles disassembly for the PowerPC in both big and little endian mode and also for the POWER (RS/6000) chip. */ +static int print_insn_powerpc (bfd_vma, struct disassemble_info *, int, + ppc_cpu_t); -static int print_insn_powerpc (bfd_vma, struct disassemble_info *, int, int); +struct dis_private +{ + /* Stash the result of parsing disassembler_options here. */ + ppc_cpu_t dialect; +} private; + +#define POWERPC_DIALECT(INFO) \ + (((struct dis_private *) ((INFO)->private_data))->dialect) + +struct ppc_mopt { + const char *opt; + ppc_cpu_t cpu; + ppc_cpu_t sticky; +}; + +struct ppc_mopt ppc_opts[] = { + { "403", PPC_OPCODE_PPC | PPC_OPCODE_403, + 0 }, + { "405", PPC_OPCODE_PPC | PPC_OPCODE_403 | PPC_OPCODE_405, + 0 }, + { "440", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_440 + | PPC_OPCODE_ISEL | PPC_OPCODE_RFMCI), + 0 }, + { "464", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_440 + | PPC_OPCODE_ISEL | PPC_OPCODE_RFMCI), + 0 }, + { "476", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_440 + | PPC_OPCODE_476 | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5), + 0 }, + { "601", PPC_OPCODE_PPC | PPC_OPCODE_601, + 0 }, + { "603", PPC_OPCODE_PPC, + 0 }, + { "604", PPC_OPCODE_PPC, + 0 }, + { "620", PPC_OPCODE_PPC | PPC_OPCODE_64, + 0 }, + { "7400", PPC_OPCODE_PPC | PPC_OPCODE_ALTIVEC, + 0 }, + { "7410", PPC_OPCODE_PPC | PPC_OPCODE_ALTIVEC, + 0 }, + { "7450", PPC_OPCODE_PPC | PPC_OPCODE_7450 | PPC_OPCODE_ALTIVEC, + 0 }, + { "7455", PPC_OPCODE_PPC | PPC_OPCODE_ALTIVEC, + 0 }, + { "750cl", PPC_OPCODE_PPC | PPC_OPCODE_750 | PPC_OPCODE_PPCPS + , 0 }, + { "821", PPC_OPCODE_PPC | PPC_OPCODE_860, + 0 }, + { "850", PPC_OPCODE_PPC | PPC_OPCODE_860, + 0 }, + { "860", PPC_OPCODE_PPC | PPC_OPCODE_860, + 0 }, + { "a2", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_POWER4 + | PPC_OPCODE_POWER5 | PPC_OPCODE_CACHELCK | PPC_OPCODE_64 + | PPC_OPCODE_A2), + 0 }, + { "altivec", PPC_OPCODE_PPC, + PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 }, + { "any", 0, + PPC_OPCODE_ANY }, + { "booke", PPC_OPCODE_PPC | PPC_OPCODE_BOOKE, + 0 }, + { "booke32", PPC_OPCODE_PPC | PPC_OPCODE_BOOKE, + 0 }, + { "cell", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 + | PPC_OPCODE_CELL | PPC_OPCODE_ALTIVEC), + 0 }, + { "com", PPC_OPCODE_COMMON, + 0 }, + { "e200z4", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE| PPC_OPCODE_SPE + | PPC_OPCODE_ISEL | PPC_OPCODE_EFS | PPC_OPCODE_BRLOCK + | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI + | PPC_OPCODE_E500 | PPC_OPCODE_E200Z4), + PPC_OPCODE_VLE }, + { "e300", PPC_OPCODE_PPC | PPC_OPCODE_E300, + 0 }, + { "e500", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_SPE + | PPC_OPCODE_ISEL | PPC_OPCODE_EFS | PPC_OPCODE_BRLOCK + | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI + | PPC_OPCODE_E500), + 0 }, + { "e500mc", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_ISEL + | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI + | PPC_OPCODE_E500MC), + 0 }, + { "e500mc64", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_ISEL + | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI + | PPC_OPCODE_E500MC | PPC_OPCODE_64 | PPC_OPCODE_POWER5 + | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7), + 0 }, + { "e5500", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | 
PPC_OPCODE_ISEL + | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI + | PPC_OPCODE_E500MC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 + | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 + | PPC_OPCODE_POWER7), + 0 }, + { "e6500", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_ISEL + | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI + | PPC_OPCODE_E500MC | PPC_OPCODE_64 | PPC_OPCODE_ALTIVEC + | PPC_OPCODE_ALTIVEC2 | PPC_OPCODE_E6500 | PPC_OPCODE_POWER4 + | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7), + 0 }, + { "e500x2", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_SPE + | PPC_OPCODE_ISEL | PPC_OPCODE_EFS | PPC_OPCODE_BRLOCK + | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI + | PPC_OPCODE_E500), + 0 }, + { "efs", PPC_OPCODE_PPC | PPC_OPCODE_EFS, + 0 }, + { "power4", PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4, + 0 }, + { "power5", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 + | PPC_OPCODE_POWER5), + 0 }, + { "power6", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 + | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_ALTIVEC), + 0 }, + { "power7", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 + | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 + | PPC_OPCODE_POWER7 | PPC_OPCODE_ALTIVEC | PPC_OPCODE_VSX), + 0 }, + { "power8", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 + | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 + | PPC_OPCODE_POWER7 | PPC_OPCODE_POWER8 | PPC_OPCODE_HTM + | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 | PPC_OPCODE_VSX), + 0 }, + { "power9", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 + | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 + | PPC_OPCODE_POWER7 | PPC_OPCODE_POWER8 | PPC_OPCODE_POWER9 + | PPC_OPCODE_HTM | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 + | PPC_OPCODE_VSX | PPC_OPCODE_VSX3 ), + 0 }, + { "ppc", PPC_OPCODE_PPC, + 0 }, + { "ppc32", PPC_OPCODE_PPC, + 0 }, + { "ppc64", PPC_OPCODE_PPC | PPC_OPCODE_64, + 0 }, + { "ppc64bridge", PPC_OPCODE_PPC | PPC_OPCODE_64_BRIDGE, + 0 }, + { "ppcps", PPC_OPCODE_PPC | PPC_OPCODE_PPCPS, + 0 }, + { "pwr", PPC_OPCODE_POWER, + 0 }, + { "pwr2", PPC_OPCODE_POWER | PPC_OPCODE_POWER2, + 0 }, + { "pwr4", PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4, + 0 }, + { "pwr5", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 + | PPC_OPCODE_POWER5), + 0 }, + { "pwr5x", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 + | PPC_OPCODE_POWER5), + 0 }, + { "pwr6", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 + | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_ALTIVEC), + 0 }, + { "pwr7", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 + | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 + | PPC_OPCODE_POWER7 | PPC_OPCODE_ALTIVEC | PPC_OPCODE_VSX), + 0 }, + { "pwr8", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 + | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 + | PPC_OPCODE_POWER7 | PPC_OPCODE_POWER8 | PPC_OPCODE_HTM + | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 | PPC_OPCODE_VSX), + 0 }, + { "pwr9", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 + | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 + | PPC_OPCODE_POWER7 | PPC_OPCODE_POWER8 | PPC_OPCODE_POWER9 + | PPC_OPCODE_HTM | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 + | PPC_OPCODE_VSX | PPC_OPCODE_VSX3 ), + 0 }, + { "pwrx", PPC_OPCODE_POWER | PPC_OPCODE_POWER2, + 0 }, + { "spe", PPC_OPCODE_PPC | PPC_OPCODE_EFS, + PPC_OPCODE_SPE }, + { "titan", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_PMR + | PPC_OPCODE_RFMCI | PPC_OPCODE_TITAN), + 0 }, 
+ { "vle", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE| PPC_OPCODE_SPE + | PPC_OPCODE_ISEL | PPC_OPCODE_EFS | PPC_OPCODE_BRLOCK + | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI + | PPC_OPCODE_E500), + PPC_OPCODE_VLE }, + { "vsx", PPC_OPCODE_PPC, + PPC_OPCODE_VSX | PPC_OPCODE_VSX3 }, + { "htm", PPC_OPCODE_PPC, + PPC_OPCODE_HTM }, +}; + +/* Switch between Booke and VLE dialects for interlinked dumps. */ +static ppc_cpu_t +get_powerpc_dialect (struct disassemble_info *info) +{ + ppc_cpu_t dialect = 0; -/* Determine which set of machines to disassemble for. PPC403/601 or - BookE. For convenience, also disassemble instructions supported - by the AltiVec vector unit. */ + dialect = POWERPC_DIALECT (info); -static int -powerpc_dialect (struct disassemble_info *info) -{ - int dialect = PPC_OPCODE_PPC; - - if (BFD_DEFAULT_TARGET_SIZE == 64) - dialect |= PPC_OPCODE_64; - - if (info->disassembler_options - && strstr (info->disassembler_options, "booke") != NULL) - dialect |= PPC_OPCODE_BOOKE | PPC_OPCODE_BOOKE64; - else if ((info->mach == bfd_mach_ppc_e500) - || (info->disassembler_options - && strstr (info->disassembler_options, "e500") != NULL)) - dialect |= (PPC_OPCODE_BOOKE - | PPC_OPCODE_SPE | PPC_OPCODE_ISEL - | PPC_OPCODE_EFS | PPC_OPCODE_BRLOCK - | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK - | PPC_OPCODE_RFMCI); - else if (info->disassembler_options - && strstr (info->disassembler_options, "efs") != NULL) - dialect |= PPC_OPCODE_EFS; - else if (info->disassembler_options - && strstr (info->disassembler_options, "e300") != NULL) - dialect |= PPC_OPCODE_E300 | PPC_OPCODE_CLASSIC | PPC_OPCODE_COMMON; - else if (info->disassembler_options - && strstr (info->disassembler_options, "440") != NULL) - dialect |= PPC_OPCODE_BOOKE | PPC_OPCODE_32 - | PPC_OPCODE_440 | PPC_OPCODE_ISEL | PPC_OPCODE_RFMCI; + /* Disassemble according to the section headers flags for VLE-mode. */ + if (dialect & PPC_OPCODE_VLE + && info->section->owner != NULL + && bfd_get_flavour (info->section->owner) == bfd_target_elf_flavour + && elf_object_id (info->section->owner) == PPC32_ELF_DATA + && (elf_section_flags (info->section) & SHF_PPC_VLE) != 0) + return dialect; else - dialect |= (PPC_OPCODE_403 | PPC_OPCODE_601 | PPC_OPCODE_CLASSIC - | PPC_OPCODE_COMMON | PPC_OPCODE_ALTIVEC); + return dialect & ~ PPC_OPCODE_VLE; +} + +/* Handle -m and -M options that set cpu type, and .machine arg. */ - if (info->disassembler_options - && strstr (info->disassembler_options, "power4") != NULL) - dialect |= PPC_OPCODE_POWER4; +ppc_cpu_t +ppc_parse_cpu (ppc_cpu_t ppc_cpu, ppc_cpu_t *sticky, const char *arg) +{ + unsigned int i; + + for (i = 0; i < sizeof (ppc_opts) / sizeof (ppc_opts[0]); i++) + if (strcmp (ppc_opts[i].opt, arg) == 0) + { + if (ppc_opts[i].sticky) + { + *sticky |= ppc_opts[i].sticky; + if ((ppc_cpu & ~*sticky) != 0) + break; + } + ppc_cpu = ppc_opts[i].cpu; + break; + } + if (i >= sizeof (ppc_opts) / sizeof (ppc_opts[0])) + return 0; + + ppc_cpu |= *sticky; + return ppc_cpu; +} - if (info->disassembler_options - && strstr (info->disassembler_options, "power5") != NULL) - dialect |= PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5; +/* Determine which set of machines to disassemble for. 
*/ - if (info->disassembler_options - && strstr (info->disassembler_options, "cell") != NULL) - dialect |= PPC_OPCODE_POWER4 | PPC_OPCODE_CELL | PPC_OPCODE_ALTIVEC; +static void +powerpc_init_dialect (struct disassemble_info *info) +{ + ppc_cpu_t dialect = 0; + ppc_cpu_t sticky = 0; + char *arg; + struct dis_private *priv = calloc (sizeof (*priv), 1); - if (info->disassembler_options - && strstr (info->disassembler_options, "power6") != NULL) - dialect |= PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_ALTIVEC; + if (priv == NULL) + priv = &private; - if (info->disassembler_options - && strstr (info->disassembler_options, "any") != NULL) - dialect |= PPC_OPCODE_ANY; + switch (info->mach) + { + case bfd_mach_ppc_403: + case bfd_mach_ppc_403gc: + dialect = ppc_parse_cpu (dialect, &sticky, "403"); + break; + case bfd_mach_ppc_405: + dialect = ppc_parse_cpu (dialect, &sticky, "405"); + break; + case bfd_mach_ppc_601: + dialect = ppc_parse_cpu (dialect, &sticky, "601"); + break; + case bfd_mach_ppc_a35: + case bfd_mach_ppc_rs64ii: + case bfd_mach_ppc_rs64iii: + dialect = ppc_parse_cpu (dialect, &sticky, "pwr2") | PPC_OPCODE_64; + break; + case bfd_mach_ppc_e500: + dialect = ppc_parse_cpu (dialect, &sticky, "e500"); + break; + case bfd_mach_ppc_e500mc: + dialect = ppc_parse_cpu (dialect, &sticky, "e500mc"); + break; + case bfd_mach_ppc_e500mc64: + dialect = ppc_parse_cpu (dialect, &sticky, "e500mc64"); + break; + case bfd_mach_ppc_e5500: + dialect = ppc_parse_cpu (dialect, &sticky, "e5500"); + break; + case bfd_mach_ppc_e6500: + dialect = ppc_parse_cpu (dialect, &sticky, "e6500"); + break; + case bfd_mach_ppc_titan: + dialect = ppc_parse_cpu (dialect, &sticky, "titan"); + break; + case bfd_mach_ppc_vle: + dialect = ppc_parse_cpu (dialect, &sticky, "vle"); + break; + default: + dialect = ppc_parse_cpu (dialect, &sticky, "power9") | PPC_OPCODE_ANY; + } - if (info->disassembler_options) + arg = info->disassembler_options; + while (arg != NULL) { - if (strstr (info->disassembler_options, "32") != NULL) - dialect &= ~PPC_OPCODE_64; - else if (strstr (info->disassembler_options, "64") != NULL) + ppc_cpu_t new_cpu = 0; + char *end = strchr (arg, ','); + + if (end != NULL) + *end = 0; + + if ((new_cpu = ppc_parse_cpu (dialect, &sticky, arg)) != 0) + dialect = new_cpu; + else if (strcmp (arg, "32") == 0) + dialect &= ~(ppc_cpu_t) PPC_OPCODE_64; + else if (strcmp (arg, "64") == 0) dialect |= PPC_OPCODE_64; + else + fprintf (stderr, _("warning: ignoring unknown -M%s option\n"), arg); + + if (end != NULL) + *end++ = ','; + arg = end; + } + + info->private_data = priv; + POWERPC_DIALECT(info) = dialect; +} + +#define PPC_OPCD_SEGS 64 +static unsigned short powerpc_opcd_indices[PPC_OPCD_SEGS+1]; +#define VLE_OPCD_SEGS 32 +static unsigned short vle_opcd_indices[VLE_OPCD_SEGS+1]; + +/* Calculate opcode table indices to speed up disassembly, + and init dialect. 
*/ + +void +disassemble_init_powerpc (struct disassemble_info *info) +{ + int i; + unsigned short last; + + if (powerpc_opcd_indices[PPC_OPCD_SEGS] == 0) + { + + i = powerpc_num_opcodes; + while (--i >= 0) + { + unsigned op = PPC_OP (powerpc_opcodes[i].opcode); + + powerpc_opcd_indices[op] = i; + } + + last = powerpc_num_opcodes; + for (i = PPC_OPCD_SEGS; i > 0; --i) + { + if (powerpc_opcd_indices[i] == 0) + powerpc_opcd_indices[i] = last; + last = powerpc_opcd_indices[i]; + } + + i = vle_num_opcodes; + while (--i >= 0) + { + unsigned op = VLE_OP (vle_opcodes[i].opcode, vle_opcodes[i].mask); + unsigned seg = VLE_OP_TO_SEG (op); + + vle_opcd_indices[seg] = i; + } + + last = vle_num_opcodes; + for (i = VLE_OPCD_SEGS; i > 0; --i) + { + if (vle_opcd_indices[i] == 0) + vle_opcd_indices[i] = last; + last = vle_opcd_indices[i]; + } } - info->private_data = (char *) 0 + dialect; - return dialect; + if (info->arch == bfd_arch_powerpc) + powerpc_init_dialect (info); } /* Print a big endian PowerPC instruction. */ @@ -106,8 +412,7 @@ powerpc_dialect (struct disassemble_info *info) int print_insn_big_powerpc (bfd_vma memaddr, struct disassemble_info *info) { - int dialect = (char *) info->private_data - (char *) 0; - return print_insn_powerpc (memaddr, info, 1, dialect); + return print_insn_powerpc (memaddr, info, 1, get_powerpc_dialect (info)); } /* Print a little endian PowerPC instruction. */ @@ -115,8 +420,7 @@ print_insn_big_powerpc (bfd_vma memaddr, struct disassemble_info *info) int print_insn_little_powerpc (bfd_vma memaddr, struct disassemble_info *info) { - int dialect = (char *) info->private_data - (char *) 0; - return print_insn_powerpc (memaddr, info, 0, dialect); + return print_insn_powerpc (memaddr, info, 0, get_powerpc_dialect (info)); } /* Print a POWER (RS/6000) instruction. */ @@ -131,7 +435,7 @@ print_insn_rs6000 (bfd_vma memaddr, struct disassemble_info *info) static long operand_value_powerpc (const struct powerpc_operand *operand, - unsigned long insn, int dialect) + unsigned long insn, ppc_cpu_t dialect) { long value; int invalid; @@ -140,11 +444,14 @@ operand_value_powerpc (const struct powerpc_operand *operand, value = (*operand->extract) (insn, dialect, &invalid); else { - value = (insn >> operand->shift) & operand->bitm; + if (operand->shift >= 0) + value = (insn >> operand->shift) & operand->bitm; + else + value = (insn << -operand->shift) & operand->bitm; if ((operand->flags & PPC_OPERAND_SIGNED) != 0) { /* BITM is always some number of zeros followed by some - number of ones, followed by some numer of zeros. */ + number of ones, followed by some number of zeros. */ unsigned long top = operand->bitm; /* top & -top gives the rightmost 1 bit, so this fills in any trailing zeros. */ @@ -161,7 +468,7 @@ operand_value_powerpc (const struct powerpc_operand *operand, static int skip_optional_operands (const unsigned char *opindex, - unsigned long insn, int dialect) + unsigned long insn, ppc_cpu_t dialect) { const struct powerpc_operand *operand; @@ -170,36 +477,148 @@ skip_optional_operands (const unsigned char *opindex, operand = &powerpc_operands[*opindex]; if ((operand->flags & PPC_OPERAND_NEXT) != 0 || ((operand->flags & PPC_OPERAND_OPTIONAL) != 0 - && operand_value_powerpc (operand, insn, dialect) != 0)) + && operand_value_powerpc (operand, insn, dialect) != + ppc_optional_operand_value (operand))) return 0; } return 1; } +/* Find a match for INSN in the opcode table, given machine DIALECT. + A DIALECT of -1 is special, matching all machine opcode variations. 
*/ + +static const struct powerpc_opcode * +lookup_powerpc (unsigned long insn, ppc_cpu_t dialect) +{ + const struct powerpc_opcode *opcode; + const struct powerpc_opcode *opcode_end; + unsigned long op; + + /* Get the major opcode of the instruction. */ + op = PPC_OP (insn); + + /* Find the first match in the opcode table for this major opcode. */ + opcode_end = powerpc_opcodes + powerpc_opcd_indices[op + 1]; + for (opcode = powerpc_opcodes + powerpc_opcd_indices[op]; + opcode < opcode_end; + ++opcode) + { + const unsigned char *opindex; + const struct powerpc_operand *operand; + int invalid; + + if ((insn & opcode->mask) != opcode->opcode + || (dialect != (ppc_cpu_t) -1 + && ((opcode->flags & dialect) == 0 + || (opcode->deprecated & dialect) != 0))) + continue; + + /* Check validity of operands. */ + invalid = 0; + for (opindex = opcode->operands; *opindex != 0; opindex++) + { + operand = powerpc_operands + *opindex; + if (operand->extract) + (*operand->extract) (insn, dialect, &invalid); + } + if (invalid) + continue; + + return opcode; + } + + return NULL; +} + +/* Find a match for INSN in the VLE opcode table. */ + +static const struct powerpc_opcode * +lookup_vle (unsigned long insn) +{ + const struct powerpc_opcode *opcode; + const struct powerpc_opcode *opcode_end; + unsigned op, seg; + + op = PPC_OP (insn); + if (op >= 0x20 && op <= 0x37) + { + /* This insn has a 4-bit opcode. */ + op &= 0x3c; + } + seg = VLE_OP_TO_SEG (op); + + /* Find the first match in the opcode table for this major opcode. */ + opcode_end = vle_opcodes + vle_opcd_indices[seg + 1]; + for (opcode = vle_opcodes + vle_opcd_indices[seg]; + opcode < opcode_end; + ++opcode) + { + unsigned long table_opcd = opcode->opcode; + unsigned long table_mask = opcode->mask; + bfd_boolean table_op_is_short = PPC_OP_SE_VLE(table_mask); + unsigned long insn2; + const unsigned char *opindex; + const struct powerpc_operand *operand; + int invalid; + + insn2 = insn; + if (table_op_is_short) + insn2 >>= 16; + if ((insn2 & table_mask) != table_opcd) + continue; + + /* Check validity of operands. */ + invalid = 0; + for (opindex = opcode->operands; *opindex != 0; ++opindex) + { + operand = powerpc_operands + *opindex; + if (operand->extract) + (*operand->extract) (insn, (ppc_cpu_t)0, &invalid); + } + if (invalid) + continue; + + return opcode; + } + + return NULL; +} + /* Print a PowerPC or POWER instruction. */ static int print_insn_powerpc (bfd_vma memaddr, struct disassemble_info *info, int bigendian, - int dialect) + ppc_cpu_t dialect) { bfd_byte buffer[4]; int status; unsigned long insn; const struct powerpc_opcode *opcode; - const struct powerpc_opcode *opcode_end; - unsigned long op; - - if (dialect == 0) - dialect = powerpc_dialect (info); + bfd_boolean insn_is_short; status = (*info->read_memory_func) (memaddr, buffer, 4, info); if (status != 0) { - (*info->memory_error_func) (status, memaddr, info); - return -1; + /* The final instruction may be a 2-byte VLE insn. */ + if ((dialect & PPC_OPCODE_VLE) != 0) + { + /* Clear buffer so unused bytes will not have garbage in them. */ + buffer[0] = buffer[1] = buffer[2] = buffer[3] = 0; + status = (*info->read_memory_func) (memaddr, buffer, 2, info); + if (status != 0) + { + (*info->memory_error_func) (status, memaddr, info); + return -1; + } + } + else + { + (*info->memory_error_func) (status, memaddr, info); + return -1; + } } if (bigendian) @@ -207,52 +626,37 @@ print_insn_powerpc (bfd_vma memaddr, else insn = bfd_getl32 (buffer); - /* Get the major opcode of the instruction. 
*/ - op = PPC_OP (insn); + /* Get the major opcode of the insn. */ + opcode = NULL; + insn_is_short = FALSE; + if ((dialect & PPC_OPCODE_VLE) != 0) + { + opcode = lookup_vle (insn); + if (opcode != NULL) + insn_is_short = PPC_OP_SE_VLE(opcode->mask); + } + if (opcode == NULL) + opcode = lookup_powerpc (insn, dialect); + if (opcode == NULL && (dialect & PPC_OPCODE_ANY) != 0) + opcode = lookup_powerpc (insn, (ppc_cpu_t) -1); - /* Find the first match in the opcode table. We could speed this up - a bit by doing a binary search on the major opcode. */ - opcode_end = powerpc_opcodes + powerpc_num_opcodes; - again: - for (opcode = powerpc_opcodes; opcode < opcode_end; opcode++) + if (opcode != NULL) { - unsigned long table_op; const unsigned char *opindex; const struct powerpc_operand *operand; - int invalid; int need_comma; int need_paren; int skip_optional; - table_op = PPC_OP (opcode->opcode); - if (op < table_op) - break; - if (op > table_op) - continue; - - if ((insn & opcode->mask) != opcode->opcode - || (opcode->flags & dialect) == 0) - continue; - - /* Make two passes over the operands. First see if any of them - have extraction functions, and, if they do, make sure the - instruction is valid. */ - invalid = 0; - for (opindex = opcode->operands; *opindex != 0; opindex++) - { - operand = powerpc_operands + *opindex; - if (operand->extract) - (*operand->extract) (insn, dialect, &invalid); - } - if (invalid) - continue; - - /* The instruction is valid. */ if (opcode->operands[0] != 0) (*info->fprintf_func) (info->stream, "%-7s ", opcode->name); else (*info->fprintf_func) (info->stream, "%s", opcode->name); + if (insn_is_short) + /* The operands will be fetched out of the 16-bit instruction. */ + insn >>= 16; + /* Now extract and print the operands. */ need_comma = 0; need_paren = 0; @@ -296,30 +700,38 @@ print_insn_powerpc (bfd_vma memaddr, (*info->fprintf_func) (info->stream, "f%ld", value); else if ((operand->flags & PPC_OPERAND_VR) != 0) (*info->fprintf_func) (info->stream, "v%ld", value); + else if ((operand->flags & PPC_OPERAND_VSR) != 0) + (*info->fprintf_func) (info->stream, "vs%ld", value); else if ((operand->flags & PPC_OPERAND_RELATIVE) != 0) (*info->print_address_func) (memaddr + value, info); else if ((operand->flags & PPC_OPERAND_ABSOLUTE) != 0) (*info->print_address_func) ((bfd_vma) value & 0xffffffff, info); - else if ((operand->flags & PPC_OPERAND_CR) == 0 - || (dialect & PPC_OPCODE_PPC) == 0) + else if ((operand->flags & PPC_OPERAND_FSL) != 0) + (*info->fprintf_func) (info->stream, "fsl%ld", value); + else if ((operand->flags & PPC_OPERAND_FCR) != 0) + (*info->fprintf_func) (info->stream, "fcr%ld", value); + else if ((operand->flags & PPC_OPERAND_UDI) != 0) (*info->fprintf_func) (info->stream, "%ld", value); - else + else if ((operand->flags & PPC_OPERAND_CR_REG) != 0 + && (((dialect & PPC_OPCODE_PPC) != 0) + || ((dialect & PPC_OPCODE_VLE) != 0))) + (*info->fprintf_func) (info->stream, "cr%ld", value); + else if (((operand->flags & PPC_OPERAND_CR_BIT) != 0) + && (((dialect & PPC_OPCODE_PPC) != 0) + || ((dialect & PPC_OPCODE_VLE) != 0))) { - if (operand->bitm == 7) - (*info->fprintf_func) (info->stream, "cr%ld", value); - else - { - static const char *cbnames[4] = { "lt", "gt", "eq", "so" }; - int cr; - int cc; - - cr = value >> 2; - if (cr != 0) - (*info->fprintf_func) (info->stream, "4*cr%d+", cr); - cc = value & 3; - (*info->fprintf_func) (info->stream, "%s", cbnames[cc]); - } + static const char *cbnames[4] = { "lt", "gt", "eq", "so" }; + int cr; + int cc; + + cr = value 
>> 2; + if (cr != 0) + (*info->fprintf_func) (info->stream, "4*cr%d+", cr); + cc = value & 3; + (*info->fprintf_func) (info->stream, "%s", cbnames[cc]); } + else + (*info->fprintf_func) (info->stream, "%d", (int) value); if (need_paren) { @@ -336,14 +748,16 @@ print_insn_powerpc (bfd_vma memaddr, } } - /* We have found and printed an instruction; return. */ - return 4; - } - - if ((dialect & PPC_OPCODE_ANY) != 0) - { - dialect = ~PPC_OPCODE_ANY; - goto again; + /* We have found and printed an instruction. + If it was a short VLE instruction we have more to do. */ + if (insn_is_short) + { + memaddr += 2; + return 2; + } + else + /* Otherwise, return. */ + return 4; } /* We could not find a match. */ @@ -355,18 +769,20 @@ print_insn_powerpc (bfd_vma memaddr, void print_ppc_disassembler_options (FILE *stream) { - fprintf (stream, "\n\ + unsigned int i, col; + + fprintf (stream, _("\n\ The following PPC specific disassembler options are supported for use with\n\ -the -M switch:\n"); - - fprintf (stream, " booke|booke32|booke64 Disassemble the BookE instructions\n"); - fprintf (stream, " e300 Disassemble the e300 instructions\n"); - fprintf (stream, " e500|e500x2 Disassemble the e500 instructions\n"); - fprintf (stream, " 440 Disassemble the 440 instructions\n"); - fprintf (stream, " efs Disassemble the EFS instructions\n"); - fprintf (stream, " power4 Disassemble the Power4 instructions\n"); - fprintf (stream, " power5 Disassemble the Power5 instructions\n"); - fprintf (stream, " power6 Disassemble the Power6 instructions\n"); - fprintf (stream, " 32 Do not disassemble 64-bit instructions\n"); - fprintf (stream, " 64 Allow disassembly of 64-bit instructions\n"); +the -M switch:\n")); + + for (col = 0, i = 0; i < sizeof (ppc_opts) / sizeof (ppc_opts[0]); i++) + { + col += fprintf (stream, " %s,", ppc_opts[i].opt); + if (col > 66) + { + fprintf (stream, "\n"); + col = 0; + } + } + fprintf (stream, " 32, 64\n"); } diff --git a/arch/powerpc/xmon/ppc-opc.c b/arch/powerpc/xmon/ppc-opc.c index 5995f81de9ff..e3ad69c3be07 100644 --- a/arch/powerpc/xmon/ppc-opc.c +++ b/arch/powerpc/xmon/ppc-opc.c @@ -1,6 +1,5 @@ /* ppc-opc.c -- PowerPC opcode list - Copyright 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007 Free Software Foundation, Inc. + Copyright (C) 1994-2016 Free Software Foundation, Inc. Written by Ian Lance Taylor, Cygnus Support This file is part of GDB, GAS, and the GNU binutils. @@ -20,8 +19,8 @@ Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -#include #include "sysdep.h" +#include #include "opcode/ppc.h" #include "opintl.h" @@ -38,41 +37,93 @@ /* Local insertion and extraction functions. 
*/ -static unsigned long insert_bat (unsigned long, long, int, const char **); -static long extract_bat (unsigned long, int, int *); -static unsigned long insert_bba (unsigned long, long, int, const char **); -static long extract_bba (unsigned long, int, int *); -static unsigned long insert_bdm (unsigned long, long, int, const char **); -static long extract_bdm (unsigned long, int, int *); -static unsigned long insert_bdp (unsigned long, long, int, const char **); -static long extract_bdp (unsigned long, int, int *); -static unsigned long insert_bo (unsigned long, long, int, const char **); -static long extract_bo (unsigned long, int, int *); -static unsigned long insert_boe (unsigned long, long, int, const char **); -static long extract_boe (unsigned long, int, int *); -static unsigned long insert_fxm (unsigned long, long, int, const char **); -static long extract_fxm (unsigned long, int, int *); -static unsigned long insert_mbe (unsigned long, long, int, const char **); -static long extract_mbe (unsigned long, int, int *); -static unsigned long insert_mb6 (unsigned long, long, int, const char **); -static long extract_mb6 (unsigned long, int, int *); -static long extract_nb (unsigned long, int, int *); -static unsigned long insert_nsi (unsigned long, long, int, const char **); -static long extract_nsi (unsigned long, int, int *); -static unsigned long insert_ral (unsigned long, long, int, const char **); -static unsigned long insert_ram (unsigned long, long, int, const char **); -static unsigned long insert_raq (unsigned long, long, int, const char **); -static unsigned long insert_ras (unsigned long, long, int, const char **); -static unsigned long insert_rbs (unsigned long, long, int, const char **); -static long extract_rbs (unsigned long, int, int *); -static unsigned long insert_sh6 (unsigned long, long, int, const char **); -static long extract_sh6 (unsigned long, int, int *); -static unsigned long insert_spr (unsigned long, long, int, const char **); -static long extract_spr (unsigned long, int, int *); -static unsigned long insert_sprg (unsigned long, long, int, const char **); -static long extract_sprg (unsigned long, int, int *); -static unsigned long insert_tbr (unsigned long, long, int, const char **); -static long extract_tbr (unsigned long, int, int *); +static unsigned long insert_arx (unsigned long, long, ppc_cpu_t, const char **); +static long extract_arx (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_ary (unsigned long, long, ppc_cpu_t, const char **); +static long extract_ary (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_bat (unsigned long, long, ppc_cpu_t, const char **); +static long extract_bat (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_bba (unsigned long, long, ppc_cpu_t, const char **); +static long extract_bba (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_bdm (unsigned long, long, ppc_cpu_t, const char **); +static long extract_bdm (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_bdp (unsigned long, long, ppc_cpu_t, const char **); +static long extract_bdp (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_bo (unsigned long, long, ppc_cpu_t, const char **); +static long extract_bo (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_boe (unsigned long, long, ppc_cpu_t, const char **); +static long extract_boe (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_esync (unsigned long, long, ppc_cpu_t, const char **); +static unsigned long 
insert_dcmxs (unsigned long, long, ppc_cpu_t, const char **); +static long extract_dcmxs (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_dxd (unsigned long, long, ppc_cpu_t, const char **); +static long extract_dxd (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_dxdn (unsigned long, long, ppc_cpu_t, const char **); +static long extract_dxdn (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_fxm (unsigned long, long, ppc_cpu_t, const char **); +static long extract_fxm (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_li20 (unsigned long, long, ppc_cpu_t, const char **); +static long extract_li20 (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_ls (unsigned long, long, ppc_cpu_t, const char **); +static unsigned long insert_mbe (unsigned long, long, ppc_cpu_t, const char **); +static long extract_mbe (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_mb6 (unsigned long, long, ppc_cpu_t, const char **); +static long extract_mb6 (unsigned long, ppc_cpu_t, int *); +static long extract_nb (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_nbi (unsigned long, long, ppc_cpu_t, const char **); +static unsigned long insert_nsi (unsigned long, long, ppc_cpu_t, const char **); +static long extract_nsi (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_oimm (unsigned long, long, ppc_cpu_t, const char **); +static long extract_oimm (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_ral (unsigned long, long, ppc_cpu_t, const char **); +static unsigned long insert_ram (unsigned long, long, ppc_cpu_t, const char **); +static unsigned long insert_raq (unsigned long, long, ppc_cpu_t, const char **); +static unsigned long insert_ras (unsigned long, long, ppc_cpu_t, const char **); +static unsigned long insert_rbs (unsigned long, long, ppc_cpu_t, const char **); +static long extract_rbs (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_rbx (unsigned long, long, ppc_cpu_t, const char **); +static unsigned long insert_rx (unsigned long, long, ppc_cpu_t, const char **); +static long extract_rx (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_ry (unsigned long, long, ppc_cpu_t, const char **); +static long extract_ry (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_sh6 (unsigned long, long, ppc_cpu_t, const char **); +static long extract_sh6 (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_sci8 (unsigned long, long, ppc_cpu_t, const char **); +static long extract_sci8 (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_sci8n (unsigned long, long, ppc_cpu_t, const char **); +static long extract_sci8n (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_sd4h (unsigned long, long, ppc_cpu_t, const char **); +static long extract_sd4h (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_sd4w (unsigned long, long, ppc_cpu_t, const char **); +static long extract_sd4w (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_spr (unsigned long, long, ppc_cpu_t, const char **); +static long extract_spr (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_sprg (unsigned long, long, ppc_cpu_t, const char **); +static long extract_sprg (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_tbr (unsigned long, long, ppc_cpu_t, const char **); +static long extract_tbr (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_xt6 (unsigned long, long, ppc_cpu_t, const char **); 
+static long extract_xt6 (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_xtq6 (unsigned long, long, ppc_cpu_t, const char **); +static long extract_xtq6 (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_xa6 (unsigned long, long, ppc_cpu_t, const char **); +static long extract_xa6 (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_xb6 (unsigned long, long, ppc_cpu_t, const char **); +static long extract_xb6 (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_xb6s (unsigned long, long, ppc_cpu_t, const char **); +static long extract_xb6s (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_xc6 (unsigned long, long, ppc_cpu_t, const char **); +static long extract_xc6 (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_dm (unsigned long, long, ppc_cpu_t, const char **); +static long extract_dm (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_vlesi (unsigned long, long, ppc_cpu_t, const char **); +static long extract_vlesi (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_vlensi (unsigned long, long, ppc_cpu_t, const char **); +static long extract_vlensi (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_vleui (unsigned long, long, ppc_cpu_t, const char **); +static long extract_vleui (unsigned long, ppc_cpu_t, int *); +static unsigned long insert_vleil (unsigned long, long, ppc_cpu_t, const char **); +static long extract_vleil (unsigned long, ppc_cpu_t, int *); /* The operands table. @@ -97,7 +148,7 @@ const struct powerpc_operand powerpc_operands[] = /* The BI field in a B form or XL form instruction. */ #define BI BA #define BI_MASK (0x1f << 16) - { 0x1f, 16, NULL, NULL, PPC_OPERAND_CR }, + { 0x1f, 16, NULL, NULL, PPC_OPERAND_CR_BIT }, /* The BA field in an XL form instruction when it must be the same as the BT field in the same instruction. */ @@ -107,11 +158,14 @@ const struct powerpc_operand powerpc_operands[] = /* The BB field in an XL form instruction. */ #define BB BAT + 1 #define BB_MASK (0x1f << 11) - { 0x1f, 11, NULL, NULL, PPC_OPERAND_CR }, + { 0x1f, 11, NULL, NULL, PPC_OPERAND_CR_BIT }, /* The BB field in an XL form instruction when it must be the same as the BA field in the same instruction. */ #define BBA BB + 1 + /* The VB field in a VX form instruction when it must be the same + as the VA field in the same instruction. */ +#define VBA BBA { 0x1f, 11, insert_bba, extract_bba, PPC_OPERAND_FAKE }, /* The BD field in a B form instruction. The lower two bits are @@ -128,31 +182,33 @@ const struct powerpc_operand powerpc_operands[] = This sets the y bit of the BO field appropriately. */ #define BDM BDA + 1 { 0xfffc, 0, insert_bdm, extract_bdm, - PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, /* The BD field in a B form instruction when the - modifier is used and absolute address is used. */ #define BDMA BDM + 1 { 0xfffc, 0, insert_bdm, extract_bdm, - PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, /* The BD field in a B form instruction when the + modifier is used. This sets the y bit of the BO field appropriately. */ #define BDP BDMA + 1 { 0xfffc, 0, insert_bdp, extract_bdp, - PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, /* The BD field in a B form instruction when the + modifier is used and absolute addressing is used. 
*/ #define BDPA BDP + 1 { 0xfffc, 0, insert_bdp, extract_bdp, - PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, + PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, /* The BF field in an X or XL form instruction. */ #define BF BDPA + 1 /* The CRFD field in an X form instruction. */ #define CRFD BF - { 0x7, 23, NULL, NULL, PPC_OPERAND_CR }, + /* The CRD field in an XL form instruction. */ +#define CRD BF + { 0x7, 23, NULL, NULL, PPC_OPERAND_CR_REG }, /* The BF field in an X or XL form instruction. */ #define BFF BF + 1 @@ -161,11 +217,11 @@ const struct powerpc_operand powerpc_operands[] = /* An optional BF field. This is used for comparison instructions, in which an omitted BF field is taken as zero. */ #define OBF BFF + 1 - { 0x7, 23, NULL, NULL, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, + { 0x7, 23, NULL, NULL, PPC_OPERAND_CR_REG | PPC_OPERAND_OPTIONAL }, /* The BFA field in an X or XL form instruction. */ #define BFA OBF + 1 - { 0x7, 18, NULL, NULL, PPC_OPERAND_CR }, + { 0x7, 18, NULL, NULL, PPC_OPERAND_CR_REG }, /* The BO field in a B form instruction. Certain values are illegal. */ @@ -178,19 +234,49 @@ const struct powerpc_operand powerpc_operands[] = #define BOE BO + 1 { 0x1e, 21, insert_boe, extract_boe, 0 }, -#define BH BOE + 1 + /* The RM field in an X form instruction. */ +#define RM BOE + 1 + { 0x3, 11, NULL, NULL, 0 }, + +#define BH RM + 1 { 0x3, 11, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The BT field in an X or XL form instruction. */ #define BT BH + 1 - { 0x1f, 21, NULL, NULL, PPC_OPERAND_CR }, + { 0x1f, 21, NULL, NULL, PPC_OPERAND_CR_BIT }, + + /* The BI16 field in a BD8 form instruction. */ +#define BI16 BT + 1 + { 0x3, 8, NULL, NULL, PPC_OPERAND_CR_BIT }, + + /* The BI32 field in a BD15 form instruction. */ +#define BI32 BI16 + 1 + { 0xf, 16, NULL, NULL, PPC_OPERAND_CR_BIT }, + + /* The BO32 field in a BD15 form instruction. */ +#define BO32 BI32 + 1 + { 0x3, 20, NULL, NULL, 0 }, + + /* The B8 field in a BD8 form instruction. */ +#define B8 BO32 + 1 + { 0x1fe, -1, NULL, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The B15 field in a BD15 form instruction. The lowest bit is + forced to zero. */ +#define B15 B8 + 1 + { 0xfffe, 0, NULL, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, + + /* The B24 field in a BD24 form instruction. The lowest bit is + forced to zero. */ +#define B24 B15 + 1 + { 0x1fffffe, 0, NULL, NULL, PPC_OPERAND_RELATIVE | PPC_OPERAND_SIGNED }, /* The condition register number portion of the BI field in a B form or XL form instruction. This is used for the extended conditional branch mnemonics, which set the lower two bits of the BI field. This field is optional. */ -#define CR BT + 1 - { 0x7, 18, NULL, NULL, PPC_OPERAND_CR | PPC_OPERAND_OPTIONAL }, +#define CR B24 + 1 + { 0x7, 18, NULL, NULL, PPC_OPERAND_CR_REG | PPC_OPERAND_OPTIONAL }, /* The CRB field in an X form instruction. */ #define CRB CR + 1 @@ -199,12 +285,19 @@ const struct powerpc_operand powerpc_operands[] = #define MB_MASK (0x1f << 6) { 0x1f, 6, NULL, NULL, 0 }, + /* The CRD32 field in an XL form instruction. */ +#define CRD32 CRB + 1 + { 0x3, 21, NULL, NULL, PPC_OPERAND_CR_REG }, + /* The CRFS field in an X form instruction. */ -#define CRFS CRB + 1 - { 0x7, 0, NULL, NULL, PPC_OPERAND_CR }, +#define CRFS CRD32 + 1 + { 0x7, 0, NULL, NULL, PPC_OPERAND_CR_REG }, + +#define CRS CRFS + 1 + { 0x3, 18, NULL, NULL, PPC_OPERAND_CR_REG | PPC_OPERAND_OPTIONAL }, /* The CT field in an X form instruction. */ -#define CT CRFS + 1 +#define CT CRS + 1 /* The MO field in an mbar instruction. 
*/ #define MO CT { 0x1f, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, @@ -215,19 +308,23 @@ const struct powerpc_operand powerpc_operands[] = #define D CT + 1 { 0xffff, 0, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, - /* The DE field in a DE form instruction. This is like D, but is 12 - bits only. */ -#define DE D + 1 - { 0xfff, 4, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + /* The D8 field in a D form instruction. This is a displacement off + a register, and implies that the next operand is a register in + parentheses. */ +#define D8 D + 1 + { 0xff, 0, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + + /* The DCMX field in an X form instruction. */ +#define DCMX D8 + 1 + { 0x7f, 16, NULL, NULL, 0 }, - /* The DES field in a DES form instruction. This is like DS, but is 14 - bits only (12 stored.) */ -#define DES DE + 1 - { 0x3ffc, 2, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + /* The split DCMX field in an X form instruction. */ +#define DCMXS DCMX + 1 + { 0x7f, PPC_OPSHIFT_INV, insert_dcmxs, extract_dcmxs, 0 }, /* The DQ field in a DQ form instruction. This is like D, but the lower four bits are forced to zero. */ -#define DQ DES + 1 +#define DQ DCMXS + 1 { 0xfff0, 0, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DQ }, @@ -237,8 +334,29 @@ const struct powerpc_operand powerpc_operands[] = { 0xfffc, 0, NULL, NULL, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED | PPC_OPERAND_DS }, + /* The DUIS or BHRBE fields in a XFX form instruction, 10 bits + unsigned imediate */ +#define DUIS DS + 1 +#define BHRBE DUIS + { 0x3ff, 11, NULL, NULL, 0 }, + + /* The split D field in a DX form instruction. */ +#define DXD DUIS + 1 + { 0xffff, PPC_OPSHIFT_INV, insert_dxd, extract_dxd, + PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT}, + + /* The split ND field in a DX form instruction. + This is the same as the DX field, only negated. */ +#define NDXD DXD + 1 + { 0xffff, PPC_OPSHIFT_INV, insert_dxdn, extract_dxdn, + PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT}, + /* The E field in a wrteei instruction. */ -#define E DS + 1 + /* And the W bit in the pair singles instructions. */ + /* And the ST field in a VX form instruction. */ +#define E NDXD + 1 +#define PSW E +#define ST E { 0x1, 15, NULL, NULL, 0 }, /* The FL1 field in a POWER SC form instruction. */ @@ -260,13 +378,21 @@ const struct powerpc_operand powerpc_operands[] = #define FRA_MASK (0x1f << 16) { 0x1f, 16, NULL, NULL, PPC_OPERAND_FPR }, + /* The FRAp field of DFP instructions. */ +#define FRAp FRA + 1 + { 0x1e, 16, NULL, NULL, PPC_OPERAND_FPR }, + /* The FRB field in an X or A form instruction. */ -#define FRB FRA + 1 +#define FRB FRAp + 1 #define FRB_MASK (0x1f << 11) { 0x1f, 11, NULL, NULL, PPC_OPERAND_FPR }, + /* The FRBp field of DFP instructions. */ +#define FRBp FRB + 1 + { 0x1e, 11, NULL, NULL, PPC_OPERAND_FPR }, + /* The FRC field in an A form instruction. */ -#define FRC FRB + 1 +#define FRC FRBp + 1 #define FRC_MASK (0x1f << 6) { 0x1f, 6, NULL, NULL, PPC_OPERAND_FPR }, @@ -276,20 +402,47 @@ const struct powerpc_operand powerpc_operands[] = #define FRT FRS { 0x1f, 21, NULL, NULL, PPC_OPERAND_FPR }, + /* The FRSp field of stfdp or the FRTp field of lfdp and DFP + instructions. */ +#define FRSp FRS + 1 +#define FRTp FRSp + { 0x1e, 21, NULL, NULL, PPC_OPERAND_FPR }, + /* The FXM field in an XFX instruction. */ -#define FXM FRS + 1 +#define FXM FRSp + 1 { 0xff, 12, insert_fxm, extract_fxm, 0 }, /* Power4 version for mfcr. 
*/ #define FXM4 FXM + 1 - { 0xff, 12, insert_fxm, extract_fxm, PPC_OPERAND_OPTIONAL }, + { 0xff, 12, insert_fxm, extract_fxm, + PPC_OPERAND_OPTIONAL | PPC_OPERAND_OPTIONAL_VALUE}, + /* If the FXM4 operand is ommitted, use the sentinel value -1. */ + { -1, -1, NULL, NULL, 0}, + + /* The IMM20 field in an LI instruction. */ +#define IMM20 FXM4 + 2 + { 0xfffff, PPC_OPSHIFT_INV, insert_li20, extract_li20, PPC_OPERAND_SIGNED}, /* The L field in a D or X form instruction. */ -#define L FXM4 + 1 +#define L IMM20 + 1 + { 0x1, 21, NULL, NULL, 0 }, + + /* The optional L field in tlbie and tlbiel instructions. */ +#define LOPT L + 1 + /* The R field in a HTM X form instruction. */ +#define HTM_R LOPT { 0x1, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, + /* The optional (for 32-bit) L field in cmp[l][i] instructions. */ +#define L32OPT LOPT + 1 + { 0x1, 21, NULL, NULL, PPC_OPERAND_OPTIONAL | PPC_OPERAND_OPTIONAL32 }, + + /* The L field in dcbf instruction. */ +#define L2OPT L32OPT + 1 + { 0x3, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, + /* The LEV field in a POWER SVC form instruction. */ -#define SVC_LEV L + 1 +#define SVC_LEV L2OPT + 1 { 0x7f, 5, NULL, NULL, 0 }, /* The LEV field in an SC form instruction. */ @@ -306,9 +459,10 @@ const struct powerpc_operand powerpc_operands[] = #define LIA LI + 1 { 0x3fffffc, 0, NULL, NULL, PPC_OPERAND_ABSOLUTE | PPC_OPERAND_SIGNED }, - /* The LS field in an X (sync) form instruction. */ + /* The LS or WC field in an X (sync or wait) form instruction. */ #define LS LIA + 1 - { 0x3, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, +#define WC LS + { 0x3, 21, insert_ls, NULL, PPC_OPERAND_OPTIONAL }, /* The ME field in an M form instruction. */ #define ME LS + 1 @@ -335,14 +489,25 @@ const struct powerpc_operand powerpc_operands[] = #define NB MB6 + 1 { 0x1f, 11, NULL, extract_nb, PPC_OPERAND_PLUS1 }, + /* The NBI field in an lswi instruction, which has special value + restrictions. The value 32 is stored as 0. */ +#define NBI NB + 1 + { 0x1f, 11, insert_nbi, extract_nb, PPC_OPERAND_PLUS1 }, + /* The NSI field in a D form instruction. This is the same as the SI field, only negated. */ -#define NSI NB + 1 +#define NSI NBI + 1 + { 0xffff, 0, insert_nsi, extract_nsi, + PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED }, + + /* The NSI field in a D form instruction when we accept a wide range + of positive values. */ +#define NSISIGNOPT NSI + 1 { 0xffff, 0, insert_nsi, extract_nsi, - PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED }, + PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT }, /* The RA field in an D, DS, DQ, X, XO, M, or MDS form instruction. */ -#define RA NSI + 1 +#define RA NSISIGNOPT + 1 #define RA_MASK (0x1f << 16) { 0x1f, 16, NULL, NULL, PPC_OPERAND_GPR }, @@ -350,9 +515,10 @@ const struct powerpc_operand powerpc_operands[] = #define RA0 RA + 1 { 0x1f, 16, NULL, NULL, PPC_OPERAND_GPR_0 }, - /* The RA field in the DQ form lq instruction, which has special + /* The RA field in the DQ form lq or an lswx instruction, which have special value restrictions. */ #define RAQ RA0 + 1 +#define RAX RAQ { 0x1f, 16, insert_raq, NULL, PPC_OPERAND_GPR_0 }, /* The RA field in a D or X form instruction which is an updating @@ -372,7 +538,8 @@ const struct powerpc_operand powerpc_operands[] = #define RAS RAM + 1 { 0x1f, 16, insert_ras, NULL, PPC_OPERAND_GPR_0 }, - /* The RA field of the tlbwe instruction, which is optional. */ + /* The RA field of the tlbwe, dccci and iccci instructions, + which are optional. 
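An omitted optional operand is normally assembled as zero; operands such as FXM4 above, or TBR and SXL below, carry PPC_OPERAND_OPTIONAL_VALUE instead, so that a following sentinel entry supplies their default.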
*/ #define RAOPT RAS + 1 { 0x1f, 16, NULL, NULL, PPC_OPERAND_GPR | PPC_OPERAND_OPTIONAL }, @@ -387,36 +554,95 @@ const struct powerpc_operand powerpc_operands[] = #define RBS RB + 1 { 0x1f, 11, insert_rbs, extract_rbs, PPC_OPERAND_FAKE }, + /* The RB field in an lswx instruction, which has special value + restrictions. */ +#define RBX RBS + 1 + { 0x1f, 11, insert_rbx, NULL, PPC_OPERAND_GPR }, + + /* The RB field of the dccci and iccci instructions, which are optional. */ +#define RBOPT RBX + 1 + { 0x1f, 11, NULL, NULL, PPC_OPERAND_GPR | PPC_OPERAND_OPTIONAL }, + + /* The RC register field in an maddld, maddhd or maddhdu instruction. */ +#define RC RBOPT + 1 + { 0x1f, 6, NULL, NULL, PPC_OPERAND_GPR }, + /* The RS field in a D, DS, X, XFX, XS, M, MD or MDS form instruction or the RT field in a D, DS, X, XFX or XO form instruction. */ -#define RS RBS + 1 +#define RS RC + 1 #define RT RS #define RT_MASK (0x1f << 21) +#define RD RS { 0x1f, 21, NULL, NULL, PPC_OPERAND_GPR }, - /* The RS and RT fields of the DS form stq instruction, which have - special value restrictions. */ + /* The RS and RT fields of the DS form stq and DQ form lq instructions, + which have special value restrictions. */ #define RSQ RS + 1 #define RTQ RSQ - { 0x1e, 21, NULL, NULL, PPC_OPERAND_GPR_0 }, + { 0x1e, 21, NULL, NULL, PPC_OPERAND_GPR }, /* The RS field of the tlbwe instruction, which is optional. */ #define RSO RSQ + 1 #define RTO RSO { 0x1f, 21, NULL, NULL, PPC_OPERAND_GPR | PPC_OPERAND_OPTIONAL }, + /* The RX field of the SE_RR form instruction. */ +#define RX RSO + 1 + { 0x1f, PPC_OPSHIFT_INV, insert_rx, extract_rx, PPC_OPERAND_GPR }, + + /* The ARX field of the SE_RR form instruction. */ +#define ARX RX + 1 + { 0x1f, PPC_OPSHIFT_INV, insert_arx, extract_arx, PPC_OPERAND_GPR }, + + /* The RY field of the SE_RR form instruction. */ +#define RY ARX + 1 +#define RZ RY + { 0x1f, PPC_OPSHIFT_INV, insert_ry, extract_ry, PPC_OPERAND_GPR }, + + /* The ARY field of the SE_RR form instruction. */ +#define ARY RY + 1 + { 0x1f, PPC_OPSHIFT_INV, insert_ary, extract_ary, PPC_OPERAND_GPR }, + + /* The SCLSCI8 field in a D form instruction. */ +#define SCLSCI8 ARY + 1 + { 0xffffffff, PPC_OPSHIFT_INV, insert_sci8, extract_sci8, 0 }, + + /* The SCLSCI8N field in a D form instruction. This is the same as the + SCLSCI8 field, only negated. */ +#define SCLSCI8N SCLSCI8 + 1 + { 0xffffffff, PPC_OPSHIFT_INV, insert_sci8n, extract_sci8n, + PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED }, + + /* The SD field of the SD4 form instruction. */ +#define SE_SD SCLSCI8N + 1 + { 0xf, 8, NULL, NULL, PPC_OPERAND_PARENS }, + + /* The SD field of the SD4 form instruction, for halfword. */ +#define SE_SDH SE_SD + 1 + { 0x1e, PPC_OPSHIFT_INV, insert_sd4h, extract_sd4h, PPC_OPERAND_PARENS }, + + /* The SD field of the SD4 form instruction, for word. */ +#define SE_SDW SE_SDH + 1 + { 0x3c, PPC_OPSHIFT_INV, insert_sd4w, extract_sd4w, PPC_OPERAND_PARENS }, + /* The SH field in an X or M form instruction. */ -#define SH RSO + 1 +#define SH SE_SDW + 1 #define SH_MASK (0x1f << 11) /* The other UIMM field in a EVX form instruction. */ #define EVUIMM SH + /* The FC field in an atomic X form instruction. */ +#define FC SH { 0x1f, 11, NULL, NULL, 0 }, + /* The SI field in a HTM X form instruction. */ +#define HTM_SI SH + 1 + { 0x1f, 11, NULL, NULL, PPC_OPERAND_SIGNED }, + /* The SH field in an MD form instruction. This is split. 
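The low five bits are stored at bit 11 and the sixth bit at bit 1, so a shift count of 35 is encoded as 3 in the low part with bit 1 set; see insert_sh6 and extract_sh6 below.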
*/ -#define SH6 SH + 1 +#define SH6 HTM_SI + 1 #define SH6_MASK ((0x1f << 11) | (1 << 1)) - { 0x3f, -1, insert_sh6, extract_sh6, 0 }, + { 0x3f, PPC_OPSHIFT_INV, insert_sh6, extract_sh6, 0 }, /* The SH field of the tlbwe instruction, which is optional. */ #define SHO SH6 + 1 @@ -431,10 +657,15 @@ const struct powerpc_operand powerpc_operands[] = #define SISIGNOPT SI + 1 { 0xffff, 0, NULL, NULL, PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT }, + /* The SI8 field in a D form instruction. */ +#define SI8 SISIGNOPT + 1 + { 0xff, 0, NULL, NULL, PPC_OPERAND_SIGNED }, + /* The SPR field in an XFX form instruction. This is flipped--the lower 5 bits are stored in the upper 5 and vice- versa. */ -#define SPR SISIGNOPT + 1 +#define SPR SI8 + 1 #define PMR SPR +#define TMR SPR #define SPR_MASK (0x3ff << 11) { 0x3ff, 11, insert_spr, extract_spr, 0 }, @@ -449,23 +680,37 @@ const struct powerpc_operand powerpc_operands[] = /* The SR field in an X form instruction. */ #define SR SPRG + 1 + /* The 4-bit UIMM field in a VX form instruction. */ +#define UIMM4 SR { 0xf, 16, NULL, NULL, 0 }, /* The STRM field in an X AltiVec form instruction. */ #define STRM SR + 1 + /* The T field in a tlbilx form instruction. */ +#define T STRM + /* The L field in wclr instructions. */ +#define L2 STRM { 0x3, 21, NULL, NULL, 0 }, + /* The ESYNC field in an X (sync) form instruction. */ +#define ESYNC STRM + 1 + { 0xf, 16, insert_esync, NULL, PPC_OPERAND_OPTIONAL }, + /* The SV field in a POWER SC form instruction. */ -#define SV STRM + 1 +#define SV ESYNC + 1 { 0x3fff, 2, NULL, NULL, 0 }, /* The TBR field in an XFX form instruction. This is like the SPR field, but it is optional. */ #define TBR SV + 1 - { 0x3ff, 11, insert_tbr, extract_tbr, PPC_OPERAND_OPTIONAL }, + { 0x3ff, 11, insert_tbr, extract_tbr, + PPC_OPERAND_OPTIONAL | PPC_OPERAND_OPTIONAL_VALUE}, + /* If the TBR operand is ommitted, use the value 268. */ + { -1, 268, NULL, NULL, 0}, /* The TO field in a D or X form instruction. */ -#define TO TBR + 1 +#define TO TBR + 2 +#define DUI TO #define TO_MASK (0x1f << 21) { 0x1f, 21, NULL, NULL, 0 }, @@ -473,8 +718,23 @@ const struct powerpc_operand powerpc_operands[] = #define UI TO + 1 { 0xffff, 0, NULL, NULL, 0 }, +#define UISIGNOPT UI + 1 + { 0xffff, 0, NULL, NULL, PPC_OPERAND_SIGNOPT }, + + /* The IMM field in an SE_IM5 instruction. */ +#define UI5 UISIGNOPT + 1 + { 0x1f, 4, NULL, NULL, 0 }, + + /* The OIMM field in an SE_OIM5 instruction. */ +#define OIMM5 UI5 + 1 + { 0x1f, PPC_OPSHIFT_INV, insert_oimm, extract_oimm, PPC_OPERAND_PLUS1 }, + + /* The UI7 field in an SE_LI instruction. */ +#define UI7 OIMM5 + 1 + { 0x7f, 4, NULL, NULL, 0 }, + /* The VA field in a VA, VX or VXR form instruction. */ -#define VA UI + 1 +#define VA UI7 + 1 { 0x1f, 16, NULL, NULL, PPC_OPERAND_VR }, /* The VB field in a VA, VX or VXR form instruction. */ @@ -490,17 +750,34 @@ const struct powerpc_operand powerpc_operands[] = #define VS VD { 0x1f, 21, NULL, NULL, PPC_OPERAND_VR }, - /* The SIMM field in a VX form instruction. */ + /* The SIMM field in a VX form instruction, and TE in Z form. */ #define SIMM VD + 1 +#define TE SIMM { 0x1f, 16, NULL, NULL, PPC_OPERAND_SIGNED}, - /* The UIMM field in a VX form instruction, and TE in Z form. */ + /* The UIMM field in a VX form instruction. */ #define UIMM SIMM + 1 -#define TE UIMM +#define DCTL UIMM { 0x1f, 16, NULL, NULL, 0 }, + /* The 3-bit UIMM field in a VX form instruction. */ +#define UIMM3 UIMM + 1 + { 0x7, 16, NULL, NULL, 0 }, + + /* The 6-bit UIM field in a X form instruction. 
*/ +#define UIM6 UIMM3 + 1 + { 0x3f, 16, NULL, NULL, 0 }, + + /* The SIX field in a VX form instruction. */ +#define SIX UIM6 + 1 + { 0xf, 11, NULL, NULL, 0 }, + + /* The PS field in a VX form instruction. */ +#define PS SIX + 1 + { 0x1, 9, NULL, NULL, 0 }, + /* The SHB field in a VA form instruction. */ -#define SHB UIMM + 1 +#define SHB PS + 1 { 0xf, 6, NULL, NULL, 0 }, /* The other UIMM field in a half word EVX form instruction. */ @@ -515,29 +792,64 @@ const struct powerpc_operand powerpc_operands[] = #define EVUIMM_8 EVUIMM_4 + 1 { 0xf8, 8, NULL, NULL, PPC_OPERAND_PARENS }, - /* The WS field. */ + /* The WS or DRM field in an X form instruction. */ #define WS EVUIMM_8 + 1 +#define DRM WS { 0x7, 11, NULL, NULL, 0 }, - /* The L field in an mtmsrd or A form instruction or W in an X form. */ -#define A_L WS + 1 + /* PowerPC paired singles extensions. */ + /* W bit in the pair singles instructions for x type instructions. */ +#define PSWM WS + 1 + /* The BO16 field in a BD8 form instruction. */ +#define BO16 PSWM + { 0x1, 10, 0, 0, 0 }, + + /* IDX bits for quantization in the pair singles instructions. */ +#define PSQ PSWM + 1 + { 0x7, 12, 0, 0, 0 }, + + /* IDX bits for quantization in the pair singles x-type instructions. */ +#define PSQM PSQ + 1 + { 0x7, 7, 0, 0, 0 }, + + /* Smaller D field for quantization in the pair singles instructions. */ +#define PSD PSQM + 1 + { 0xfff, 0, 0, 0, PPC_OPERAND_PARENS | PPC_OPERAND_SIGNED }, + + /* The L field in an mtmsrd or A form instruction or R or W in an X form. */ +#define A_L PSD + 1 #define W A_L +#define X_R A_L { 0x1, 16, NULL, NULL, PPC_OPERAND_OPTIONAL }, + /* The RMC or CY field in a Z23 form instruction. */ #define RMC A_L + 1 +#define CY RMC { 0x3, 9, NULL, NULL, 0 }, #define R RMC + 1 { 0x1, 16, NULL, NULL, 0 }, -#define SP R + 1 +#define RIC R + 1 + { 0x3, 18, NULL, NULL, PPC_OPERAND_OPTIONAL }, + +#define PRS RIC + 1 + { 0x1, 17, NULL, NULL, PPC_OPERAND_OPTIONAL }, + +#define SP PRS + 1 { 0x3, 19, NULL, NULL, 0 }, #define S SP + 1 { 0x1, 20, NULL, NULL, 0 }, + /* The S field in a XL form instruction. */ +#define SXL S + 1 + { 0x1, 11, NULL, NULL, PPC_OPERAND_OPTIONAL | PPC_OPERAND_OPTIONAL_VALUE}, + /* If the SXL operand is ommitted, use the value 1. */ + { -1, 1, NULL, NULL, 0}, + /* SH field starting at bit position 16. */ -#define SH16 S + 1 +#define SH16 SXL + 2 /* The DCM and DGM fields in a Z form instruction. */ #define DCM SH16 #define DGM DCM @@ -548,8 +860,106 @@ const struct powerpc_operand powerpc_operands[] = { 0x1, 0, NULL, NULL, PPC_OPERAND_OPTIONAL }, /* The L field in an mtfsf or XFL form instruction. */ + /* The A field in a HTM X form instruction. */ #define XFL_L EH + 1 +#define HTM_A XFL_L { 0x1, 25, NULL, NULL, PPC_OPERAND_OPTIONAL}, + + /* Xilinx APU related masks and macros */ +#define FCRT XFL_L + 1 +#define FCRT_MASK (0x1f << 21) + { 0x1f, 21, 0, 0, PPC_OPERAND_FCR }, + + /* Xilinx FSL related masks and macros */ +#define FSL FCRT + 1 +#define FSL_MASK (0x1f << 11) + { 0x1f, 11, 0, 0, PPC_OPERAND_FSL }, + + /* Xilinx UDI related masks and macros */ +#define URT FSL + 1 + { 0x1f, 21, 0, 0, PPC_OPERAND_UDI }, + +#define URA URT + 1 + { 0x1f, 16, 0, 0, PPC_OPERAND_UDI }, + +#define URB URA + 1 + { 0x1f, 11, 0, 0, PPC_OPERAND_UDI }, + +#define URC URB + 1 + { 0x1f, 6, 0, 0, PPC_OPERAND_UDI }, + + /* The VLESIMM field in a D form instruction. 
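This is a 16-bit signed value split by insert_vlesi below: the top five bits of the value are shifted up ten places, into bits 21-25, while the low eleven bits stay at the bottom of the instruction.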
*/ +#define VLESIMM URC + 1 + { 0xffff, PPC_OPSHIFT_INV, insert_vlesi, extract_vlesi, + PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT }, + + /* The VLENSIMM field in a D form instruction. */ +#define VLENSIMM VLESIMM + 1 + { 0xffff, PPC_OPSHIFT_INV, insert_vlensi, extract_vlensi, + PPC_OPERAND_NEGATIVE | PPC_OPERAND_SIGNED | PPC_OPERAND_SIGNOPT }, + + /* The VLEUIMM field in a D form instruction. */ +#define VLEUIMM VLENSIMM + 1 + { 0xffff, PPC_OPSHIFT_INV, insert_vleui, extract_vleui, 0 }, + + /* The VLEUIMML field in a D form instruction. */ +#define VLEUIMML VLEUIMM + 1 + { 0xffff, PPC_OPSHIFT_INV, insert_vleil, extract_vleil, 0 }, + + /* The XT and XS fields in an XX1 or XX3 form instruction. This is split. */ +#define XS6 VLEUIMML + 1 +#define XT6 XS6 + { 0x3f, PPC_OPSHIFT_INV, insert_xt6, extract_xt6, PPC_OPERAND_VSR }, + + /* The XT and XS fields in an DQ form VSX instruction. This is split. */ +#define XSQ6 XT6 + 1 +#define XTQ6 XSQ6 + { 0x3f, PPC_OPSHIFT_INV, insert_xtq6, extract_xtq6, PPC_OPERAND_VSR }, + + /* The XA field in an XX3 form instruction. This is split. */ +#define XA6 XTQ6 + 1 + { 0x3f, PPC_OPSHIFT_INV, insert_xa6, extract_xa6, PPC_OPERAND_VSR }, + + /* The XB field in an XX2 or XX3 form instruction. This is split. */ +#define XB6 XA6 + 1 + { 0x3f, PPC_OPSHIFT_INV, insert_xb6, extract_xb6, PPC_OPERAND_VSR }, + + /* The XB field in an XX3 form instruction when it must be the same as + the XA field in the instruction. This is used in extended mnemonics + like xvmovdp. This is split. */ +#define XB6S XB6 + 1 + { 0x3f, PPC_OPSHIFT_INV, insert_xb6s, extract_xb6s, PPC_OPERAND_FAKE }, + + /* The XC field in an XX4 form instruction. This is split. */ +#define XC6 XB6S + 1 + { 0x3f, PPC_OPSHIFT_INV, insert_xc6, extract_xc6, PPC_OPERAND_VSR }, + + /* The DM or SHW field in an XX3 form instruction. */ +#define DM XC6 + 1 +#define SHW DM + { 0x3, 8, NULL, NULL, 0 }, + + /* The DM field in an extended mnemonic XX3 form instruction. */ +#define DMEX DM + 1 + { 0x3, 8, insert_dm, extract_dm, 0 }, + + /* The UIM field in an XX2 form instruction. */ +#define UIM DMEX + 1 + /* The 2-bit UIMM field in a VX form instruction. */ +#define UIMM2 UIM + /* The 2-bit L field in a darn instruction. */ +#define LRAND UIM + { 0x3, 16, NULL, NULL, 0 }, + +#define ERAT_T UIM + 1 + { 0x7, 21, NULL, NULL, 0 }, + +#define IH ERAT_T + 1 + { 0x7, 21, NULL, NULL, PPC_OPERAND_OPTIONAL }, + + /* The 8-bit IMM8 field in a XX1 form instruction. */ +#define IMM8 IH + 1 + { 0xff, 11, NULL, NULL, PPC_OPERAND_SIGNOPT }, }; const unsigned int num_powerpc_operands = (sizeof (powerpc_operands) @@ -557,6 +967,112 @@ const unsigned int num_powerpc_operands = (sizeof (powerpc_operands) /* The functions used to insert and extract complicated operands. */ +/* The ARX, ARY, RX and RY operands are alternate encodings of GPRs. 
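RX and RY encode r0-r7 as field values 0-7 and r24-r31 as 8-15, while ARX and ARY encode r8-r23 as 0-15; the insert and extract helpers below perform this mapping and reject any other register.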
*/ + +static unsigned long +insert_arx (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + if (value >= 8 && value < 24) + return insn | ((value - 8) & 0xf); + else + { + *errmsg = _("invalid register"); + return 0; + } +} + +static long +extract_arx (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return (insn & 0xf) + 8; +} + +static unsigned long +insert_ary (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + if (value >= 8 && value < 24) + return insn | (((value - 8) & 0xf) << 4); + else + { + *errmsg = _("invalid register"); + return 0; + } +} + +static long +extract_ary (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn >> 4) & 0xf) + 8; +} + +static unsigned long +insert_rx (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if (value >= 0 && value < 8) + return insn | value; + else if (value >= 24 && value <= 31) + return insn | (value - 16); + else + { + *errmsg = _("invalid register"); + return 0; + } +} + +static long +extract_rx (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + int value = insn & 0xf; + if (value >= 0 && value < 8) + return value; + else + return value + 16; +} + +static unsigned long +insert_ry (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if (value >= 0 && value < 8) + return insn | (value << 4); + else if (value >= 24 && value <= 31) + return insn | ((value - 16) << 4); + else + { + *errmsg = _("invalid register"); + return 0; + } +} + +static long +extract_ry (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + int value = (insn >> 4) & 0xf; + if (value >= 0 && value < 8) + return value; + else + return value + 16; +} + /* The BA field in an XL form instruction when it must be the same as the BT field in the same instruction. This operand is marked FAKE. The insertion function just copies the BT field into the BA field, @@ -566,7 +1082,7 @@ const unsigned int num_powerpc_operands = (sizeof (powerpc_operands) static unsigned long insert_bat (unsigned long insn, long value ATTRIBUTE_UNUSED, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, const char **errmsg ATTRIBUTE_UNUSED) { return insn | (((insn >> 21) & 0x1f) << 16); @@ -574,7 +1090,7 @@ insert_bat (unsigned long insn, static long extract_bat (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, int *invalid) { if (((insn >> 21) & 0x1f) != ((insn >> 16) & 0x1f)) @@ -591,7 +1107,7 @@ extract_bat (unsigned long insn, static unsigned long insert_bba (unsigned long insn, long value ATTRIBUTE_UNUSED, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, const char **errmsg ATTRIBUTE_UNUSED) { return insn | (((insn >> 16) & 0x1f) << 11); @@ -599,7 +1115,7 @@ insert_bba (unsigned long insn, static long extract_bba (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, int *invalid) { if (((insn >> 16) & 0x1f) != ((insn >> 11) & 0x1f)) @@ -624,13 +1140,15 @@ extract_bba (unsigned long insn, extract_bdp always occur in pairs. One or the other will always be valid. 
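The ISA_V2 macro defined below groups the dialects (POWER4, E500MC, TITAN) whose branches use the 'at' hint bits rather than the older 'y' bit.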
*/ +#define ISA_V2 (PPC_OPCODE_POWER4 | PPC_OPCODE_E500MC | PPC_OPCODE_TITAN) + static unsigned long insert_bdm (unsigned long insn, long value, - int dialect, + ppc_cpu_t dialect, const char **errmsg ATTRIBUTE_UNUSED) { - if ((dialect & PPC_OPCODE_POWER4) == 0) + if ((dialect & ISA_V2) == 0) { if ((value & 0x8000) != 0) insn |= 1 << 21; @@ -647,10 +1165,10 @@ insert_bdm (unsigned long insn, static long extract_bdm (unsigned long insn, - int dialect, + ppc_cpu_t dialect, int *invalid) { - if ((dialect & PPC_OPCODE_POWER4) == 0) + if ((dialect & ISA_V2) == 0) { if (((insn & (1 << 21)) == 0) != ((insn & (1 << 15)) == 0)) *invalid = 1; @@ -672,10 +1190,10 @@ extract_bdm (unsigned long insn, static unsigned long insert_bdp (unsigned long insn, long value, - int dialect, + ppc_cpu_t dialect, const char **errmsg ATTRIBUTE_UNUSED) { - if ((dialect & PPC_OPCODE_POWER4) == 0) + if ((dialect & ISA_V2) == 0) { if ((value & 0x8000) == 0) insn |= 1 << 21; @@ -692,10 +1210,10 @@ insert_bdp (unsigned long insn, static long extract_bdp (unsigned long insn, - int dialect, + ppc_cpu_t dialect, int *invalid) { - if ((dialect & PPC_OPCODE_POWER4) == 0) + if ((dialect & ISA_V2) == 0) { if (((insn & (1 << 21)) == 0) == ((insn & (1 << 15)) == 0)) *invalid = 1; @@ -710,52 +1228,41 @@ extract_bdp (unsigned long insn, return ((insn & 0xfffc) ^ 0x8000) - 0x8000; } -/* Check for legal values of a BO field. */ - -static int -valid_bo (long value, int dialect, int extract) +static inline int +valid_bo_pre_v2 (long value) { - if ((dialect & PPC_OPCODE_POWER4) == 0) - { - int valid; - /* Certain encodings have bits that are required to be zero. - These are (z must be zero, y may be anything): - 001zy - 011zy - 1z00y - 1z01y - 1z1zz - */ - switch (value & 0x14) - { - default: - case 0: - valid = 1; - break; - case 0x4: - valid = (value & 0x2) == 0; - break; - case 0x10: - valid = (value & 0x8) == 0; - break; - case 0x14: - valid = value == 0x14; - break; - } - /* When disassembling with -Many, accept power4 encodings too. */ - if (valid - || (dialect & PPC_OPCODE_ANY) == 0 - || !extract) - return valid; - } + /* Certain encodings have bits that are required to be zero. + These are (z must be zero, y may be anything): + 0000y + 0001y + 001zy + 0100y + 0101y + 011zy + 1z00y + 1z01y + 1z1zz + */ + if ((value & 0x14) == 0) + return 1; + else if ((value & 0x14) == 0x4) + return (value & 0x2) == 0; + else if ((value & 0x14) == 0x10) + return (value & 0x8) == 0; + else + return value == 0x14; +} +static inline int +valid_bo_post_v2 (long value) +{ /* Certain encodings have bits that are required to be zero. These are (z must be zero, a & t may be anything): 0000z 0001z + 001at 0100z 0101z - 001at 011at 1a00t 1a01t @@ -769,23 +1276,43 @@ valid_bo (long value, int dialect, int extract) return 1; } +/* Check for legal values of a BO field. */ + +static int +valid_bo (long value, ppc_cpu_t dialect, int extract) +{ + int valid_y = valid_bo_pre_v2 (value); + int valid_at = valid_bo_post_v2 (value); + + /* When disassembling with -Many, accept either encoding on the + second pass through opcodes. */ + if (extract && dialect == ~(ppc_cpu_t) PPC_OPCODE_ANY) + return valid_y || valid_at; + if ((dialect & ISA_V2) == 0) + return valid_y; + else + return valid_at; +} + /* The BO field in a B form instruction. Warn about attempts to set the field to an illegal value. 
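The insert functions below also report "invalid counter access" for BO values that decrement the CTR in a bcctr-style branch (opcode 19 with bit 0x400 set).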
*/ static unsigned long insert_bo (unsigned long insn, long value, - int dialect, + ppc_cpu_t dialect, const char **errmsg) { if (!valid_bo (value, dialect, 0)) *errmsg = _("invalid conditional option"); + else if (PPC_OP (insn) == 19 && (insn & 0x400) && ! (value & 4)) + *errmsg = _("invalid counter access"); return insn | ((value & 0x1f) << 21); } static long extract_bo (unsigned long insn, - int dialect, + ppc_cpu_t dialect, int *invalid) { long value; @@ -803,11 +1330,13 @@ extract_bo (unsigned long insn, static unsigned long insert_boe (unsigned long insn, long value, - int dialect, + ppc_cpu_t dialect, const char **errmsg) { if (!valid_bo (value, dialect, 0)) *errmsg = _("invalid conditional option"); + else if (PPC_OP (insn) == 19 && (insn & 0x400) && ! (value & 4)) + *errmsg = _("invalid counter access"); else if ((value & 1) != 0) *errmsg = _("attempt to set y bit when using + or - modifier"); @@ -816,7 +1345,7 @@ insert_boe (unsigned long insn, static long extract_boe (unsigned long insn, - int dialect, + ppc_cpu_t dialect, int *invalid) { long value; @@ -827,12 +1356,70 @@ extract_boe (unsigned long insn, return value & 0x1e; } +/* The DCMX field in a X form instruction when the field is split + into separate DC, DM and DX fields. */ + +static unsigned long +insert_dcmxs (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1f) << 16) | ((value & 0x20) >> 3) | (value & 0x40); +} + +static long +extract_dcmxs (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return (insn & 0x40) | ((insn << 3) & 0x20) | ((insn >> 16) & 0x1f); +} + +/* The D field in a DX form instruction when the field is split + into separate D0, D1 and D2 fields. */ + +static unsigned long +insert_dxd (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | (value & 0xffc1) | ((value & 0x3e) << 15); +} + +static long +extract_dxd (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + unsigned long dxd = (insn & 0xffc1) | ((insn >> 15) & 0x3e); + return (dxd ^ 0x8000) - 0x8000; +} + +static unsigned long +insert_dxdn (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insert_dxd (insn, -value, dialect, errmsg); +} + +static long +extract_dxdn (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return -extract_dxd (insn, dialect, invalid); +} + /* FXM mask in mfcr and mtcrf instructions. */ static unsigned long insert_fxm (unsigned long insn, long value, - int dialect, + ppc_cpu_t dialect, const char **errmsg) { /* If we're handling the mfocrf and mtocrf insns ensure that exactly @@ -846,19 +1433,13 @@ insert_fxm (unsigned long insn, } } - /* If the optional field on mfcr is missing that means we want to use - the old form of the instruction that moves the whole cr. In that - case we'll have VALUE zero. There doesn't seem to be a way to - distinguish this from the case where someone writes mfcr %r3,0. */ - else if (value == 0) - ; - /* If only one bit of the FXM field is set, we can use the new form of the instruction, which is faster. Unlike the Power4 branch hint encoding, this is not backward compatible. Do not generate the new form unless -mpower4 has been given, or -many and the two operand form of mfcr was used. 
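In the code below, (value & -value) == value is the usual single-bit test; the added value > 0 guard keeps a zero or omitted (-1) mask from being treated as a one-bit mask.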
*/ - else if ((value & -value) == value + else if (value > 0 + && (value & -value) == value && ((dialect & PPC_OPCODE_POWER4) != 0 || ((dialect & PPC_OPCODE_ANY) != 0 && (insn & (0x3ff << 1)) == 19 << 1))) @@ -867,7 +1448,10 @@ insert_fxm (unsigned long insn, /* Any other value on mfcr is an error. */ else if ((insn & (0x3ff << 1)) == 19 << 1) { - *errmsg = _("ignoring invalid mfcr mask"); + /* A value of -1 means we used the one operand form of + mfcr which is valid. */ + if (value != -1) + *errmsg = _("invalid mfcr mask"); value = 0; } @@ -876,7 +1460,7 @@ insert_fxm (unsigned long insn, static long extract_fxm (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, int *invalid) { long mask = (insn >> 12) & 0xff; @@ -894,11 +1478,88 @@ extract_fxm (unsigned long insn, { if (mask != 0) *invalid = 1; + else + mask = -1; } return mask; } +static unsigned long +insert_li20 (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0xf0000) >> 5) | ((value & 0x0f800) << 5) | (value & 0x7ff); +} + +static long +extract_li20 (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + long ext = ((insn & 0x4000) == 0x4000) ? 0xfff00000 : 0x00000000; + + return ext + | (((insn >> 11) & 0xf) << 16) + | (((insn >> 17) & 0xf) << 12) + | (((insn >> 16) & 0x1) << 11) + | (insn & 0x7ff); +} + +/* The 2-bit L field in a SYNC or WC field in a WAIT instruction. + For SYNC, some L values are reserved: + * Value 3 is reserved on newer server cpus. + * Values 2 and 3 are reserved on all other cpus. */ + +static unsigned long +insert_ls (unsigned long insn, + long value, + ppc_cpu_t dialect, + const char **errmsg) +{ + /* For SYNC, some L values are illegal. */ + if (((insn >> 1) & 0x3ff) == 598) + { + long max_lvalue = (dialect & PPC_OPCODE_POWER4) ? 2 : 1; + if (value > max_lvalue) + { + *errmsg = _("illegal L operand value"); + return insn; + } + } + + return insn | ((value & 0x3) << 21); +} + +/* The 4-bit E field in a sync instruction that accepts 2 operands. + If ESYNC is non-zero, then the L field must be either 0 or 1 and + the complement of ESYNC-bit2. */ + +static unsigned long +insert_esync (unsigned long insn, + long value, + ppc_cpu_t dialect, + const char **errmsg) +{ + unsigned long ls = (insn >> 21) & 0x03; + + if (value == 0) + { + if (((dialect & PPC_OPCODE_E6500) != 0 && ls > 1) + || ((dialect & PPC_OPCODE_POWER9) != 0 && ls > 2)) + *errmsg = _("illegal L operand value"); + return insn; + } + + if ((ls & ~0x1) + || (((value >> 1) & 0x1) ^ ls) == 0) + *errmsg = _("incompatible L operand value"); + + return insn | ((value & 0xf) << 16); +} + /* The MB and ME fields in an M form instruction expressed as a single operand which is itself a bitmask. 
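For example, a mask of 0x00fff000 corresponds to MB = 8 and ME = 19.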
The extraction function always marks it as invalid, since we never want to recognize an @@ -907,7 +1568,7 @@ extract_fxm (unsigned long insn, static unsigned long insert_mbe (unsigned long insn, long value, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, const char **errmsg) { unsigned long uval, mask; @@ -959,7 +1620,7 @@ insert_mbe (unsigned long insn, static long extract_mbe (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, int *invalid) { long ret; @@ -993,7 +1654,7 @@ extract_mbe (unsigned long insn, static unsigned long insert_mb6 (unsigned long insn, long value, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, const char **errmsg ATTRIBUTE_UNUSED) { return insn | ((value & 0x1f) << 6) | (value & 0x20); @@ -1001,7 +1662,7 @@ insert_mb6 (unsigned long insn, static long extract_mb6 (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, int *invalid ATTRIBUTE_UNUSED) { return ((insn >> 6) & 0x1f) | (insn & 0x20); @@ -1012,7 +1673,7 @@ extract_mb6 (unsigned long insn, static long extract_nb (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, int *invalid ATTRIBUTE_UNUSED) { long ret; @@ -1023,6 +1684,26 @@ extract_nb (unsigned long insn, return ret; } +/* The NB field in an lswi instruction, which has special value + restrictions. The value 32 is stored as 0. */ + +static unsigned long +insert_nbi (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + long rtvalue = (insn & RT_MASK) >> 21; + long ravalue = (insn & RA_MASK) >> 16; + + if (value == 0) + value = 32; + if (rtvalue + (value + 3) / 4 > (rtvalue > ravalue ? ravalue + 32 + : ravalue)) + *errmsg = _("address register in load range"); + return insn | ((value & 0x1f) << 11); +} + /* The NSI field in a D form instruction. This is the same as the SI field, only negated. The extraction function always marks it as invalid, since we never want to recognize an instruction which uses @@ -1031,7 +1712,7 @@ extract_nb (unsigned long insn, static unsigned long insert_nsi (unsigned long insn, long value, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, const char **errmsg ATTRIBUTE_UNUSED) { return insn | (-value & 0xffff); @@ -1039,7 +1720,7 @@ insert_nsi (unsigned long insn, static long extract_nsi (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, int *invalid) { *invalid = 1; @@ -1053,7 +1734,7 @@ extract_nsi (unsigned long insn, static unsigned long insert_ral (unsigned long insn, long value, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, const char **errmsg) { if (value == 0 @@ -1068,7 +1749,7 @@ insert_ral (unsigned long insn, static unsigned long insert_ram (unsigned long insn, long value, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, const char **errmsg) { if ((unsigned long) value >= ((insn >> 21) & 0x1f)) @@ -1076,13 +1757,13 @@ insert_ram (unsigned long insn, return insn | ((value & 0x1f) << 16); } -/* The RA field in the DQ form lq instruction, which has special +/* The RA field in the DQ form lq or an lswx instruction, which have special value restrictions. 
*/ static unsigned long insert_raq (unsigned long insn, long value, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, const char **errmsg) { long rtvalue = (insn & RT_MASK) >> 21; @@ -1099,7 +1780,7 @@ insert_raq (unsigned long insn, static unsigned long insert_ras (unsigned long insn, long value, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, const char **errmsg) { if (value == 0) @@ -1116,7 +1797,7 @@ insert_ras (unsigned long insn, static unsigned long insert_rbs (unsigned long insn, long value ATTRIBUTE_UNUSED, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, const char **errmsg ATTRIBUTE_UNUSED) { return insn | (((insn >> 21) & 0x1f) << 11); @@ -1124,7 +1805,7 @@ insert_rbs (unsigned long insn, static long extract_rbs (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, int *invalid) { if (((insn >> 21) & 0x1f) != ((insn >> 11) & 0x1f)) @@ -1132,59 +1813,215 @@ extract_rbs (unsigned long insn, return 0; } -/* The SH field in an MD form instruction. This is split. */ +/* The RB field in an lswx instruction, which has special value + restrictions. */ static unsigned long -insert_sh6 (unsigned long insn, +insert_rbx (unsigned long insn, long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg ATTRIBUTE_UNUSED) + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg) { - return insn | ((value & 0x1f) << 11) | ((value & 0x20) >> 4); -} + long rtvalue = (insn & RT_MASK) >> 21; -static long -extract_sh6 (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) -{ - return ((insn >> 11) & 0x1f) | ((insn << 4) & 0x20); + if (value == rtvalue) + *errmsg = _("source and target register operands must be different"); + return insn | ((value & 0x1f) << 11); } -/* The SPR field in an XFX form instruction. This is flipped--the - lower 5 bits are stored in the upper 5 and vice- versa. */ - +/* The SCI8 field is made up of SCL and {U,N}I8 fields. 
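insert_sci8 below keeps the significant byte of a 32-bit value in the low eight bits, records which byte lane it came from in the SCL bits (0x300), and sets bit 0x400 when the remaining bytes are all ones rather than zero; 0x00ff0000, for instance, is stored as UI8 0xff with SCL 2.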
*/ static unsigned long -insert_spr (unsigned long insn, - long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg ATTRIBUTE_UNUSED) +insert_sci8 (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg) { - return insn | ((value & 0x1f) << 16) | ((value & 0x3e0) << 6); -} + unsigned int fill_scale = 0; + unsigned long ui8 = value; -static long -extract_spr (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, + if ((ui8 & 0xffffff00) == 0) + ; + else if ((ui8 & 0xffffff00) == 0xffffff00) + fill_scale = 0x400; + else if ((ui8 & 0xffff00ff) == 0) + { + fill_scale = 1 << 8; + ui8 >>= 8; + } + else if ((ui8 & 0xffff00ff) == 0xffff00ff) + { + fill_scale = 0x400 | (1 << 8); + ui8 >>= 8; + } + else if ((ui8 & 0xff00ffff) == 0) + { + fill_scale = 2 << 8; + ui8 >>= 16; + } + else if ((ui8 & 0xff00ffff) == 0xff00ffff) + { + fill_scale = 0x400 | (2 << 8); + ui8 >>= 16; + } + else if ((ui8 & 0x00ffffff) == 0) + { + fill_scale = 3 << 8; + ui8 >>= 24; + } + else if ((ui8 & 0x00ffffff) == 0x00ffffff) + { + fill_scale = 0x400 | (3 << 8); + ui8 >>= 24; + } + else + { + *errmsg = _("illegal immediate value"); + ui8 = 0; + } + + return insn | fill_scale | (ui8 & 0xff); +} + +static long +extract_sci8 (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + int fill = insn & 0x400; + int scale_factor = (insn & 0x300) >> 5; + long value = (insn & 0xff) << scale_factor; + + if (fill != 0) + value |= ~((long) 0xff << scale_factor); + return value; +} + +static unsigned long +insert_sci8n (unsigned long insn, + long value, + ppc_cpu_t dialect, + const char **errmsg) +{ + return insert_sci8 (insn, -value, dialect, errmsg); +} + +static long +extract_sci8n (unsigned long insn, + ppc_cpu_t dialect, + int *invalid) +{ + return -extract_sci8 (insn, dialect, invalid); +} + +static unsigned long +insert_sd4h (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1e) << 7); +} + +static long +extract_sd4h (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn >> 8) & 0xf) << 1; +} + +static unsigned long +insert_sd4w (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x3c) << 6); +} + +static long +extract_sd4w (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn >> 8) & 0xf) << 2; +} + +static unsigned long +insert_oimm (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | (((value - 1) & 0x1f) << 4); +} + +static long +extract_oimm (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn >> 4) & 0x1f) + 1; +} + +/* The SH field in an MD form instruction. This is split. */ + +static unsigned long +insert_sh6 (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + /* SH6 operand in the rldixor instructions. */ + if (PPC_OP (insn) == 4) + return insn | ((value & 0x1f) << 6) | ((value & 0x20) >> 5); + else + return insn | ((value & 0x1f) << 11) | ((value & 0x20) >> 4); +} + +static long +extract_sh6 (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + /* SH6 operand in the rldixor instructions. 
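For primary opcode 4 the split halves live at bits 6 and 0 of the instruction rather than at bits 11 and 1.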
*/ + if (PPC_OP (insn) == 4) + return ((insn >> 6) & 0x1f) | ((insn << 5) & 0x20); + else + return ((insn >> 11) & 0x1f) | ((insn << 4) & 0x20); +} + +/* The SPR field in an XFX form instruction. This is flipped--the + lower 5 bits are stored in the upper 5 and vice- versa. */ + +static unsigned long +insert_spr (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1f) << 16) | ((value & 0x3e0) << 6); +} + +static long +extract_spr (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, int *invalid ATTRIBUTE_UNUSED) { return ((insn >> 16) & 0x1f) | ((insn >> 6) & 0x3e0); } /* Some dialects have 8 SPRG registers instead of the standard 4. */ +#define ALLOW8_SPRG (PPC_OPCODE_BOOKE | PPC_OPCODE_405) static unsigned long insert_sprg (unsigned long insn, long value, - int dialect, + ppc_cpu_t dialect, const char **errmsg) { - /* This check uses PPC_OPCODE_403 because PPC405 is later defined - as a synonym. If ever a 405 specific dialect is added this - check should use that instead. */ if (value > 7 - || (value > 3 - && (dialect & (PPC_OPCODE_BOOKE | PPC_OPCODE_403)) == 0)) + || (value > 3 && (dialect & ALLOW8_SPRG) == 0)) *errmsg = _("invalid sprg number"); /* If this is mfsprg4..7 then use spr 260..263 which can be read in @@ -1197,54 +2034,272 @@ insert_sprg (unsigned long insn, static long extract_sprg (unsigned long insn, - int dialect, + ppc_cpu_t dialect, int *invalid) { unsigned long val = (insn >> 16) & 0x1f; /* mfsprg can use 260..263 and 272..279. mtsprg only uses spr 272..279 - If not BOOKE or 405, then both use only 272..275. */ - if (val <= 3 - || (val < 0x10 && (insn & 0x100) != 0) - || (val - 0x10 > 3 - && (dialect & (PPC_OPCODE_BOOKE | PPC_OPCODE_403)) == 0)) + If not BOOKE, 405 or VLE, then both use only 272..275. */ + if ((val - 0x10 > 3 && (dialect & ALLOW8_SPRG) == 0) + || (val - 0x10 > 7 && (insn & 0x100) != 0) + || val <= 3 + || (val & 8) != 0) *invalid = 1; return val & 7; } /* The TBR field in an XFX instruction. This is just like SPR, but it - is optional. When TBR is omitted, it must be inserted as 268 (the - magic number of the TB register). These functions treat 0 - (indicating an omitted optional operand) as 268. This means that - ``mftb 4,0'' is not handled correctly. This does not matter very - much, since the architecture manual does not define mftb as - accepting any values other than 268 or 269. */ - -#define TB (268) + is optional. */ static unsigned long insert_tbr (unsigned long insn, long value, - int dialect ATTRIBUTE_UNUSED, - const char **errmsg ATTRIBUTE_UNUSED) + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg) { - if (value == 0) - value = TB; + if (value != 268 && value != 269) + *errmsg = _("invalid tbr number"); return insn | ((value & 0x1f) << 16) | ((value & 0x3e0) << 6); } static long extract_tbr (unsigned long insn, - int dialect ATTRIBUTE_UNUSED, - int *invalid ATTRIBUTE_UNUSED) + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid) { long ret; ret = ((insn >> 16) & 0x1f) | ((insn >> 6) & 0x3e0); - if (ret == TB) - ret = 0; + if (ret != 268 && ret != 269) + *invalid = 1; return ret; } + +/* The XT and XS fields in an XX1 or XX3 form instruction. This is split. 
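The low five bits of the register number go in the usual RT position at bit 21 and the sixth bit becomes instruction bit 0, so for example vs40 is stored as 8 in RT with bit 0 set; see insert_xt6 and extract_xt6 below.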
*/ + +static unsigned long +insert_xt6 (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1f) << 21) | ((value & 0x20) >> 5); +} + +static long +extract_xt6 (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn << 5) & 0x20) | ((insn >> 21) & 0x1f); +} + +/* The XT and XS fields in an DQ form VSX instruction. This is split. */ +static unsigned long +insert_xtq6 (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1f) << 21) | ((value & 0x20) >> 2); +} + +static long +extract_xtq6 (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn << 2) & 0x20) | ((insn >> 21) & 0x1f); +} + +/* The XA field in an XX3 form instruction. This is split. */ + +static unsigned long +insert_xa6 (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1f) << 16) | ((value & 0x20) >> 3); +} + +static long +extract_xa6 (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn << 3) & 0x20) | ((insn >> 16) & 0x1f); +} + +/* The XB field in an XX3 form instruction. This is split. */ + +static unsigned long +insert_xb6 (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1f) << 11) | ((value & 0x20) >> 4); +} + +static long +extract_xb6 (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn << 4) & 0x20) | ((insn >> 11) & 0x1f); +} + +/* The XB field in an XX3 form instruction when it must be the same as + the XA field in the instruction. This is used for extended + mnemonics like xvmovdp. This operand is marked FAKE. The insertion + function just copies the XA field into the XB field, and the + extraction function just checks that the fields are the same. */ + +static unsigned long +insert_xb6s (unsigned long insn, + long value ATTRIBUTE_UNUSED, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | (((insn >> 16) & 0x1f) << 11) | (((insn >> 2) & 0x1) << 1); +} + +static long +extract_xb6s (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid) +{ + if ((((insn >> 16) & 0x1f) != ((insn >> 11) & 0x1f)) + || (((insn >> 2) & 0x1) != ((insn >> 1) & 0x1))) + *invalid = 1; + return 0; +} + +/* The XC field in an XX4 form instruction. This is split. */ + +static unsigned long +insert_xc6 (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0x1f) << 6) | ((value & 0x20) >> 2); +} + +static long +extract_xc6 (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn << 2) & 0x20) | ((insn >> 6) & 0x1f); +} + +static unsigned long +insert_dm (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg) +{ + if (value != 0 && value != 1) + *errmsg = _("invalid constant"); + return insn | (((value) ? 
3 : 0) << 8); +} + +static long +extract_dm (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid) +{ + long value; + + value = (insn >> 8) & 3; + if (value != 0 && value != 3) + *invalid = 1; + return (value) ? 1 : 0; +} + +/* The VLESIMM field in an I16A form instruction. This is split. */ + +static unsigned long +insert_vlesi (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0xf800) << 10) | (value & 0x7ff); +} + +static long +extract_vlesi (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + long value = ((insn >> 10) & 0xf800) | (insn & 0x7ff); + value = (value ^ 0x8000) - 0x8000; + return value; +} + +static unsigned long +insert_vlensi (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + value = -value; + return insn | ((value & 0xf800) << 10) | (value & 0x7ff); +} +static long +extract_vlensi (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + long value = ((insn >> 10) & 0xf800) | (insn & 0x7ff); + value = (value ^ 0x8000) - 0x8000; + /* Don't use for disassembly. */ + *invalid = 1; + return -value; +} + +/* The VLEUIMM field in an I16A form instruction. This is split. */ + +static unsigned long +insert_vleui (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0xf800) << 10) | (value & 0x7ff); +} + +static long +extract_vleui (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn >> 10) & 0xf800) | (insn & 0x7ff); +} + +/* The VLEUIMML field in an I16L form instruction. This is split. */ + +static unsigned long +insert_vleil (unsigned long insn, + long value, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + const char **errmsg ATTRIBUTE_UNUSED) +{ + return insn | ((value & 0xf800) << 5) | (value & 0x7ff); +} + +static long +extract_vleil (unsigned long insn, + ppc_cpu_t dialect ATTRIBUTE_UNUSED, + int *invalid ATTRIBUTE_UNUSED) +{ + return ((insn >> 5) & 0xf800) | (insn & 0x7ff); +} + /* Macros used to form opcodes. */ @@ -1264,6 +2319,17 @@ extract_tbr (unsigned long insn, #define OPL(x,l) (OP (x) | ((((unsigned long)(l)) & 1) << 21)) #define OPL_MASK OPL (0x3f,1) +/* The main opcode combined with an update code in D form instruction. + Used for extended mnemonics for VLE memory instructions. */ +#define OPVUP(x,vup) (OP (x) | ((((unsigned long)(vup)) & 0xff) << 8)) +#define OPVUP_MASK OPVUP (0x3f, 0xff) + +/* The main opcode combined with an update code and the RT fields specified in + D form instruction. Used for VLE volatile context save/restore + instructions. */ +#define OPVUPRT(x,vup,rt) (OPVUP (x, vup) | ((((unsigned long)(rt)) & 0x1f) << 21)) +#define OPVUPRT_MASK OPVUPRT (0x3f, 0xff, 0x1f) + /* An A form instruction. */ #define A(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1) | (((unsigned long)(rc)) & 1)) #define A_MASK A (0x3f, 0x1f, 1) @@ -1284,6 +2350,43 @@ extract_tbr (unsigned long insn, #define B(op, aa, lk) (OP (op) | ((((unsigned long)(aa)) & 1) << 1) | ((lk) & 1)) #define B_MASK B (0x3f, 1, 1) +/* A BD8 form instruction. This is a 16-bit instruction. */ +#define BD8(op, aa, lk) (((((unsigned long)(op)) & 0x3f) << 10) | (((aa) & 1) << 9) | (((lk) & 1) << 8)) +#define BD8_MASK BD8 (0x3f, 1, 1) + +/* Another BD8 form instruction. 
This is a 16-bit instruction. */ +#define BD8IO(op) ((((unsigned long)(op)) & 0x1f) << 11) +#define BD8IO_MASK BD8IO (0x1f) + +/* A BD8 form instruction for simplified mnemonics. */ +#define EBD8IO(op, bo, bi) (BD8IO ((op)) | ((bo) << 10) | ((bi) << 8)) +/* A mask that excludes BO32 and BI32. */ +#define EBD8IO1_MASK 0xf800 +/* A mask that includes BO32 and excludes BI32. */ +#define EBD8IO2_MASK 0xfc00 +/* A mask that include BO32 AND BI32. */ +#define EBD8IO3_MASK 0xff00 + +/* A BD15 form instruction. */ +#define BD15(op, aa, lk) (OP (op) | ((((unsigned long)(aa)) & 0xf) << 22) | ((lk) & 1)) +#define BD15_MASK BD15 (0x3f, 0xf, 1) + +/* A BD15 form instruction for extended conditional branch mnemonics. */ +#define EBD15(op, aa, bo, lk) (((op) & 0x3f) << 26) | (((aa) & 0xf) << 22) | (((bo) & 0x3) << 20) | ((lk) & 1) +#define EBD15_MASK 0xfff00001 + +/* A BD15 form instruction for extended conditional branch mnemonics with BI. */ +#define EBD15BI(op, aa, bo, bi, lk) (((op) & 0x3f) << 26) \ + | (((aa) & 0xf) << 22) \ + | (((bo) & 0x3) << 20) \ + | (((bi) & 0x3) << 16) \ + | ((lk) & 1) +#define EBD15BI_MASK 0xfff30001 + +/* A BD24 form instruction. */ +#define BD24(op, aa, lk) (OP (op) | ((((unsigned long)(aa)) & 1) << 25) | ((lk) & 1)) +#define BD24_MASK BD24 (0x3f, 1, 1) + /* A B form instruction setting the BO field. */ #define BBO(op, bo, aa, lk) (B ((op), (aa), (lk)) | ((((unsigned long)(bo)) & 0x1f) << 21)) #define BBO_MASK BBO (0x3f, 0x1f, 1, 1) @@ -1291,7 +2394,7 @@ extract_tbr (unsigned long insn, /* A BBO_MASK with the y bit of the BO field removed. This permits matching a conditional branch regardless of the setting of the y bit. Similarly for the 'at' bits used for power4 branch hints. */ -#define Y_MASK (((unsigned long) 1) << 21) +#define Y_MASK (((unsigned long) 1) << 21) #define AT1_MASK (((unsigned long) 3) << 21) #define AT2_MASK (((unsigned long) 9) << 21) #define BBOY_MASK (BBO_MASK &~ Y_MASK) @@ -1312,6 +2415,12 @@ extract_tbr (unsigned long insn, #define BBOYBI_MASK (BBOYCB_MASK | BI_MASK) #define BBOATBI_MASK (BBOAT2CB_MASK | BI_MASK) +/* A VLE C form instruction. */ +#define C_LK(x, lk) (((((unsigned long)(x)) & 0x7fff) << 1) | ((lk) & 1)) +#define C_LK_MASK C_LK(0x7fff, 1) +#define C(x) ((((unsigned long)(x)) & 0xffff)) +#define C_MASK C(0xffff) + /* An Context form instruction. */ #define CTX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x7)) #define CTX_MASK CTX(0x3f, 0x7) @@ -1323,22 +2432,46 @@ extract_tbr (unsigned long insn, /* The main opcode mask with the RA field clear. */ #define DRA_MASK (OP_MASK | RA_MASK) +/* A DQ form VSX instruction. */ +#define DQX(op, xop) (OP (op) | ((xop) & 0x7)) +#define DQX_MASK DQX (0x3f, 7) + /* A DS form instruction. */ #define DSO(op, xop) (OP (op) | ((xop) & 0x3)) #define DS_MASK DSO (0x3f, 3) -/* A DE form instruction. */ -#define DEO(op, xop) (OP (op) | ((xop) & 0xf)) -#define DE_MASK DEO (0x3e, 0xf) +/* An DX form instruction. */ +#define DX(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1)) +#define DX_MASK DX (0x3f, 0x1f) /* An EVSEL form instruction. */ #define EVSEL(op, xop) (OP (op) | (((unsigned long)(xop)) & 0xff) << 3) #define EVSEL_MASK EVSEL(0x3f, 0xff) +/* An IA16 form instruction. */ +#define IA16(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f) << 11) +#define IA16_MASK IA16(0x3f, 0x1f) + +/* An I16A form instruction. */ +#define I16A(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f) << 11) +#define I16A_MASK I16A(0x3f, 0x1f) + +/* An I16L form instruction. 
*/ +#define I16L(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f) << 11) +#define I16L_MASK I16L(0x3f, 0x1f) + +/* An IM7 form instruction. */ +#define IM7(op) ((((unsigned long)(op)) & 0x1f) << 11) +#define IM7_MASK IM7(0x1f) + /* An M form instruction. */ #define M(op, rc) (OP (op) | ((rc) & 1)) #define M_MASK M (0x3f, 1) +/* An LI20 form instruction. */ +#define LI20(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1) << 15) +#define LI20_MASK LI20(0x3f, 0x1) + /* An M form instruction with the ME field specified. */ #define MME(op, me, rc) (M ((op), (rc)) | ((((unsigned long)(me)) & 0x1f) << 1)) @@ -1369,48 +2502,189 @@ extract_tbr (unsigned long insn, #define SC(op, sa, lk) (OP (op) | ((((unsigned long)(sa)) & 1) << 1) | ((lk) & 1)) #define SC_MASK (OP_MASK | (((unsigned long)0x3ff) << 16) | (((unsigned long)1) << 1) | 1) -/* An VX form instruction. */ +/* An SCI8 form instruction. */ +#define SCI8(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 11)) +#define SCI8_MASK SCI8(0x3f, 0x1f) + +/* An SCI8 form instruction. */ +#define SCI8BF(op, fop, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 11) | (((fop) & 7) << 23)) +#define SCI8BF_MASK SCI8BF(0x3f, 7, 0x1f) + +/* An SD4 form instruction. This is a 16-bit instruction. */ +#define SD4(op) ((((unsigned long)(op)) & 0xf) << 12) +#define SD4_MASK SD4(0xf) + +/* An SE_IM5 form instruction. This is a 16-bit instruction. */ +#define SE_IM5(op, xop) (((((unsigned long)(op)) & 0x3f) << 10) | (((xop) & 0x1) << 9)) +#define SE_IM5_MASK SE_IM5(0x3f, 1) + +/* An SE_R form instruction. This is a 16-bit instruction. */ +#define SE_R(op, xop) (((((unsigned long)(op)) & 0x3f) << 10) | (((xop) & 0x3f) << 4)) +#define SE_R_MASK SE_R(0x3f, 0x3f) + +/* An SE_RR form instruction. This is a 16-bit instruction. */ +#define SE_RR(op, xop) (((((unsigned long)(op)) & 0x3f) << 10) | (((xop) & 0x3) << 8)) +#define SE_RR_MASK SE_RR(0x3f, 3) + +/* A VX form instruction. */ #define VX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x7ff)) /* The mask for an VX form instruction. */ #define VX_MASK VX(0x3f, 0x7ff) -/* An VA form instruction. */ +/* A VX_MASK with the VA field fixed. */ +#define VXVA_MASK (VX_MASK | (0x1f << 16)) + +/* A VX_MASK with the VB field fixed. */ +#define VXVB_MASK (VX_MASK | (0x1f << 11)) + +/* A VX_MASK with the VA and VB fields fixed. */ +#define VXVAVB_MASK (VX_MASK | (0x1f << 16) | (0x1f << 11)) + +/* A VX_MASK with the VD and VA fields fixed. */ +#define VXVDVA_MASK (VX_MASK | (0x1f << 21) | (0x1f << 16)) + +/* A VX_MASK with a UIMM4 field. */ +#define VXUIMM4_MASK (VX_MASK | (0x1 << 20)) + +/* A VX_MASK with a UIMM3 field. */ +#define VXUIMM3_MASK (VX_MASK | (0x3 << 19)) + +/* A VX_MASK with a UIMM2 field. */ +#define VXUIMM2_MASK (VX_MASK | (0x7 << 18)) + +/* A VX_MASK with a PS field. */ +#define VXPS_MASK (VX_MASK & ~(0x1 << 9)) + +/* A VX_MASK with the VA field fixed with a PS field. */ +#define VXVAPS_MASK ((VX_MASK | (0x1f << 16)) & ~(0x1 << 9)) + +/* A VA form instruction. */ #define VXA(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x03f)) /* The mask for an VA form instruction. */ #define VXA_MASK VXA(0x3f, 0x3f) -/* An VXR form instruction. */ +/* A VXA_MASK with a SHB field. */ +#define VXASHB_MASK (VXA_MASK | (1 << 10)) + +/* A VXR form instruction. */ #define VXR(op, xop, rc) (OP (op) | (((rc) & 1) << 10) | (((unsigned long)(xop)) & 0x3ff)) /* The mask for a VXR form instruction. */ #define VXR_MASK VXR(0x3f, 0x3ff, 1) +/* A VX form instruction with a VA tertiary opcode. 
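That is, a VX opcode with a fixed value placed in the VA field at bits 16-20, presumably so that several operations can share a single VX extended opcode.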
*/ +#define VXVA(op, xop, vaop) (VX(op,xop) | (((vaop) & 0x1f) << 16)) + +#define VXASH(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1)) +#define VXASH_MASK VXASH (0x3f, 0x1f) + /* An X form instruction. */ #define X(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1)) +/* A X form instruction for Quad-Precision FP Instructions. */ +#define XVA(op, xop, vaop) (X(op,xop) | (((vaop) & 0x1f) << 16)) + +/* An EX form instruction. */ +#define EX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x7ff)) + +/* The mask for an EX form instruction. */ +#define EX_MASK EX (0x3f, 0x7ff) + +/* An XX2 form instruction. */ +#define XX2(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 2)) + +/* A XX2 form instruction with the VA bits specified. */ +#define XX2VA(op, xop, vaop) (XX2(op,xop) | (((vaop) & 0x1f) << 16)) + +/* An XX3 form instruction. */ +#define XX3(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0xff) << 3)) + +/* An XX3 form instruction with the RC bit specified. */ +#define XX3RC(op, xop, rc) (OP (op) | (((rc) & 1) << 10) | ((((unsigned long)(xop)) & 0x7f) << 3)) + +/* An XX4 form instruction. */ +#define XX4(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x3) << 4)) + /* A Z form instruction. */ #define Z(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 1)) /* An X form instruction with the RC bit specified. */ #define XRC(op, xop, rc) (X ((op), (xop)) | ((rc) & 1)) +/* A X form instruction for Quad-Precision FP Instructions with RC bit. */ +#define XVARC(op, xop, vaop, rc) (XVA ((op), (xop), (vaop)) | ((rc) & 1)) + +/* An X form instruction with the RA bits specified as two ops. */ +#define XMMF(op, xop, mop0, mop1) (X ((op), (xop)) | ((mop0) & 3) << 19 | ((mop1) & 7) << 16) + /* A Z form instruction with the RC bit specified. */ #define ZRC(op, xop, rc) (Z ((op), (xop)) | ((rc) & 1)) /* The mask for an X form instruction. */ #define X_MASK XRC (0x3f, 0x3ff, 1) +/* The mask for an X form instruction with the BF bits specified. */ +#define XBF_MASK (X_MASK | (3 << 21)) + +/* An X form wait instruction with everything filled in except the WC field. */ +#define XWC_MASK (XRC (0x3f, 0x3ff, 1) | (7 << 23) | RA_MASK | RB_MASK) + +/* The mask for an XX1 form instruction. */ +#define XX1_MASK X (0x3f, 0x3ff) + +/* An XX1_MASK with the RB field fixed. */ +#define XX1RB_MASK (XX1_MASK | RB_MASK) + +/* The mask for an XX2 form instruction. */ +#define XX2_MASK (XX2 (0x3f, 0x1ff) | (0x1f << 16)) + +/* The mask for an XX2 form instruction with the UIM bits specified. */ +#define XX2UIM_MASK (XX2 (0x3f, 0x1ff) | (7 << 18)) + +/* The mask for an XX2 form instruction with the 4 UIM bits specified. */ +#define XX2UIM4_MASK (XX2 (0x3f, 0x1ff) | (1 << 20)) + +/* The mask for an XX2 form instruction with the BF bits specified. */ +#define XX2BF_MASK (XX2_MASK | (3 << 21) | (1)) + +/* The mask for an XX2 form instruction with the BF and DCMX bits specified. */ +#define XX2BFD_MASK (XX2 (0x3f, 0x1ff) | 1) + +/* The mask for an XX2 form instruction with a split DCMX bits specified. */ +#define XX2DCMXS_MASK XX2 (0x3f, 0x1ee) + +/* The mask for an XX3 form instruction. */ +#define XX3_MASK XX3 (0x3f, 0xff) + +/* The mask for an XX3 form instruction with the BF bits specified. */ +#define XX3BF_MASK (XX3 (0x3f, 0xff) | (3 << 21) | (1)) + +/* The mask for an XX3 form instruction with the DM or SHW bits specified. */ +#define XX3DM_MASK (XX3 (0x3f, 0x1f) | (1 << 10)) +#define XX3SHW_MASK XX3DM_MASK + +/* The mask for an XX4 form instruction. 
*/ +#define XX4_MASK XX4 (0x3f, 0x3) + +/* An X form wait instruction with everything filled in except the WC field. */ +#define XWC_MASK (XRC (0x3f, 0x3ff, 1) | (7 << 23) | RA_MASK | RB_MASK) + +/* The mask for an XMMF form instruction. */ +#define XMMF_MASK (XMMF (0x3f, 0x3ff, 3, 7) | (1)) + /* The mask for a Z form instruction. */ #define Z_MASK ZRC (0x3f, 0x1ff, 1) #define Z2_MASK ZRC (0x3f, 0xff, 1) -/* An X_MASK with the RA field fixed. */ +/* An X_MASK with the RA/VA field fixed. */ #define XRA_MASK (X_MASK | RA_MASK) +#define XVA_MASK XRA_MASK -/* An XRA_MASK with the W field clear. */ +/* An XRA_MASK with the A_L/W field clear. */ #define XWRA_MASK (XRA_MASK & ~((unsigned long) 1 << 16)) +#define XRLA_MASK XWRA_MASK /* An X_MASK with the RB field fixed. */ #define XRB_MASK (X_MASK | RB_MASK) @@ -1424,18 +2698,54 @@ extract_tbr (unsigned long insn, /* An X_MASK with the RA and RB fields fixed. */ #define XRARB_MASK (X_MASK | RA_MASK | RB_MASK) +/* An XBF_MASK with the RA and RB fields fixed. */ +#define XBFRARB_MASK (XBF_MASK | RA_MASK | RB_MASK) + /* An XRARB_MASK, but with the L bit clear. */ #define XRLARB_MASK (XRARB_MASK & ~((unsigned long) 1 << 16)) +/* An XRARB_MASK, but with the L bits in a darn instruction clear. */ +#define XLRAND_MASK (XRARB_MASK & ~((unsigned long) 3 << 16)) + /* An X_MASK with the RT and RA fields fixed. */ #define XRTRA_MASK (X_MASK | RT_MASK | RA_MASK) +/* An X_MASK with the RT and RB fields fixed. */ +#define XRTRB_MASK (X_MASK | RT_MASK | RB_MASK) + /* An XRTRA_MASK, but with L bit clear. */ #define XRTLRA_MASK (XRTRA_MASK & ~((unsigned long) 1 << 21)) +/* An X_MASK with the RT, RA and RB fields fixed. */ +#define XRTRARB_MASK (X_MASK | RT_MASK | RA_MASK | RB_MASK) + +/* An XRTRARB_MASK, but with L bit clear. */ +#define XRTLRARB_MASK (XRTRARB_MASK & ~((unsigned long) 1 << 21)) + +/* An XRTRARB_MASK, but with A bit clear. */ +#define XRTARARB_MASK (XRTRARB_MASK & ~((unsigned long) 1 << 25)) + +/* An XRTRARB_MASK, but with BF bits clear. */ +#define XRTBFRARB_MASK (XRTRARB_MASK & ~((unsigned long) 7 << 23)) + /* An X form instruction with the L bit specified. */ #define XOPL(op, xop, l) (X ((op), (xop)) | ((((unsigned long)(l)) & 1) << 21)) +/* An X form instruction with the L bits specified. */ +#define XOPL2(op, xop, l) (X ((op), (xop)) | ((((unsigned long)(l)) & 3) << 21)) + +/* An X form instruction with the L bit and RC bit specified. */ +#define XRCL(op, xop, l, rc) (XRC ((op), (xop), (rc)) | ((((unsigned long)(l)) & 1) << 21)) + +/* An X form instruction with RT fields specified */ +#define XRT(op, xop, rt) (X ((op), (xop)) \ + | ((((unsigned long)(rt)) & 0x1f) << 21)) + +/* An X form instruction with RT and RA fields specified */ +#define XRTRA(op, xop, rt, ra) (X ((op), (xop)) \ + | ((((unsigned long)(rt)) & 0x1f) << 21) \ + | ((((unsigned long)(ra)) & 0x1f) << 16)) + /* The mask for an X form comparison instruction. */ #define XCMP_MASK (X_MASK | (((unsigned long)1) << 22)) @@ -1457,6 +2767,9 @@ extract_tbr (unsigned long insn, /* An X form sync instruction with everything filled in except the LS field. */ #define XSYNC_MASK (0xff9fffff) +/* An X form sync instruction with everything filled in except the L and E fields. */ +#define XSYNCLE_MASK (0xff90ffff) + /* An X_MASK, but with the EH bit clear. */ #define XEH_MASK (X_MASK & ~((unsigned long )1)) @@ -1469,8 +2782,8 @@ extract_tbr (unsigned long insn, #define XFL_MASK XFL (0x3f, 0x3ff, 1) /* An X form isel instruction. 
*/ -#define XISEL(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1)) -#define XISEL_MASK XISEL(0x3f, 0x1f) +#define XISEL(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x1f) << 1)) +#define XISEL_MASK XISEL(0x3f, 0x1f) /* An XL form instruction with the LK field set to 0. */ #define XL(op, xop) (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1)) @@ -1481,6 +2794,9 @@ extract_tbr (unsigned long insn, /* The mask for an XL form instruction. */ #define XL_MASK XLLK (0x3f, 0x3ff, 1) +/* An XL_MASK with the RT, RA and RB fields fixed, but S bit clear. */ +#define XLS_MASK ((XL_MASK | RT_MASK | RA_MASK | RB_MASK) & ~(1 << 11)) + /* An XL form instruction which explicitly sets the BO field. */ #define XLO(op, bo, xop, lk) \ (XLLK ((op), (xop), (lk)) | ((((unsigned long)(bo)) & 0x1f) << 21)) @@ -1511,6 +2827,9 @@ extract_tbr (unsigned long insn, /* An XL_MASK with the BO, BI and BB fields fixed. */ #define XLBOBIBB_MASK (XL_MASK | BO_MASK | BI_MASK | BB_MASK) +/* An X form mbar instruction with MO field. */ +#define XMBAR(op, xop, mo) (X ((op), (xop)) | ((((unsigned long)(mo)) & 1) << 21)) + /* An XO form instruction. */ #define XO(op, xop, oe, rc) \ (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 1) | ((((unsigned long)(oe)) & 1) << 10) | (((unsigned long)(rc)) & 1)) @@ -1519,6 +2838,12 @@ extract_tbr (unsigned long insn, /* An XO_MASK with the RB field fixed. */ #define XORB_MASK (XO_MASK | RB_MASK) +/* An XOPS form instruction for paired singles. */ +#define XOPS(op, xop, rc) \ + (OP (op) | ((((unsigned long)(xop)) & 0x3ff) << 1) | (((unsigned long)(rc)) & 1)) +#define XOPS_MASK XOPS (0x3f, 0x3ff, 1) + + /* An XS form instruction. */ #define XS(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x1ff) << 2) | (((unsigned long)(rc)) & 1)) #define XS_MASK XS (0x3f, 0x1ff, 1) @@ -1551,6 +2876,19 @@ extract_tbr (unsigned long insn, #define XUC(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f)) #define XUC_MASK XUC(0x3f, 0x1f) +/* An XW form instruction. */ +#define XW(op, xop, rc) (OP (op) | ((((unsigned long)(xop)) & 0x3f) << 1) | ((rc) & 1)) +/* The mask for a G form instruction. rc not supported at present. */ +#define XW_MASK XW (0x3f, 0x3f, 0) + +/* An APU form instruction. */ +#define APU(op, xop, rc) (OP (op) | (((unsigned long)(xop)) & 0x3ff) << 1 | ((rc) & 1)) + +/* The mask for an APU form instruction. */ +#define APU_MASK APU (0x3f, 0x3ff, 1) +#define APU_RT_MASK (APU_MASK | RT_MASK) +#define APU_RA_MASK (APU_MASK | RA_MASK) + /* The BO encodings used in extended conditional branch mnemonics. */ #define BODNZF (0x0) #define BODNZFP (0x1) @@ -1581,6 +2919,16 @@ extract_tbr (unsigned long insn, #define BOU (0x14) +/* The BO16 encodings used in extended VLE conditional branch mnemonics. */ +#define BO16F (0x0) +#define BO16T (0x1) + +/* The BO32 encodings used in extended VLE conditional branch mnemonics. */ +#define BO32F (0x0) +#define BO32T (0x1) +#define BO32DNZ (0x2) +#define BO32DZ (0x3) + /* The BI condition bit encodings used in extended conditional branch mnemonics. */ #define CBLT (0) @@ -1608,3075 +2956,4268 @@ extract_tbr (unsigned long insn, /* Smaller names for the flags so each entry in the opcodes table will fit on a single line. 
*/ #undef PPC -#define PPC PPC_OPCODE_PPC +#define PPC PPC_OPCODE_PPC #define PPCCOM PPC_OPCODE_PPC | PPC_OPCODE_COMMON -#define NOPOWER4 PPC_OPCODE_NOPOWER4 | PPCCOM #define POWER4 PPC_OPCODE_POWER4 #define POWER5 PPC_OPCODE_POWER5 #define POWER6 PPC_OPCODE_POWER6 +#define POWER7 PPC_OPCODE_POWER7 +#define POWER8 PPC_OPCODE_POWER8 +#define POWER9 PPC_OPCODE_POWER9 #define CELL PPC_OPCODE_CELL -#define PPC32 PPC_OPCODE_32 | PPC_OPCODE_PPC -#define PPC64 PPC_OPCODE_64 | PPC_OPCODE_PPC +#define PPC64 PPC_OPCODE_64 | PPC_OPCODE_64_BRIDGE +#define NON32 (PPC_OPCODE_64 | PPC_OPCODE_POWER4 \ + | PPC_OPCODE_EFS | PPC_OPCODE_E500MC | PPC_OPCODE_TITAN) #define PPC403 PPC_OPCODE_403 -#define PPC405 PPC403 +#define PPC405 PPC_OPCODE_405 #define PPC440 PPC_OPCODE_440 -#define PPC750 PPC -#define PPC860 PPC +#define PPC464 PPC440 +#define PPC476 PPC_OPCODE_476 +#define PPC750 PPC_OPCODE_750 +#define PPC7450 PPC_OPCODE_7450 +#define PPC860 PPC_OPCODE_860 +#define PPCPS PPC_OPCODE_PPCPS #define PPCVEC PPC_OPCODE_ALTIVEC -#define POWER PPC_OPCODE_POWER -#define POWER2 PPC_OPCODE_POWER | PPC_OPCODE_POWER2 -#define PPCPWR2 PPC_OPCODE_PPC | PPC_OPCODE_POWER | PPC_OPCODE_POWER2 -#define POWER32 PPC_OPCODE_POWER | PPC_OPCODE_32 -#define COM PPC_OPCODE_POWER | PPC_OPCODE_PPC | PPC_OPCODE_COMMON -#define COM32 PPC_OPCODE_POWER | PPC_OPCODE_PPC | PPC_OPCODE_COMMON | PPC_OPCODE_32 -#define M601 PPC_OPCODE_POWER | PPC_OPCODE_601 +#define PPCVEC2 PPC_OPCODE_ALTIVEC2 +#define PPCVEC3 PPC_OPCODE_ALTIVEC2 +#define PPCVSX PPC_OPCODE_VSX +#define PPCVSX2 PPC_OPCODE_VSX +#define PPCVSX3 PPC_OPCODE_VSX3 +#define POWER PPC_OPCODE_POWER +#define POWER2 PPC_OPCODE_POWER | PPC_OPCODE_POWER2 +#define PWR2COM PPC_OPCODE_POWER | PPC_OPCODE_POWER2 | PPC_OPCODE_COMMON +#define PPCPWR2 PPC_OPCODE_PPC | PPC_OPCODE_POWER | PPC_OPCODE_POWER2 | PPC_OPCODE_COMMON +#define COM PPC_OPCODE_POWER | PPC_OPCODE_PPC | PPC_OPCODE_COMMON +#define M601 PPC_OPCODE_POWER | PPC_OPCODE_601 #define PWRCOM PPC_OPCODE_POWER | PPC_OPCODE_601 | PPC_OPCODE_COMMON -#define MFDEC1 PPC_OPCODE_POWER -#define MFDEC2 PPC_OPCODE_PPC | PPC_OPCODE_601 | PPC_OPCODE_BOOKE +#define MFDEC1 PPC_OPCODE_POWER +#define MFDEC2 PPC_OPCODE_PPC | PPC_OPCODE_601 | PPC_OPCODE_BOOKE | PPC_OPCODE_TITAN #define BOOKE PPC_OPCODE_BOOKE -#define BOOKE64 PPC_OPCODE_BOOKE64 -#define CLASSIC PPC_OPCODE_CLASSIC +#define NO371 PPC_OPCODE_BOOKE | PPC_OPCODE_PPCPS | PPC_OPCODE_EFS #define PPCE300 PPC_OPCODE_E300 #define PPCSPE PPC_OPCODE_SPE -#define PPCISEL PPC_OPCODE_ISEL +#define PPCISEL PPC_OPCODE_ISEL #define PPCEFS PPC_OPCODE_EFS -#define PPCBRLK PPC_OPCODE_BRLOCK +#define PPCBRLK PPC_OPCODE_BRLOCK #define PPCPMR PPC_OPCODE_PMR -#define PPCCHLK PPC_OPCODE_CACHELCK -#define PPCCHLK64 PPC_OPCODE_CACHELCK | PPC_OPCODE_BOOKE64 +#define PPCTMR PPC_OPCODE_TMR +#define PPCCHLK PPC_OPCODE_CACHELCK #define PPCRFMCI PPC_OPCODE_RFMCI +#define E500MC PPC_OPCODE_E500MC +#define PPCA2 PPC_OPCODE_A2 +#define TITAN PPC_OPCODE_TITAN +#define MULHW PPC_OPCODE_405 | PPC_OPCODE_440 | TITAN +#define E500 PPC_OPCODE_E500 +#define E6500 PPC_OPCODE_E6500 +#define PPCVLE PPC_OPCODE_VLE +#define PPCHTM PPC_OPCODE_HTM +#define E200Z4 PPC_OPCODE_E200Z4 +/* The list of embedded processors that use the embedded operand ordering + for the 3 operand dcbt and dcbtst instructions. */ +#define DCBT_EO (PPC_OPCODE_E500 | PPC_OPCODE_E500MC | PPC_OPCODE_476 \ + | PPC_OPCODE_A2) + + /* The opcode table. 
The format of the opcode table is: - NAME OPCODE MASK FLAGS { OPERANDS } + NAME OPCODE MASK FLAGS ANTI {OPERANDS} NAME is the name of the instruction. OPCODE is the instruction opcode. MASK is the opcode mask; this is used to tell the disassembler which bits in the actual opcode must match OPCODE. - FLAGS are flags indicated what processors support the instruction. + FLAGS are flags indicating which processors support the instruction. + ANTI indicates which processors don't support the instruction. OPERANDS is the list of operands. The disassembler reads the table in order and prints the first instruction which matches, so this table is sorted to put more - specific instructions before more general instructions. It is also - sorted by major opcode. */ + specific instructions before more general instructions. + + This table must be sorted by major opcode. Please try to keep it + vaguely sorted within major opcode too, except of course where + constrained otherwise by disassembler operation. */ const struct powerpc_opcode powerpc_opcodes[] = { -{ "attn", X(0,256), X_MASK, POWER4, { 0 } }, -{ "tdlgti", OPTO(2,TOLGT), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdllti", OPTO(2,TOLLT), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdeqi", OPTO(2,TOEQ), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdlgei", OPTO(2,TOLGE), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdlnli", OPTO(2,TOLNL), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdllei", OPTO(2,TOLLE), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdlngi", OPTO(2,TOLNG), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdgti", OPTO(2,TOGT), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdgei", OPTO(2,TOGE), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdnli", OPTO(2,TONL), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdlti", OPTO(2,TOLT), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdlei", OPTO(2,TOLE), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdngi", OPTO(2,TONG), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdnei", OPTO(2,TONE), OPTO_MASK, PPC64, { RA, SI } }, -{ "tdi", OP(2), OP_MASK, PPC64, { TO, RA, SI } }, - -{ "twlgti", OPTO(3,TOLGT), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tlgti", OPTO(3,TOLGT), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twllti", OPTO(3,TOLLT), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tllti", OPTO(3,TOLLT), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "tweqi", OPTO(3,TOEQ), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "teqi", OPTO(3,TOEQ), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twlgei", OPTO(3,TOLGE), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tlgei", OPTO(3,TOLGE), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twlnli", OPTO(3,TOLNL), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tlnli", OPTO(3,TOLNL), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twllei", OPTO(3,TOLLE), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tllei", OPTO(3,TOLLE), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twlngi", OPTO(3,TOLNG), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tlngi", OPTO(3,TOLNG), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twgti", OPTO(3,TOGT), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tgti", OPTO(3,TOGT), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twgei", OPTO(3,TOGE), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tgei", OPTO(3,TOGE), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twnli", OPTO(3,TONL), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tnli", OPTO(3,TONL), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twlti", OPTO(3,TOLT), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tlti", OPTO(3,TOLT), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twlei", OPTO(3,TOLE), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tlei", OPTO(3,TOLE), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twngi", OPTO(3,TONG), OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tngi", OPTO(3,TONG), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twnei", OPTO(3,TONE), 
OPTO_MASK, PPCCOM, { RA, SI } }, -{ "tnei", OPTO(3,TONE), OPTO_MASK, PWRCOM, { RA, SI } }, -{ "twi", OP(3), OP_MASK, PPCCOM, { TO, RA, SI } }, -{ "ti", OP(3), OP_MASK, PWRCOM, { TO, RA, SI } }, - -{ "macchw", XO(4,172,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchw.", XO(4,172,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwo", XO(4,172,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwo.", XO(4,172,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchws", XO(4,236,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchws.", XO(4,236,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwso", XO(4,236,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwso.", XO(4,236,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwsu", XO(4,204,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwsu.", XO(4,204,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwsuo", XO(4,204,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwsuo.", XO(4,204,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwu", XO(4,140,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwu.", XO(4,140,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwuo", XO(4,140,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "macchwuo.", XO(4,140,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhw", XO(4,44,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhw.", XO(4,44,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwo", XO(4,44,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwo.", XO(4,44,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhws", XO(4,108,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhws.", XO(4,108,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwso", XO(4,108,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwso.", XO(4,108,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwsu", XO(4,76,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwsu.", XO(4,76,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwsuo", XO(4,76,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwsuo.", XO(4,76,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwu", XO(4,12,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwu.", XO(4,12,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwuo", XO(4,12,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "machhwuo.", XO(4,12,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhw", XO(4,428,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhw.", XO(4,428,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwo", XO(4,428,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwo.", XO(4,428,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhws", XO(4,492,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhws.", XO(4,492,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwso", XO(4,492,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwso.", XO(4,492,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwsu", XO(4,460,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwsu.", XO(4,460,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwsuo", XO(4,460,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwsuo.", XO(4,460,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwu", XO(4,396,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwu.", XO(4,396,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "maclhwuo", XO(4,396,1,0), XO_MASK, PPC405|PPC440, { RT, 
RA, RB } }, -{ "maclhwuo.", XO(4,396,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mulchw", XRC(4,168,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mulchw.", XRC(4,168,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mulchwu", XRC(4,136,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mulchwu.", XRC(4,136,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mulhhw", XRC(4,40,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mulhhw.", XRC(4,40,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mulhhwu", XRC(4,8,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mulhhwu.", XRC(4,8,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mullhw", XRC(4,424,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mullhw.", XRC(4,424,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mullhwu", XRC(4,392,0), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mullhwu.", XRC(4,392,1), X_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmacchw", XO(4,174,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmacchw.", XO(4,174,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmacchwo", XO(4,174,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmacchwo.", XO(4,174,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmacchws", XO(4,238,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmacchws.", XO(4,238,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmacchwso", XO(4,238,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmacchwso.", XO(4,238,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmachhw", XO(4,46,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmachhw.", XO(4,46,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmachhwo", XO(4,46,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmachhwo.", XO(4,46,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmachhws", XO(4,110,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmachhws.", XO(4,110,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmachhwso", XO(4,110,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmachhwso.", XO(4,110,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmaclhw", XO(4,430,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmaclhw.", XO(4,430,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmaclhwo", XO(4,430,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmaclhwo.", XO(4,430,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmaclhws", XO(4,494,0,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmaclhws.", XO(4,494,0,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmaclhwso", XO(4,494,1,0), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "nmaclhwso.", XO(4,494,1,1), XO_MASK, PPC405|PPC440, { RT, RA, RB } }, -{ "mfvscr", VX(4, 1540), VX_MASK, PPCVEC, { VD } }, -{ "mtvscr", VX(4, 1604), VX_MASK, PPCVEC, { VB } }, - - /* Double-precision opcodes. */ - /* Some of these conflict with AltiVec, so move them before, since - PPCVEC includes the PPC_OPCODE_PPC set. 
*/ -{ "efscfd", VX(4, 719), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdabs", VX(4, 740), VX_MASK, PPCEFS, { RS, RA } }, -{ "efdnabs", VX(4, 741), VX_MASK, PPCEFS, { RS, RA } }, -{ "efdneg", VX(4, 742), VX_MASK, PPCEFS, { RS, RA } }, -{ "efdadd", VX(4, 736), VX_MASK, PPCEFS, { RS, RA, RB } }, -{ "efdsub", VX(4, 737), VX_MASK, PPCEFS, { RS, RA, RB } }, -{ "efdmul", VX(4, 744), VX_MASK, PPCEFS, { RS, RA, RB } }, -{ "efddiv", VX(4, 745), VX_MASK, PPCEFS, { RS, RA, RB } }, -{ "efdcmpgt", VX(4, 748), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efdcmplt", VX(4, 749), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efdcmpeq", VX(4, 750), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efdtstgt", VX(4, 764), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efdtstlt", VX(4, 765), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efdtsteq", VX(4, 766), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efdcfsi", VX(4, 753), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdcfsid", VX(4, 739), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdcfui", VX(4, 752), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdcfuid", VX(4, 738), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdcfsf", VX(4, 755), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdcfuf", VX(4, 754), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdctsi", VX(4, 757), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdctsidz",VX(4, 747), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdctsiz", VX(4, 762), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdctui", VX(4, 756), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdctuidz",VX(4, 746), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdctuiz", VX(4, 760), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdctsf", VX(4, 759), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdctuf", VX(4, 758), VX_MASK, PPCEFS, { RS, RB } }, -{ "efdcfs", VX(4, 751), VX_MASK, PPCEFS, { RS, RB } }, - /* End of double-precision opcodes. */ - -{ "vaddcuw", VX(4, 384), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vaddfp", VX(4, 10), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vaddsbs", VX(4, 768), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vaddshs", VX(4, 832), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vaddsws", VX(4, 896), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vaddubm", VX(4, 0), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vaddubs", VX(4, 512), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vadduhm", VX(4, 64), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vadduhs", VX(4, 576), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vadduwm", VX(4, 128), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vadduws", VX(4, 640), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vand", VX(4, 1028), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vandc", VX(4, 1092), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vavgsb", VX(4, 1282), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vavgsh", VX(4, 1346), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vavgsw", VX(4, 1410), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vavgub", VX(4, 1026), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vavguh", VX(4, 1090), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vavguw", VX(4, 1154), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcfsx", VX(4, 842), VX_MASK, PPCVEC, { VD, VB, UIMM } }, -{ "vcfux", VX(4, 778), VX_MASK, PPCVEC, { VD, VB, UIMM } }, -{ "vcmpbfp", VXR(4, 966, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpbfp.", VXR(4, 966, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpeqfp", VXR(4, 198, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpeqfp.", VXR(4, 198, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpequb", VXR(4, 6, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpequb.", VXR(4, 6, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpequh", VXR(4, 70, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpequh.", VXR(4, 70, 1), VXR_MASK, PPCVEC, { VD, VA, VB 
} }, -{ "vcmpequw", VXR(4, 134, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpequw.", VXR(4, 134, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgefp", VXR(4, 454, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgefp.", VXR(4, 454, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtfp", VXR(4, 710, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtfp.", VXR(4, 710, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtsb", VXR(4, 774, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtsb.", VXR(4, 774, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtsh", VXR(4, 838, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtsh.", VXR(4, 838, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtsw", VXR(4, 902, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtsw.", VXR(4, 902, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtub", VXR(4, 518, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtub.", VXR(4, 518, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtuh", VXR(4, 582, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtuh.", VXR(4, 582, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtuw", VXR(4, 646, 0), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vcmpgtuw.", VXR(4, 646, 1), VXR_MASK, PPCVEC, { VD, VA, VB } }, -{ "vctsxs", VX(4, 970), VX_MASK, PPCVEC, { VD, VB, UIMM } }, -{ "vctuxs", VX(4, 906), VX_MASK, PPCVEC, { VD, VB, UIMM } }, -{ "vexptefp", VX(4, 394), VX_MASK, PPCVEC, { VD, VB } }, -{ "vlogefp", VX(4, 458), VX_MASK, PPCVEC, { VD, VB } }, -{ "vmaddfp", VXA(4, 46), VXA_MASK, PPCVEC, { VD, VA, VC, VB } }, -{ "vmaxfp", VX(4, 1034), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmaxsb", VX(4, 258), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmaxsh", VX(4, 322), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmaxsw", VX(4, 386), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmaxub", VX(4, 2), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmaxuh", VX(4, 66), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmaxuw", VX(4, 130), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmhaddshs", VXA(4, 32), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vmhraddshs", VXA(4, 33), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vminfp", VX(4, 1098), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vminsb", VX(4, 770), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vminsh", VX(4, 834), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vminsw", VX(4, 898), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vminub", VX(4, 514), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vminuh", VX(4, 578), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vminuw", VX(4, 642), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmladduhm", VXA(4, 34), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vmrghb", VX(4, 12), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmrghh", VX(4, 76), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmrghw", VX(4, 140), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmrglb", VX(4, 268), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmrglh", VX(4, 332), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmrglw", VX(4, 396), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmsummbm", VXA(4, 37), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vmsumshm", VXA(4, 40), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vmsumshs", VXA(4, 41), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vmsumubm", VXA(4, 36), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vmsumuhm", VXA(4, 38), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vmsumuhs", VXA(4, 39), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vmulesb", VX(4, 776), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmulesh", VX(4, 840), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmuleub", VX(4, 520), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmuleuh", VX(4, 584), VX_MASK, PPCVEC, 
{ VD, VA, VB } }, -{ "vmulosb", VX(4, 264), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmulosh", VX(4, 328), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmuloub", VX(4, 8), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vmulouh", VX(4, 72), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vnmsubfp", VXA(4, 47), VXA_MASK, PPCVEC, { VD, VA, VC, VB } }, -{ "vnor", VX(4, 1284), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vor", VX(4, 1156), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vperm", VXA(4, 43), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vpkpx", VX(4, 782), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vpkshss", VX(4, 398), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vpkshus", VX(4, 270), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vpkswss", VX(4, 462), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vpkswus", VX(4, 334), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vpkuhum", VX(4, 14), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vpkuhus", VX(4, 142), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vpkuwum", VX(4, 78), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vpkuwus", VX(4, 206), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vrefp", VX(4, 266), VX_MASK, PPCVEC, { VD, VB } }, -{ "vrfim", VX(4, 714), VX_MASK, PPCVEC, { VD, VB } }, -{ "vrfin", VX(4, 522), VX_MASK, PPCVEC, { VD, VB } }, -{ "vrfip", VX(4, 650), VX_MASK, PPCVEC, { VD, VB } }, -{ "vrfiz", VX(4, 586), VX_MASK, PPCVEC, { VD, VB } }, -{ "vrlb", VX(4, 4), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vrlh", VX(4, 68), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vrlw", VX(4, 132), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vrsqrtefp", VX(4, 330), VX_MASK, PPCVEC, { VD, VB } }, -{ "vsel", VXA(4, 42), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, -{ "vsl", VX(4, 452), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vslb", VX(4, 260), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsldoi", VXA(4, 44), VXA_MASK, PPCVEC, { VD, VA, VB, SHB } }, -{ "vslh", VX(4, 324), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vslo", VX(4, 1036), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vslw", VX(4, 388), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vspltb", VX(4, 524), VX_MASK, PPCVEC, { VD, VB, UIMM } }, -{ "vsplth", VX(4, 588), VX_MASK, PPCVEC, { VD, VB, UIMM } }, -{ "vspltisb", VX(4, 780), VX_MASK, PPCVEC, { VD, SIMM } }, -{ "vspltish", VX(4, 844), VX_MASK, PPCVEC, { VD, SIMM } }, -{ "vspltisw", VX(4, 908), VX_MASK, PPCVEC, { VD, SIMM } }, -{ "vspltw", VX(4, 652), VX_MASK, PPCVEC, { VD, VB, UIMM } }, -{ "vsr", VX(4, 708), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsrab", VX(4, 772), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsrah", VX(4, 836), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsraw", VX(4, 900), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsrb", VX(4, 516), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsrh", VX(4, 580), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsro", VX(4, 1100), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsrw", VX(4, 644), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsubcuw", VX(4, 1408), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsubfp", VX(4, 74), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsubsbs", VX(4, 1792), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsubshs", VX(4, 1856), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsubsws", VX(4, 1920), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsububm", VX(4, 1024), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsububs", VX(4, 1536), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsubuhm", VX(4, 1088), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsubuhs", VX(4, 1600), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsubuwm", VX(4, 1152), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsubuws", VX(4, 1664), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsumsws", VX(4, 1928), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ 
"vsum2sws", VX(4, 1672), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsum4sbs", VX(4, 1800), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsum4shs", VX(4, 1608), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vsum4ubs", VX(4, 1544), VX_MASK, PPCVEC, { VD, VA, VB } }, -{ "vupkhpx", VX(4, 846), VX_MASK, PPCVEC, { VD, VB } }, -{ "vupkhsb", VX(4, 526), VX_MASK, PPCVEC, { VD, VB } }, -{ "vupkhsh", VX(4, 590), VX_MASK, PPCVEC, { VD, VB } }, -{ "vupklpx", VX(4, 974), VX_MASK, PPCVEC, { VD, VB } }, -{ "vupklsb", VX(4, 654), VX_MASK, PPCVEC, { VD, VB } }, -{ "vupklsh", VX(4, 718), VX_MASK, PPCVEC, { VD, VB } }, -{ "vxor", VX(4, 1220), VX_MASK, PPCVEC, { VD, VA, VB } }, - -{ "evaddw", VX(4, 512), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evaddiw", VX(4, 514), VX_MASK, PPCSPE, { RS, RB, UIMM } }, -{ "evsubfw", VX(4, 516), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evsubw", VX(4, 516), VX_MASK, PPCSPE, { RS, RB, RA } }, -{ "evsubifw", VX(4, 518), VX_MASK, PPCSPE, { RS, UIMM, RB } }, -{ "evsubiw", VX(4, 518), VX_MASK, PPCSPE, { RS, RB, UIMM } }, -{ "evabs", VX(4, 520), VX_MASK, PPCSPE, { RS, RA } }, -{ "evneg", VX(4, 521), VX_MASK, PPCSPE, { RS, RA } }, -{ "evextsb", VX(4, 522), VX_MASK, PPCSPE, { RS, RA } }, -{ "evextsh", VX(4, 523), VX_MASK, PPCSPE, { RS, RA } }, -{ "evrndw", VX(4, 524), VX_MASK, PPCSPE, { RS, RA } }, -{ "evcntlzw", VX(4, 525), VX_MASK, PPCSPE, { RS, RA } }, -{ "evcntlsw", VX(4, 526), VX_MASK, PPCSPE, { RS, RA } }, - -{ "brinc", VX(4, 527), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evand", VX(4, 529), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evandc", VX(4, 530), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmr", VX(4, 535), VX_MASK, PPCSPE, { RS, RA, BBA } }, -{ "evor", VX(4, 535), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evorc", VX(4, 539), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evxor", VX(4, 534), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "eveqv", VX(4, 537), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evnand", VX(4, 542), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evnot", VX(4, 536), VX_MASK, PPCSPE, { RS, RA, BBA } }, -{ "evnor", VX(4, 536), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evrlw", VX(4, 552), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evrlwi", VX(4, 554), VX_MASK, PPCSPE, { RS, RA, EVUIMM } }, -{ "evslw", VX(4, 548), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evslwi", VX(4, 550), VX_MASK, PPCSPE, { RS, RA, EVUIMM } }, -{ "evsrws", VX(4, 545), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evsrwu", VX(4, 544), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evsrwis", VX(4, 547), VX_MASK, PPCSPE, { RS, RA, EVUIMM } }, -{ "evsrwiu", VX(4, 546), VX_MASK, PPCSPE, { RS, RA, EVUIMM } }, -{ "evsplati", VX(4, 553), VX_MASK, PPCSPE, { RS, SIMM } }, -{ "evsplatfi", VX(4, 555), VX_MASK, PPCSPE, { RS, SIMM } }, -{ "evmergehi", VX(4, 556), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmergelo", VX(4, 557), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmergehilo",VX(4,558), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmergelohi",VX(4,559), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evcmpgts", VX(4, 561), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evcmpgtu", VX(4, 560), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evcmplts", VX(4, 563), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evcmpltu", VX(4, 562), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evcmpeq", VX(4, 564), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evsel", EVSEL(4,79),EVSEL_MASK, PPCSPE, { RS, RA, RB, CRFS } }, - -{ "evldd", VX(4, 769), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, -{ "evlddx", VX(4, 768), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evldw", VX(4, 771), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, -{ "evldwx", VX(4, 770), 
VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evldh", VX(4, 773), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, -{ "evldhx", VX(4, 772), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evlwhe", VX(4, 785), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, -{ "evlwhex", VX(4, 784), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evlwhou", VX(4, 789), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, -{ "evlwhoux", VX(4, 788), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evlwhos", VX(4, 791), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, -{ "evlwhosx", VX(4, 790), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evlwwsplat",VX(4, 793), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, -{ "evlwwsplatx",VX(4, 792), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evlwhsplat",VX(4, 797), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, -{ "evlwhsplatx",VX(4, 796), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evlhhesplat",VX(4, 777), VX_MASK, PPCSPE, { RS, EVUIMM_2, RA } }, -{ "evlhhesplatx",VX(4, 776), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evlhhousplat",VX(4, 781), VX_MASK, PPCSPE, { RS, EVUIMM_2, RA } }, -{ "evlhhousplatx",VX(4, 780), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evlhhossplat",VX(4, 783), VX_MASK, PPCSPE, { RS, EVUIMM_2, RA } }, -{ "evlhhossplatx",VX(4, 782), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evstdd", VX(4, 801), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, -{ "evstddx", VX(4, 800), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evstdw", VX(4, 803), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, -{ "evstdwx", VX(4, 802), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evstdh", VX(4, 805), VX_MASK, PPCSPE, { RS, EVUIMM_8, RA } }, -{ "evstdhx", VX(4, 804), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evstwwe", VX(4, 825), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, -{ "evstwwex", VX(4, 824), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evstwwo", VX(4, 829), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, -{ "evstwwox", VX(4, 828), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evstwhe", VX(4, 817), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, -{ "evstwhex", VX(4, 816), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evstwho", VX(4, 821), VX_MASK, PPCSPE, { RS, EVUIMM_4, RA } }, -{ "evstwhox", VX(4, 820), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evfsabs", VX(4, 644), VX_MASK, PPCSPE, { RS, RA } }, -{ "evfsnabs", VX(4, 645), VX_MASK, PPCSPE, { RS, RA } }, -{ "evfsneg", VX(4, 646), VX_MASK, PPCSPE, { RS, RA } }, -{ "evfsadd", VX(4, 640), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evfssub", VX(4, 641), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evfsmul", VX(4, 648), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evfsdiv", VX(4, 649), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evfscmpgt", VX(4, 652), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evfscmplt", VX(4, 653), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evfscmpeq", VX(4, 654), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evfststgt", VX(4, 668), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evfststlt", VX(4, 669), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evfststeq", VX(4, 670), VX_MASK, PPCSPE, { CRFD, RA, RB } }, -{ "evfscfui", VX(4, 656), VX_MASK, PPCSPE, { RS, RB } }, -{ "evfsctuiz", VX(4, 664), VX_MASK, PPCSPE, { RS, RB } }, -{ "evfscfsi", VX(4, 657), VX_MASK, PPCSPE, { RS, RB } }, -{ "evfscfuf", VX(4, 658), VX_MASK, PPCSPE, { RS, RB } }, -{ "evfscfsf", VX(4, 659), VX_MASK, PPCSPE, { RS, RB } }, -{ "evfsctui", VX(4, 660), VX_MASK, PPCSPE, { RS, RB } }, -{ "evfsctsi", VX(4, 661), VX_MASK, PPCSPE, { RS, RB } }, -{ "evfsctsiz", VX(4, 666), VX_MASK, PPCSPE, { RS, RB } }, -{ "evfsctuf", VX(4, 662), VX_MASK, PPCSPE, { RS, RB } }, -{ "evfsctsf", VX(4, 663), VX_MASK, PPCSPE, { RS, RB } }, - -{ "efsabs", VX(4, 708), VX_MASK, PPCEFS, { 
RS, RA } }, -{ "efsnabs", VX(4, 709), VX_MASK, PPCEFS, { RS, RA } }, -{ "efsneg", VX(4, 710), VX_MASK, PPCEFS, { RS, RA } }, -{ "efsadd", VX(4, 704), VX_MASK, PPCEFS, { RS, RA, RB } }, -{ "efssub", VX(4, 705), VX_MASK, PPCEFS, { RS, RA, RB } }, -{ "efsmul", VX(4, 712), VX_MASK, PPCEFS, { RS, RA, RB } }, -{ "efsdiv", VX(4, 713), VX_MASK, PPCEFS, { RS, RA, RB } }, -{ "efscmpgt", VX(4, 716), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efscmplt", VX(4, 717), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efscmpeq", VX(4, 718), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efststgt", VX(4, 732), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efststlt", VX(4, 733), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efststeq", VX(4, 734), VX_MASK, PPCEFS, { CRFD, RA, RB } }, -{ "efscfui", VX(4, 720), VX_MASK, PPCEFS, { RS, RB } }, -{ "efsctuiz", VX(4, 728), VX_MASK, PPCEFS, { RS, RB } }, -{ "efscfsi", VX(4, 721), VX_MASK, PPCEFS, { RS, RB } }, -{ "efscfuf", VX(4, 722), VX_MASK, PPCEFS, { RS, RB } }, -{ "efscfsf", VX(4, 723), VX_MASK, PPCEFS, { RS, RB } }, -{ "efsctui", VX(4, 724), VX_MASK, PPCEFS, { RS, RB } }, -{ "efsctsi", VX(4, 725), VX_MASK, PPCEFS, { RS, RB } }, -{ "efsctsiz", VX(4, 730), VX_MASK, PPCEFS, { RS, RB } }, -{ "efsctuf", VX(4, 726), VX_MASK, PPCEFS, { RS, RB } }, -{ "efsctsf", VX(4, 727), VX_MASK, PPCEFS, { RS, RB } }, - -{ "evmhossf", VX(4, 1031), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhossfa", VX(4, 1063), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhosmf", VX(4, 1039), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhosmfa", VX(4, 1071), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhosmi", VX(4, 1037), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhosmia", VX(4, 1069), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhoumi", VX(4, 1036), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhoumia", VX(4, 1068), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhessf", VX(4, 1027), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhessfa", VX(4, 1059), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhesmf", VX(4, 1035), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhesmfa", VX(4, 1067), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhesmi", VX(4, 1033), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhesmia", VX(4, 1065), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmheumi", VX(4, 1032), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmheumia", VX(4, 1064), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmhossfaaw",VX(4, 1287), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhossiaaw",VX(4, 1285), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhosmfaaw",VX(4, 1295), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhosmiaaw",VX(4, 1293), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhousiaaw",VX(4, 1284), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhoumiaaw",VX(4, 1292), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhessfaaw",VX(4, 1283), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhessiaaw",VX(4, 1281), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhesmfaaw",VX(4, 1291), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhesmiaaw",VX(4, 1289), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmheusiaaw",VX(4, 1280), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmheumiaaw",VX(4, 1288), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmhossfanw",VX(4, 1415), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhossianw",VX(4, 1413), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhosmfanw",VX(4, 1423), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhosmianw",VX(4, 1421), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhousianw",VX(4, 1412), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhoumianw",VX(4, 1420), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhessfanw",VX(4, 1411), VX_MASK, 
PPCSPE, { RS, RA, RB } }, -{ "evmhessianw",VX(4, 1409), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhesmfanw",VX(4, 1419), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhesmianw",VX(4, 1417), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmheusianw",VX(4, 1408), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmheumianw",VX(4, 1416), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmhogsmfaa",VX(4, 1327), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhogsmiaa",VX(4, 1325), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhogumiaa",VX(4, 1324), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhegsmfaa",VX(4, 1323), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhegsmiaa",VX(4, 1321), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhegumiaa",VX(4, 1320), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmhogsmfan",VX(4, 1455), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhogsmian",VX(4, 1453), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhogumian",VX(4, 1452), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhegsmfan",VX(4, 1451), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhegsmian",VX(4, 1449), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmhegumian",VX(4, 1448), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmwhssf", VX(4, 1095), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwhssfa", VX(4, 1127), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwhsmf", VX(4, 1103), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwhsmfa", VX(4, 1135), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwhsmi", VX(4, 1101), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwhsmia", VX(4, 1133), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwhumi", VX(4, 1100), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwhumia", VX(4, 1132), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmwlumi", VX(4, 1096), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwlumia", VX(4, 1128), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmwlssiaaw",VX(4, 1345), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwlsmiaaw",VX(4, 1353), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwlusiaaw",VX(4, 1344), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwlumiaaw",VX(4, 1352), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmwlssianw",VX(4, 1473), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwlsmianw",VX(4, 1481), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwlusianw",VX(4, 1472), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwlumianw",VX(4, 1480), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmwssf", VX(4, 1107), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwssfa", VX(4, 1139), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwsmf", VX(4, 1115), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwsmfa", VX(4, 1147), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwsmi", VX(4, 1113), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwsmia", VX(4, 1145), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwumi", VX(4, 1112), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwumia", VX(4, 1144), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmwssfaa", VX(4, 1363), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwsmfaa", VX(4, 1371), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwsmiaa", VX(4, 1369), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwumiaa", VX(4, 1368), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evmwssfan", VX(4, 1491), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwsmfan", VX(4, 1499), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwsmian", VX(4, 1497), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evmwumian", VX(4, 1496), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "evaddssiaaw",VX(4, 1217), VX_MASK, PPCSPE, { RS, RA } }, -{ "evaddsmiaaw",VX(4, 1225), VX_MASK, PPCSPE, { RS, RA } }, -{ "evaddusiaaw",VX(4, 1216), VX_MASK, PPCSPE, { RS, RA } }, -{ "evaddumiaaw",VX(4, 1224), VX_MASK, PPCSPE, { RS, 
RA } }, - -{ "evsubfssiaaw",VX(4, 1219), VX_MASK, PPCSPE, { RS, RA } }, -{ "evsubfsmiaaw",VX(4, 1227), VX_MASK, PPCSPE, { RS, RA } }, -{ "evsubfusiaaw",VX(4, 1218), VX_MASK, PPCSPE, { RS, RA } }, -{ "evsubfumiaaw",VX(4, 1226), VX_MASK, PPCSPE, { RS, RA } }, - -{ "evmra", VX(4, 1220), VX_MASK, PPCSPE, { RS, RA } }, - -{ "evdivws", VX(4, 1222), VX_MASK, PPCSPE, { RS, RA, RB } }, -{ "evdivwu", VX(4, 1223), VX_MASK, PPCSPE, { RS, RA, RB } }, - -{ "mulli", OP(7), OP_MASK, PPCCOM, { RT, RA, SI } }, -{ "muli", OP(7), OP_MASK, PWRCOM, { RT, RA, SI } }, - -{ "subfic", OP(8), OP_MASK, PPCCOM, { RT, RA, SI } }, -{ "sfi", OP(8), OP_MASK, PWRCOM, { RT, RA, SI } }, - -{ "dozi", OP(9), OP_MASK, M601, { RT, RA, SI } }, - -{ "bce", B(9,0,0), B_MASK, BOOKE64, { BO, BI, BD } }, -{ "bcel", B(9,0,1), B_MASK, BOOKE64, { BO, BI, BD } }, -{ "bcea", B(9,1,0), B_MASK, BOOKE64, { BO, BI, BDA } }, -{ "bcela", B(9,1,1), B_MASK, BOOKE64, { BO, BI, BDA } }, - -{ "cmplwi", OPL(10,0), OPL_MASK, PPCCOM, { OBF, RA, UI } }, -{ "cmpldi", OPL(10,1), OPL_MASK, PPC64, { OBF, RA, UI } }, -{ "cmpli", OP(10), OP_MASK, PPC, { BF, L, RA, UI } }, -{ "cmpli", OP(10), OP_MASK, PWRCOM, { BF, RA, UI } }, - -{ "cmpwi", OPL(11,0), OPL_MASK, PPCCOM, { OBF, RA, SI } }, -{ "cmpdi", OPL(11,1), OPL_MASK, PPC64, { OBF, RA, SI } }, -{ "cmpi", OP(11), OP_MASK, PPC, { BF, L, RA, SI } }, -{ "cmpi", OP(11), OP_MASK, PWRCOM, { BF, RA, SI } }, - -{ "addic", OP(12), OP_MASK, PPCCOM, { RT, RA, SI } }, -{ "ai", OP(12), OP_MASK, PWRCOM, { RT, RA, SI } }, -{ "subic", OP(12), OP_MASK, PPCCOM, { RT, RA, NSI } }, - -{ "addic.", OP(13), OP_MASK, PPCCOM, { RT, RA, SI } }, -{ "ai.", OP(13), OP_MASK, PWRCOM, { RT, RA, SI } }, -{ "subic.", OP(13), OP_MASK, PPCCOM, { RT, RA, NSI } }, - -{ "li", OP(14), DRA_MASK, PPCCOM, { RT, SI } }, -{ "lil", OP(14), DRA_MASK, PWRCOM, { RT, SI } }, -{ "addi", OP(14), OP_MASK, PPCCOM, { RT, RA0, SI } }, -{ "cal", OP(14), OP_MASK, PWRCOM, { RT, D, RA0 } }, -{ "subi", OP(14), OP_MASK, PPCCOM, { RT, RA0, NSI } }, -{ "la", OP(14), OP_MASK, PPCCOM, { RT, D, RA0 } }, - -{ "lis", OP(15), DRA_MASK, PPCCOM, { RT, SISIGNOPT } }, -{ "liu", OP(15), DRA_MASK, PWRCOM, { RT, SISIGNOPT } }, -{ "addis", OP(15), OP_MASK, PPCCOM, { RT,RA0,SISIGNOPT } }, -{ "cau", OP(15), OP_MASK, PWRCOM, { RT,RA0,SISIGNOPT } }, -{ "subis", OP(15), OP_MASK, PPCCOM, { RT, RA0, NSI } }, - -{ "bdnz-", BBO(16,BODNZ,0,0), BBOATBI_MASK, PPCCOM, { BDM } }, -{ "bdnz+", BBO(16,BODNZ,0,0), BBOATBI_MASK, PPCCOM, { BDP } }, -{ "bdnz", BBO(16,BODNZ,0,0), BBOATBI_MASK, PPCCOM, { BD } }, -{ "bdn", BBO(16,BODNZ,0,0), BBOATBI_MASK, PWRCOM, { BD } }, -{ "bdnzl-", BBO(16,BODNZ,0,1), BBOATBI_MASK, PPCCOM, { BDM } }, -{ "bdnzl+", BBO(16,BODNZ,0,1), BBOATBI_MASK, PPCCOM, { BDP } }, -{ "bdnzl", BBO(16,BODNZ,0,1), BBOATBI_MASK, PPCCOM, { BD } }, -{ "bdnl", BBO(16,BODNZ,0,1), BBOATBI_MASK, PWRCOM, { BD } }, -{ "bdnza-", BBO(16,BODNZ,1,0), BBOATBI_MASK, PPCCOM, { BDMA } }, -{ "bdnza+", BBO(16,BODNZ,1,0), BBOATBI_MASK, PPCCOM, { BDPA } }, -{ "bdnza", BBO(16,BODNZ,1,0), BBOATBI_MASK, PPCCOM, { BDA } }, -{ "bdna", BBO(16,BODNZ,1,0), BBOATBI_MASK, PWRCOM, { BDA } }, -{ "bdnzla-", BBO(16,BODNZ,1,1), BBOATBI_MASK, PPCCOM, { BDMA } }, -{ "bdnzla+", BBO(16,BODNZ,1,1), BBOATBI_MASK, PPCCOM, { BDPA } }, -{ "bdnzla", BBO(16,BODNZ,1,1), BBOATBI_MASK, PPCCOM, { BDA } }, -{ "bdnla", BBO(16,BODNZ,1,1), BBOATBI_MASK, PWRCOM, { BDA } }, -{ "bdz-", BBO(16,BODZ,0,0), BBOATBI_MASK, PPCCOM, { BDM } }, -{ "bdz+", BBO(16,BODZ,0,0), BBOATBI_MASK, PPCCOM, { BDP } }, -{ "bdz", BBO(16,BODZ,0,0), BBOATBI_MASK, COM, { BD } 
}, -{ "bdzl-", BBO(16,BODZ,0,1), BBOATBI_MASK, PPCCOM, { BDM } }, -{ "bdzl+", BBO(16,BODZ,0,1), BBOATBI_MASK, PPCCOM, { BDP } }, -{ "bdzl", BBO(16,BODZ,0,1), BBOATBI_MASK, COM, { BD } }, -{ "bdza-", BBO(16,BODZ,1,0), BBOATBI_MASK, PPCCOM, { BDMA } }, -{ "bdza+", BBO(16,BODZ,1,0), BBOATBI_MASK, PPCCOM, { BDPA } }, -{ "bdza", BBO(16,BODZ,1,0), BBOATBI_MASK, COM, { BDA } }, -{ "bdzla-", BBO(16,BODZ,1,1), BBOATBI_MASK, PPCCOM, { BDMA } }, -{ "bdzla+", BBO(16,BODZ,1,1), BBOATBI_MASK, PPCCOM, { BDPA } }, -{ "bdzla", BBO(16,BODZ,1,1), BBOATBI_MASK, COM, { BDA } }, -{ "blt-", BBOCB(16,BOT,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "blt+", BBOCB(16,BOT,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "blt", BBOCB(16,BOT,CBLT,0,0), BBOATCB_MASK, COM, { CR, BD } }, -{ "bltl-", BBOCB(16,BOT,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bltl+", BBOCB(16,BOT,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bltl", BBOCB(16,BOT,CBLT,0,1), BBOATCB_MASK, COM, { CR, BD } }, -{ "blta-", BBOCB(16,BOT,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "blta+", BBOCB(16,BOT,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "blta", BBOCB(16,BOT,CBLT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bltla-", BBOCB(16,BOT,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bltla+", BBOCB(16,BOT,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bltla", BBOCB(16,BOT,CBLT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bgt-", BBOCB(16,BOT,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bgt+", BBOCB(16,BOT,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bgt", BBOCB(16,BOT,CBGT,0,0), BBOATCB_MASK, COM, { CR, BD } }, -{ "bgtl-", BBOCB(16,BOT,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bgtl+", BBOCB(16,BOT,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bgtl", BBOCB(16,BOT,CBGT,0,1), BBOATCB_MASK, COM, { CR, BD } }, -{ "bgta-", BBOCB(16,BOT,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bgta+", BBOCB(16,BOT,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bgta", BBOCB(16,BOT,CBGT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bgtla-", BBOCB(16,BOT,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bgtla+", BBOCB(16,BOT,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bgtla", BBOCB(16,BOT,CBGT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, -{ "beq-", BBOCB(16,BOT,CBEQ,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "beq+", BBOCB(16,BOT,CBEQ,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "beq", BBOCB(16,BOT,CBEQ,0,0), BBOATCB_MASK, COM, { CR, BD } }, -{ "beql-", BBOCB(16,BOT,CBEQ,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "beql+", BBOCB(16,BOT,CBEQ,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "beql", BBOCB(16,BOT,CBEQ,0,1), BBOATCB_MASK, COM, { CR, BD } }, -{ "beqa-", BBOCB(16,BOT,CBEQ,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "beqa+", BBOCB(16,BOT,CBEQ,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "beqa", BBOCB(16,BOT,CBEQ,1,0), BBOATCB_MASK, COM, { CR, BDA } }, -{ "beqla-", BBOCB(16,BOT,CBEQ,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "beqla+", BBOCB(16,BOT,CBEQ,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "beqla", BBOCB(16,BOT,CBEQ,1,1), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bso-", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bso+", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bso", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, COM, { CR, BD } }, -{ "bsol-", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bsol+", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bsol", BBOCB(16,BOT,CBSO,0,1), 
BBOATCB_MASK, COM, { CR, BD } }, -{ "bsoa-", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bsoa+", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bsoa", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bsola-", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bsola+", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bsola", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bun-", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bun+", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bun", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BD } }, -{ "bunl-", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bunl+", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bunl", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BD } }, -{ "buna-", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "buna+", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "buna", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDA } }, -{ "bunla-", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bunla+", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bunla", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDA } }, -{ "bge-", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bge+", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bge", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, COM, { CR, BD } }, -{ "bgel-", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bgel+", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bgel", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, COM, { CR, BD } }, -{ "bgea-", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bgea+", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bgea", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bgela-", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bgela+", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bgela", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bnl-", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bnl+", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bnl", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, COM, { CR, BD } }, -{ "bnll-", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bnll+", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bnll", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, COM, { CR, BD } }, -{ "bnla-", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bnla+", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bnla", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bnlla-", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bnlla+", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bnlla", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, -{ "ble-", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "ble+", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "ble", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, COM, { CR, BD } }, -{ "blel-", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "blel+", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "blel", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, COM, { CR, BD } }, -{ "blea-", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, { 
CR, BDMA } }, -{ "blea+", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "blea", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, -{ "blela-", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "blela+", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "blela", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bng-", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bng+", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bng", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, COM, { CR, BD } }, -{ "bngl-", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bngl+", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bngl", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, COM, { CR, BD } }, -{ "bnga-", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bnga+", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bnga", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bngla-", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bngla+", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bngla", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bne-", BBOCB(16,BOF,CBEQ,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bne+", BBOCB(16,BOF,CBEQ,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bne", BBOCB(16,BOF,CBEQ,0,0), BBOATCB_MASK, COM, { CR, BD } }, -{ "bnel-", BBOCB(16,BOF,CBEQ,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bnel+", BBOCB(16,BOF,CBEQ,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bnel", BBOCB(16,BOF,CBEQ,0,1), BBOATCB_MASK, COM, { CR, BD } }, -{ "bnea-", BBOCB(16,BOF,CBEQ,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bnea+", BBOCB(16,BOF,CBEQ,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bnea", BBOCB(16,BOF,CBEQ,1,0), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bnela-", BBOCB(16,BOF,CBEQ,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bnela+", BBOCB(16,BOF,CBEQ,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bnela", BBOCB(16,BOF,CBEQ,1,1), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bns-", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bns+", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bns", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, COM, { CR, BD } }, -{ "bnsl-", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bnsl+", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bnsl", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, COM, { CR, BD } }, -{ "bnsa-", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bnsa+", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bnsa", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bnsla-", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bnsla+", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bnsla", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, COM, { CR, BDA } }, -{ "bnu-", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bnu+", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bnu", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, { CR, BD } }, -{ "bnul-", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDM } }, -{ "bnul+", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BDP } }, -{ "bnul", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, { CR, BD } }, -{ "bnua-", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bnua+", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bnua", 
BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, { CR, BDA } }, -{ "bnula-", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDMA } }, -{ "bnula+", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDPA } }, -{ "bnula", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, { CR, BDA } }, -{ "bdnzt-", BBO(16,BODNZT,0,0), BBOY_MASK, NOPOWER4, { BI, BDM } }, -{ "bdnzt+", BBO(16,BODNZT,0,0), BBOY_MASK, NOPOWER4, { BI, BDP } }, -{ "bdnzt", BBO(16,BODNZT,0,0), BBOY_MASK, PPCCOM, { BI, BD } }, -{ "bdnztl-", BBO(16,BODNZT,0,1), BBOY_MASK, NOPOWER4, { BI, BDM } }, -{ "bdnztl+", BBO(16,BODNZT,0,1), BBOY_MASK, NOPOWER4, { BI, BDP } }, -{ "bdnztl", BBO(16,BODNZT,0,1), BBOY_MASK, PPCCOM, { BI, BD } }, -{ "bdnzta-", BBO(16,BODNZT,1,0), BBOY_MASK, NOPOWER4, { BI, BDMA } }, -{ "bdnzta+", BBO(16,BODNZT,1,0), BBOY_MASK, NOPOWER4, { BI, BDPA } }, -{ "bdnzta", BBO(16,BODNZT,1,0), BBOY_MASK, PPCCOM, { BI, BDA } }, -{ "bdnztla-",BBO(16,BODNZT,1,1), BBOY_MASK, NOPOWER4, { BI, BDMA } }, -{ "bdnztla+",BBO(16,BODNZT,1,1), BBOY_MASK, NOPOWER4, { BI, BDPA } }, -{ "bdnztla", BBO(16,BODNZT,1,1), BBOY_MASK, PPCCOM, { BI, BDA } }, -{ "bdnzf-", BBO(16,BODNZF,0,0), BBOY_MASK, NOPOWER4, { BI, BDM } }, -{ "bdnzf+", BBO(16,BODNZF,0,0), BBOY_MASK, NOPOWER4, { BI, BDP } }, -{ "bdnzf", BBO(16,BODNZF,0,0), BBOY_MASK, PPCCOM, { BI, BD } }, -{ "bdnzfl-", BBO(16,BODNZF,0,1), BBOY_MASK, NOPOWER4, { BI, BDM } }, -{ "bdnzfl+", BBO(16,BODNZF,0,1), BBOY_MASK, NOPOWER4, { BI, BDP } }, -{ "bdnzfl", BBO(16,BODNZF,0,1), BBOY_MASK, PPCCOM, { BI, BD } }, -{ "bdnzfa-", BBO(16,BODNZF,1,0), BBOY_MASK, NOPOWER4, { BI, BDMA } }, -{ "bdnzfa+", BBO(16,BODNZF,1,0), BBOY_MASK, NOPOWER4, { BI, BDPA } }, -{ "bdnzfa", BBO(16,BODNZF,1,0), BBOY_MASK, PPCCOM, { BI, BDA } }, -{ "bdnzfla-",BBO(16,BODNZF,1,1), BBOY_MASK, NOPOWER4, { BI, BDMA } }, -{ "bdnzfla+",BBO(16,BODNZF,1,1), BBOY_MASK, NOPOWER4, { BI, BDPA } }, -{ "bdnzfla", BBO(16,BODNZF,1,1), BBOY_MASK, PPCCOM, { BI, BDA } }, -{ "bt-", BBO(16,BOT,0,0), BBOAT_MASK, PPCCOM, { BI, BDM } }, -{ "bt+", BBO(16,BOT,0,0), BBOAT_MASK, PPCCOM, { BI, BDP } }, -{ "bt", BBO(16,BOT,0,0), BBOAT_MASK, PPCCOM, { BI, BD } }, -{ "bbt", BBO(16,BOT,0,0), BBOAT_MASK, PWRCOM, { BI, BD } }, -{ "btl-", BBO(16,BOT,0,1), BBOAT_MASK, PPCCOM, { BI, BDM } }, -{ "btl+", BBO(16,BOT,0,1), BBOAT_MASK, PPCCOM, { BI, BDP } }, -{ "btl", BBO(16,BOT,0,1), BBOAT_MASK, PPCCOM, { BI, BD } }, -{ "bbtl", BBO(16,BOT,0,1), BBOAT_MASK, PWRCOM, { BI, BD } }, -{ "bta-", BBO(16,BOT,1,0), BBOAT_MASK, PPCCOM, { BI, BDMA } }, -{ "bta+", BBO(16,BOT,1,0), BBOAT_MASK, PPCCOM, { BI, BDPA } }, -{ "bta", BBO(16,BOT,1,0), BBOAT_MASK, PPCCOM, { BI, BDA } }, -{ "bbta", BBO(16,BOT,1,0), BBOAT_MASK, PWRCOM, { BI, BDA } }, -{ "btla-", BBO(16,BOT,1,1), BBOAT_MASK, PPCCOM, { BI, BDMA } }, -{ "btla+", BBO(16,BOT,1,1), BBOAT_MASK, PPCCOM, { BI, BDPA } }, -{ "btla", BBO(16,BOT,1,1), BBOAT_MASK, PPCCOM, { BI, BDA } }, -{ "bbtla", BBO(16,BOT,1,1), BBOAT_MASK, PWRCOM, { BI, BDA } }, -{ "bf-", BBO(16,BOF,0,0), BBOAT_MASK, PPCCOM, { BI, BDM } }, -{ "bf+", BBO(16,BOF,0,0), BBOAT_MASK, PPCCOM, { BI, BDP } }, -{ "bf", BBO(16,BOF,0,0), BBOAT_MASK, PPCCOM, { BI, BD } }, -{ "bbf", BBO(16,BOF,0,0), BBOAT_MASK, PWRCOM, { BI, BD } }, -{ "bfl-", BBO(16,BOF,0,1), BBOAT_MASK, PPCCOM, { BI, BDM } }, -{ "bfl+", BBO(16,BOF,0,1), BBOAT_MASK, PPCCOM, { BI, BDP } }, -{ "bfl", BBO(16,BOF,0,1), BBOAT_MASK, PPCCOM, { BI, BD } }, -{ "bbfl", BBO(16,BOF,0,1), BBOAT_MASK, PWRCOM, { BI, BD } }, -{ "bfa-", BBO(16,BOF,1,0), BBOAT_MASK, PPCCOM, { BI, BDMA } }, -{ "bfa+", BBO(16,BOF,1,0), BBOAT_MASK, PPCCOM, { 
BI, BDPA } }, -{ "bfa", BBO(16,BOF,1,0), BBOAT_MASK, PPCCOM, { BI, BDA } }, -{ "bbfa", BBO(16,BOF,1,0), BBOAT_MASK, PWRCOM, { BI, BDA } }, -{ "bfla-", BBO(16,BOF,1,1), BBOAT_MASK, PPCCOM, { BI, BDMA } }, -{ "bfla+", BBO(16,BOF,1,1), BBOAT_MASK, PPCCOM, { BI, BDPA } }, -{ "bfla", BBO(16,BOF,1,1), BBOAT_MASK, PPCCOM, { BI, BDA } }, -{ "bbfla", BBO(16,BOF,1,1), BBOAT_MASK, PWRCOM, { BI, BDA } }, -{ "bdzt-", BBO(16,BODZT,0,0), BBOY_MASK, NOPOWER4, { BI, BDM } }, -{ "bdzt+", BBO(16,BODZT,0,0), BBOY_MASK, NOPOWER4, { BI, BDP } }, -{ "bdzt", BBO(16,BODZT,0,0), BBOY_MASK, PPCCOM, { BI, BD } }, -{ "bdztl-", BBO(16,BODZT,0,1), BBOY_MASK, NOPOWER4, { BI, BDM } }, -{ "bdztl+", BBO(16,BODZT,0,1), BBOY_MASK, NOPOWER4, { BI, BDP } }, -{ "bdztl", BBO(16,BODZT,0,1), BBOY_MASK, PPCCOM, { BI, BD } }, -{ "bdzta-", BBO(16,BODZT,1,0), BBOY_MASK, NOPOWER4, { BI, BDMA } }, -{ "bdzta+", BBO(16,BODZT,1,0), BBOY_MASK, NOPOWER4, { BI, BDPA } }, -{ "bdzta", BBO(16,BODZT,1,0), BBOY_MASK, PPCCOM, { BI, BDA } }, -{ "bdztla-", BBO(16,BODZT,1,1), BBOY_MASK, NOPOWER4, { BI, BDMA } }, -{ "bdztla+", BBO(16,BODZT,1,1), BBOY_MASK, NOPOWER4, { BI, BDPA } }, -{ "bdztla", BBO(16,BODZT,1,1), BBOY_MASK, PPCCOM, { BI, BDA } }, -{ "bdzf-", BBO(16,BODZF,0,0), BBOY_MASK, NOPOWER4, { BI, BDM } }, -{ "bdzf+", BBO(16,BODZF,0,0), BBOY_MASK, NOPOWER4, { BI, BDP } }, -{ "bdzf", BBO(16,BODZF,0,0), BBOY_MASK, PPCCOM, { BI, BD } }, -{ "bdzfl-", BBO(16,BODZF,0,1), BBOY_MASK, NOPOWER4, { BI, BDM } }, -{ "bdzfl+", BBO(16,BODZF,0,1), BBOY_MASK, NOPOWER4, { BI, BDP } }, -{ "bdzfl", BBO(16,BODZF,0,1), BBOY_MASK, PPCCOM, { BI, BD } }, -{ "bdzfa-", BBO(16,BODZF,1,0), BBOY_MASK, NOPOWER4, { BI, BDMA } }, -{ "bdzfa+", BBO(16,BODZF,1,0), BBOY_MASK, NOPOWER4, { BI, BDPA } }, -{ "bdzfa", BBO(16,BODZF,1,0), BBOY_MASK, PPCCOM, { BI, BDA } }, -{ "bdzfla-", BBO(16,BODZF,1,1), BBOY_MASK, NOPOWER4, { BI, BDMA } }, -{ "bdzfla+", BBO(16,BODZF,1,1), BBOY_MASK, NOPOWER4, { BI, BDPA } }, -{ "bdzfla", BBO(16,BODZF,1,1), BBOY_MASK, PPCCOM, { BI, BDA } }, -{ "bc-", B(16,0,0), B_MASK, PPCCOM, { BOE, BI, BDM } }, -{ "bc+", B(16,0,0), B_MASK, PPCCOM, { BOE, BI, BDP } }, -{ "bc", B(16,0,0), B_MASK, COM, { BO, BI, BD } }, -{ "bcl-", B(16,0,1), B_MASK, PPCCOM, { BOE, BI, BDM } }, -{ "bcl+", B(16,0,1), B_MASK, PPCCOM, { BOE, BI, BDP } }, -{ "bcl", B(16,0,1), B_MASK, COM, { BO, BI, BD } }, -{ "bca-", B(16,1,0), B_MASK, PPCCOM, { BOE, BI, BDMA } }, -{ "bca+", B(16,1,0), B_MASK, PPCCOM, { BOE, BI, BDPA } }, -{ "bca", B(16,1,0), B_MASK, COM, { BO, BI, BDA } }, -{ "bcla-", B(16,1,1), B_MASK, PPCCOM, { BOE, BI, BDMA } }, -{ "bcla+", B(16,1,1), B_MASK, PPCCOM, { BOE, BI, BDPA } }, -{ "bcla", B(16,1,1), B_MASK, COM, { BO, BI, BDA } }, - -{ "sc", SC(17,1,0), SC_MASK, PPC, { LEV } }, -{ "svc", SC(17,0,0), SC_MASK, POWER, { SVC_LEV, FL1, FL2 } }, -{ "svcl", SC(17,0,1), SC_MASK, POWER, { SVC_LEV, FL1, FL2 } }, -{ "svca", SC(17,1,0), SC_MASK, PWRCOM, { SV } }, -{ "svcla", SC(17,1,1), SC_MASK, POWER, { SV } }, - -{ "b", B(18,0,0), B_MASK, COM, { LI } }, -{ "bl", B(18,0,1), B_MASK, COM, { LI } }, -{ "ba", B(18,1,0), B_MASK, COM, { LIA } }, -{ "bla", B(18,1,1), B_MASK, COM, { LIA } }, - -{ "mcrf", XL(19,0), XLBB_MASK|(3 << 21)|(3 << 16), COM, { BF, BFA } }, - -{ "blr", XLO(19,BOU,16,0), XLBOBIBB_MASK, PPCCOM, { 0 } }, -{ "br", XLO(19,BOU,16,0), XLBOBIBB_MASK, PWRCOM, { 0 } }, -{ "blrl", XLO(19,BOU,16,1), XLBOBIBB_MASK, PPCCOM, { 0 } }, -{ "brl", XLO(19,BOU,16,1), XLBOBIBB_MASK, PWRCOM, { 0 } }, -{ "bdnzlr", XLO(19,BODNZ,16,0), XLBOBIBB_MASK, PPCCOM, { 0 } }, -{ "bdnzlr-", XLO(19,BODNZ,16,0), 
XLBOBIBB_MASK, NOPOWER4, { 0 } }, -{ "bdnzlr-", XLO(19,BODNZM4,16,0), XLBOBIBB_MASK, POWER4, { 0 } }, -{ "bdnzlr+", XLO(19,BODNZP,16,0), XLBOBIBB_MASK, NOPOWER4, { 0 } }, -{ "bdnzlr+", XLO(19,BODNZP4,16,0), XLBOBIBB_MASK, POWER4, { 0 } }, -{ "bdnzlrl", XLO(19,BODNZ,16,1), XLBOBIBB_MASK, PPCCOM, { 0 } }, -{ "bdnzlrl-",XLO(19,BODNZ,16,1), XLBOBIBB_MASK, NOPOWER4, { 0 } }, -{ "bdnzlrl-",XLO(19,BODNZM4,16,1), XLBOBIBB_MASK, POWER4, { 0 } }, -{ "bdnzlrl+",XLO(19,BODNZP,16,1), XLBOBIBB_MASK, NOPOWER4, { 0 } }, -{ "bdnzlrl+",XLO(19,BODNZP4,16,1), XLBOBIBB_MASK, POWER4, { 0 } }, -{ "bdzlr", XLO(19,BODZ,16,0), XLBOBIBB_MASK, PPCCOM, { 0 } }, -{ "bdzlr-", XLO(19,BODZ,16,0), XLBOBIBB_MASK, NOPOWER4, { 0 } }, -{ "bdzlr-", XLO(19,BODZM4,16,0), XLBOBIBB_MASK, POWER4, { 0 } }, -{ "bdzlr+", XLO(19,BODZP,16,0), XLBOBIBB_MASK, NOPOWER4, { 0 } }, -{ "bdzlr+", XLO(19,BODZP4,16,0), XLBOBIBB_MASK, POWER4, { 0 } }, -{ "bdzlrl", XLO(19,BODZ,16,1), XLBOBIBB_MASK, PPCCOM, { 0 } }, -{ "bdzlrl-", XLO(19,BODZ,16,1), XLBOBIBB_MASK, NOPOWER4, { 0 } }, -{ "bdzlrl-", XLO(19,BODZM4,16,1), XLBOBIBB_MASK, POWER4, { 0 } }, -{ "bdzlrl+", XLO(19,BODZP,16,1), XLBOBIBB_MASK, NOPOWER4, { 0 } }, -{ "bdzlrl+", XLO(19,BODZP4,16,1), XLBOBIBB_MASK, POWER4, { 0 } }, -{ "bltlr", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bltlr-", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bltlr-", XLOCB(19,BOTM4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bltlr+", XLOCB(19,BOTP,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bltlr+", XLOCB(19,BOTP4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bltr", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bltlrl", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bltlrl-", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bltlrl-", XLOCB(19,BOTM4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bltlrl+", XLOCB(19,BOTP,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bltlrl+", XLOCB(19,BOTP4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bltrl", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bgtlr", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bgtlr-", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgtlr-", XLOCB(19,BOTM4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgtlr+", XLOCB(19,BOTP,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgtlr+", XLOCB(19,BOTP4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgtr", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bgtlrl", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bgtlrl-", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgtlrl-", XLOCB(19,BOTM4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgtlrl+", XLOCB(19,BOTP,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgtlrl+", XLOCB(19,BOTP4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgtrl", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "beqlr", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "beqlr-", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "beqlr-", XLOCB(19,BOTM4,CBEQ,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "beqlr+", XLOCB(19,BOTP,CBEQ,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "beqlr+", XLOCB(19,BOTP4,CBEQ,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "beqr", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "beqlrl", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "beqlrl-", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } 
}, -{ "beqlrl-", XLOCB(19,BOTM4,CBEQ,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "beqlrl+", XLOCB(19,BOTP,CBEQ,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "beqlrl+", XLOCB(19,BOTP4,CBEQ,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "beqrl", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bsolr", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bsolr-", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bsolr-", XLOCB(19,BOTM4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bsolr+", XLOCB(19,BOTP,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bsolr+", XLOCB(19,BOTP4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bsor", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bsolrl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bsolrl-", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bsolrl-", XLOCB(19,BOTM4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bsolrl+", XLOCB(19,BOTP,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bsolrl+", XLOCB(19,BOTP4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bsorl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bunlr", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bunlr-", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bunlr-", XLOCB(19,BOTM4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bunlr+", XLOCB(19,BOTP,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bunlr+", XLOCB(19,BOTP4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bunlrl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bunlrl-", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bunlrl-", XLOCB(19,BOTM4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bunlrl+", XLOCB(19,BOTP,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bunlrl+", XLOCB(19,BOTP4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgelr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bgelr-", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgelr-", XLOCB(19,BOFM4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgelr+", XLOCB(19,BOFP,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgelr+", XLOCB(19,BOFP4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bger", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bgelrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bgelrl-", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgelrl-", XLOCB(19,BOFM4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgelrl+", XLOCB(19,BOFP,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgelrl+", XLOCB(19,BOFP4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgerl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bnllr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnllr-", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnllr-", XLOCB(19,BOFM4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnllr+", XLOCB(19,BOFP,CBLT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnllr+", XLOCB(19,BOFP4,CBLT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnlr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bnllrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnllrl-", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnllrl-", XLOCB(19,BOFM4,CBLT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnllrl+", XLOCB(19,BOFP,CBLT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnllrl+", XLOCB(19,BOFP4,CBLT,16,1), XLBOCBBB_MASK, 
POWER4, { CR } }, -{ "bnlrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "blelr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "blelr-", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "blelr-", XLOCB(19,BOFM4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "blelr+", XLOCB(19,BOFP,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "blelr+", XLOCB(19,BOFP4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bler", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "blelrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "blelrl-", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "blelrl-", XLOCB(19,BOFM4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "blelrl+", XLOCB(19,BOFP,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "blelrl+", XLOCB(19,BOFP4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "blerl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bnglr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnglr-", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnglr-", XLOCB(19,BOFM4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnglr+", XLOCB(19,BOFP,CBGT,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnglr+", XLOCB(19,BOFP4,CBGT,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bngr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bnglrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnglrl-", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnglrl-", XLOCB(19,BOFM4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnglrl+", XLOCB(19,BOFP,CBGT,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnglrl+", XLOCB(19,BOFP4,CBGT,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bngrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bnelr", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnelr-", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnelr-", XLOCB(19,BOFM4,CBEQ,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnelr+", XLOCB(19,BOFP,CBEQ,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnelr+", XLOCB(19,BOFP4,CBEQ,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bner", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bnelrl", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnelrl-", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnelrl-", XLOCB(19,BOFM4,CBEQ,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnelrl+", XLOCB(19,BOFP,CBEQ,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnelrl+", XLOCB(19,BOFP4,CBEQ,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnerl", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bnslr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnslr-", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnslr-", XLOCB(19,BOFM4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnslr+", XLOCB(19,BOFP,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnslr+", XLOCB(19,BOFP4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnsr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PWRCOM, { CR } }, -{ "bnslrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnslrl-", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnslrl-", XLOCB(19,BOFM4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnslrl+", XLOCB(19,BOFP,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnslrl+", XLOCB(19,BOFP4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnsrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, 
PWRCOM, { CR } }, -{ "bnulr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnulr-", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnulr-", XLOCB(19,BOFM4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnulr+", XLOCB(19,BOFP,CBSO,16,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnulr+", XLOCB(19,BOFP4,CBSO,16,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnulrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnulrl-", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnulrl-", XLOCB(19,BOFM4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnulrl+", XLOCB(19,BOFP,CBSO,16,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnulrl+", XLOCB(19,BOFP4,CBSO,16,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "btlr", XLO(19,BOT,16,0), XLBOBB_MASK, PPCCOM, { BI } }, -{ "btlr-", XLO(19,BOT,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "btlr-", XLO(19,BOTM4,16,0), XLBOBB_MASK, POWER4, { BI } }, -{ "btlr+", XLO(19,BOTP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "btlr+", XLO(19,BOTP4,16,0), XLBOBB_MASK, POWER4, { BI } }, -{ "bbtr", XLO(19,BOT,16,0), XLBOBB_MASK, PWRCOM, { BI } }, -{ "btlrl", XLO(19,BOT,16,1), XLBOBB_MASK, PPCCOM, { BI } }, -{ "btlrl-", XLO(19,BOT,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "btlrl-", XLO(19,BOTM4,16,1), XLBOBB_MASK, POWER4, { BI } }, -{ "btlrl+", XLO(19,BOTP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "btlrl+", XLO(19,BOTP4,16,1), XLBOBB_MASK, POWER4, { BI } }, -{ "bbtrl", XLO(19,BOT,16,1), XLBOBB_MASK, PWRCOM, { BI } }, -{ "bflr", XLO(19,BOF,16,0), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bflr-", XLO(19,BOF,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bflr-", XLO(19,BOFM4,16,0), XLBOBB_MASK, POWER4, { BI } }, -{ "bflr+", XLO(19,BOFP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bflr+", XLO(19,BOFP4,16,0), XLBOBB_MASK, POWER4, { BI } }, -{ "bbfr", XLO(19,BOF,16,0), XLBOBB_MASK, PWRCOM, { BI } }, -{ "bflrl", XLO(19,BOF,16,1), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bflrl-", XLO(19,BOF,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bflrl-", XLO(19,BOFM4,16,1), XLBOBB_MASK, POWER4, { BI } }, -{ "bflrl+", XLO(19,BOFP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bflrl+", XLO(19,BOFP4,16,1), XLBOBB_MASK, POWER4, { BI } }, -{ "bbfrl", XLO(19,BOF,16,1), XLBOBB_MASK, PWRCOM, { BI } }, -{ "bdnztlr", XLO(19,BODNZT,16,0), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bdnztlr-",XLO(19,BODNZT,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdnztlr+",XLO(19,BODNZTP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdnztlrl",XLO(19,BODNZT,16,1), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bdnztlrl-",XLO(19,BODNZT,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdnztlrl+",XLO(19,BODNZTP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdnzflr", XLO(19,BODNZF,16,0), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bdnzflr-",XLO(19,BODNZF,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdnzflr+",XLO(19,BODNZFP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdnzflrl",XLO(19,BODNZF,16,1), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bdnzflrl-",XLO(19,BODNZF,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdnzflrl+",XLO(19,BODNZFP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdztlr", XLO(19,BODZT,16,0), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bdztlr-", XLO(19,BODZT,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdztlr+", XLO(19,BODZTP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdztlrl", XLO(19,BODZT,16,1), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bdztlrl-",XLO(19,BODZT,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdztlrl+",XLO(19,BODZTP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdzflr", XLO(19,BODZF,16,0), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bdzflr-", 
XLO(19,BODZF,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdzflr+", XLO(19,BODZFP,16,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdzflrl", XLO(19,BODZF,16,1), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bdzflrl-",XLO(19,BODZF,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bdzflrl+",XLO(19,BODZFP,16,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bclr+", XLYLK(19,16,1,0), XLYBB_MASK, PPCCOM, { BOE, BI } }, -{ "bclrl+", XLYLK(19,16,1,1), XLYBB_MASK, PPCCOM, { BOE, BI } }, -{ "bclr-", XLYLK(19,16,0,0), XLYBB_MASK, PPCCOM, { BOE, BI } }, -{ "bclrl-", XLYLK(19,16,0,1), XLYBB_MASK, PPCCOM, { BOE, BI } }, -{ "bclr", XLLK(19,16,0), XLBH_MASK, PPCCOM, { BO, BI, BH } }, -{ "bclrl", XLLK(19,16,1), XLBH_MASK, PPCCOM, { BO, BI, BH } }, -{ "bcr", XLLK(19,16,0), XLBB_MASK, PWRCOM, { BO, BI } }, -{ "bcrl", XLLK(19,16,1), XLBB_MASK, PWRCOM, { BO, BI } }, -{ "bclre", XLLK(19,17,0), XLBB_MASK, BOOKE64, { BO, BI } }, -{ "bclrel", XLLK(19,17,1), XLBB_MASK, BOOKE64, { BO, BI } }, - -{ "rfid", XL(19,18), 0xffffffff, PPC64, { 0 } }, - -{ "crnot", XL(19,33), XL_MASK, PPCCOM, { BT, BA, BBA } }, -{ "crnor", XL(19,33), XL_MASK, COM, { BT, BA, BB } }, -{ "rfmci", X(19,38), 0xffffffff, PPCRFMCI, { 0 } }, - -{ "rfi", XL(19,50), 0xffffffff, COM, { 0 } }, -{ "rfci", XL(19,51), 0xffffffff, PPC403 | BOOKE, { 0 } }, - -{ "rfsvc", XL(19,82), 0xffffffff, POWER, { 0 } }, - -{ "crandc", XL(19,129), XL_MASK, COM, { BT, BA, BB } }, - -{ "isync", XL(19,150), 0xffffffff, PPCCOM, { 0 } }, -{ "ics", XL(19,150), 0xffffffff, PWRCOM, { 0 } }, - -{ "crclr", XL(19,193), XL_MASK, PPCCOM, { BT, BAT, BBA } }, -{ "crxor", XL(19,193), XL_MASK, COM, { BT, BA, BB } }, - -{ "crnand", XL(19,225), XL_MASK, COM, { BT, BA, BB } }, - -{ "crand", XL(19,257), XL_MASK, COM, { BT, BA, BB } }, - -{ "hrfid", XL(19,274), 0xffffffff, POWER5 | CELL, { 0 } }, - -{ "crset", XL(19,289), XL_MASK, PPCCOM, { BT, BAT, BBA } }, -{ "creqv", XL(19,289), XL_MASK, COM, { BT, BA, BB } }, - -{ "doze", XL(19,402), 0xffffffff, POWER6, { 0 } }, - -{ "crorc", XL(19,417), XL_MASK, COM, { BT, BA, BB } }, - -{ "nap", XL(19,434), 0xffffffff, POWER6, { 0 } }, - -{ "crmove", XL(19,449), XL_MASK, PPCCOM, { BT, BA, BBA } }, -{ "cror", XL(19,449), XL_MASK, COM, { BT, BA, BB } }, - -{ "sleep", XL(19,466), 0xffffffff, POWER6, { 0 } }, -{ "rvwinkle", XL(19,498), 0xffffffff, POWER6, { 0 } }, - -{ "bctr", XLO(19,BOU,528,0), XLBOBIBB_MASK, COM, { 0 } }, -{ "bctrl", XLO(19,BOU,528,1), XLBOBIBB_MASK, COM, { 0 } }, -{ "bltctr", XLOCB(19,BOT,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bltctr-", XLOCB(19,BOT,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bltctr-", XLOCB(19,BOTM4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bltctr+", XLOCB(19,BOTP,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bltctr+", XLOCB(19,BOTP4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bltctrl", XLOCB(19,BOT,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bltctrl-",XLOCB(19,BOT,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bltctrl-",XLOCB(19,BOTM4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bltctrl+",XLOCB(19,BOTP,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bltctrl+",XLOCB(19,BOTP4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgtctr", XLOCB(19,BOT,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bgtctr-", XLOCB(19,BOT,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgtctr-", XLOCB(19,BOTM4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgtctr+", XLOCB(19,BOTP,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgtctr+", XLOCB(19,BOTP4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ 
"bgtctrl", XLOCB(19,BOT,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bgtctrl-",XLOCB(19,BOT,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgtctrl-",XLOCB(19,BOTM4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgtctrl+",XLOCB(19,BOTP,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgtctrl+",XLOCB(19,BOTP4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "beqctr", XLOCB(19,BOT,CBEQ,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "beqctr-", XLOCB(19,BOT,CBEQ,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "beqctr-", XLOCB(19,BOTM4,CBEQ,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "beqctr+", XLOCB(19,BOTP,CBEQ,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "beqctr+", XLOCB(19,BOTP4,CBEQ,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "beqctrl", XLOCB(19,BOT,CBEQ,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "beqctrl-",XLOCB(19,BOT,CBEQ,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "beqctrl-",XLOCB(19,BOTM4,CBEQ,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "beqctrl+",XLOCB(19,BOTP,CBEQ,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "beqctrl+",XLOCB(19,BOTP4,CBEQ,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bsoctr", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bsoctr-", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bsoctr-", XLOCB(19,BOTM4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bsoctr+", XLOCB(19,BOTP,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bsoctr+", XLOCB(19,BOTP4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bsoctrl", XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bsoctrl-",XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bsoctrl-",XLOCB(19,BOTM4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bsoctrl+",XLOCB(19,BOTP,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bsoctrl+",XLOCB(19,BOTP4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bunctr", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bunctr-", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bunctr-", XLOCB(19,BOTM4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bunctr+", XLOCB(19,BOTP,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bunctr+", XLOCB(19,BOTP4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bunctrl", XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bunctrl-",XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bunctrl-",XLOCB(19,BOTM4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bunctrl+",XLOCB(19,BOTP,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bunctrl+",XLOCB(19,BOTP4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgectr", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bgectr-", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgectr-", XLOCB(19,BOFM4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgectr+", XLOCB(19,BOFP,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgectr+", XLOCB(19,BOFP4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgectrl", XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bgectrl-",XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgectrl-",XLOCB(19,BOFM4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bgectrl+",XLOCB(19,BOFP,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bgectrl+",XLOCB(19,BOFP4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnlctr", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnlctr-", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnlctr-", XLOCB(19,BOFM4,CBLT,528,0), 
XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnlctr+", XLOCB(19,BOFP,CBLT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnlctr+", XLOCB(19,BOFP4,CBLT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnlctrl", XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnlctrl-",XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnlctrl-",XLOCB(19,BOFM4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnlctrl+",XLOCB(19,BOFP,CBLT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnlctrl+",XLOCB(19,BOFP4,CBLT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "blectr", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "blectr-", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "blectr-", XLOCB(19,BOFM4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "blectr+", XLOCB(19,BOFP,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "blectr+", XLOCB(19,BOFP4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "blectrl", XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "blectrl-",XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "blectrl-",XLOCB(19,BOFM4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "blectrl+",XLOCB(19,BOFP,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "blectrl+",XLOCB(19,BOFP4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bngctr", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bngctr-", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bngctr-", XLOCB(19,BOFM4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bngctr+", XLOCB(19,BOFP,CBGT,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bngctr+", XLOCB(19,BOFP4,CBGT,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bngctrl", XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bngctrl-",XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bngctrl-",XLOCB(19,BOFM4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bngctrl+",XLOCB(19,BOFP,CBGT,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bngctrl+",XLOCB(19,BOFP4,CBGT,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnectr", XLOCB(19,BOF,CBEQ,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnectr-", XLOCB(19,BOF,CBEQ,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnectr-", XLOCB(19,BOFM4,CBEQ,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnectr+", XLOCB(19,BOFP,CBEQ,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnectr+", XLOCB(19,BOFP4,CBEQ,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnectrl", XLOCB(19,BOF,CBEQ,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnectrl-",XLOCB(19,BOF,CBEQ,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnectrl-",XLOCB(19,BOFM4,CBEQ,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnectrl+",XLOCB(19,BOFP,CBEQ,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnectrl+",XLOCB(19,BOFP4,CBEQ,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnsctr", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnsctr-", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnsctr-", XLOCB(19,BOFM4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnsctr+", XLOCB(19,BOFP,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnsctr+", XLOCB(19,BOFP4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnsctrl", XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnsctrl-",XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnsctrl-",XLOCB(19,BOFM4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnsctrl+",XLOCB(19,BOFP,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnsctrl+",XLOCB(19,BOFP4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ 
"bnuctr", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnuctr-", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnuctr-", XLOCB(19,BOFM4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnuctr+", XLOCB(19,BOFP,CBSO,528,0), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnuctr+", XLOCB(19,BOFP4,CBSO,528,0), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnuctrl", XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, { CR } }, -{ "bnuctrl-",XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnuctrl-",XLOCB(19,BOFM4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "bnuctrl+",XLOCB(19,BOFP,CBSO,528,1), XLBOCBBB_MASK, NOPOWER4, { CR } }, -{ "bnuctrl+",XLOCB(19,BOFP4,CBSO,528,1), XLBOCBBB_MASK, POWER4, { CR } }, -{ "btctr", XLO(19,BOT,528,0), XLBOBB_MASK, PPCCOM, { BI } }, -{ "btctr-", XLO(19,BOT,528,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "btctr-", XLO(19,BOTM4,528,0), XLBOBB_MASK, POWER4, { BI } }, -{ "btctr+", XLO(19,BOTP,528,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "btctr+", XLO(19,BOTP4,528,0), XLBOBB_MASK, POWER4, { BI } }, -{ "btctrl", XLO(19,BOT,528,1), XLBOBB_MASK, PPCCOM, { BI } }, -{ "btctrl-", XLO(19,BOT,528,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "btctrl-", XLO(19,BOTM4,528,1), XLBOBB_MASK, POWER4, { BI } }, -{ "btctrl+", XLO(19,BOTP,528,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "btctrl+", XLO(19,BOTP4,528,1), XLBOBB_MASK, POWER4, { BI } }, -{ "bfctr", XLO(19,BOF,528,0), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bfctr-", XLO(19,BOF,528,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bfctr-", XLO(19,BOFM4,528,0), XLBOBB_MASK, POWER4, { BI } }, -{ "bfctr+", XLO(19,BOFP,528,0), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bfctr+", XLO(19,BOFP4,528,0), XLBOBB_MASK, POWER4, { BI } }, -{ "bfctrl", XLO(19,BOF,528,1), XLBOBB_MASK, PPCCOM, { BI } }, -{ "bfctrl-", XLO(19,BOF,528,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bfctrl-", XLO(19,BOFM4,528,1), XLBOBB_MASK, POWER4, { BI } }, -{ "bfctrl+", XLO(19,BOFP,528,1), XLBOBB_MASK, NOPOWER4, { BI } }, -{ "bfctrl+", XLO(19,BOFP4,528,1), XLBOBB_MASK, POWER4, { BI } }, -{ "bcctr-", XLYLK(19,528,0,0), XLYBB_MASK, PPCCOM, { BOE, BI } }, -{ "bcctr+", XLYLK(19,528,1,0), XLYBB_MASK, PPCCOM, { BOE, BI } }, -{ "bcctrl-", XLYLK(19,528,0,1), XLYBB_MASK, PPCCOM, { BOE, BI } }, -{ "bcctrl+", XLYLK(19,528,1,1), XLYBB_MASK, PPCCOM, { BOE, BI } }, -{ "bcctr", XLLK(19,528,0), XLBH_MASK, PPCCOM, { BO, BI, BH } }, -{ "bcctrl", XLLK(19,528,1), XLBH_MASK, PPCCOM, { BO, BI, BH } }, -{ "bcc", XLLK(19,528,0), XLBB_MASK, PWRCOM, { BO, BI } }, -{ "bccl", XLLK(19,528,1), XLBB_MASK, PWRCOM, { BO, BI } }, -{ "bcctre", XLLK(19,529,0), XLBB_MASK, BOOKE64, { BO, BI } }, -{ "bcctrel", XLLK(19,529,1), XLBB_MASK, BOOKE64, { BO, BI } }, - -{ "rlwimi", M(20,0), M_MASK, PPCCOM, { RA,RS,SH,MBE,ME } }, -{ "rlimi", M(20,0), M_MASK, PWRCOM, { RA,RS,SH,MBE,ME } }, - -{ "rlwimi.", M(20,1), M_MASK, PPCCOM, { RA,RS,SH,MBE,ME } }, -{ "rlimi.", M(20,1), M_MASK, PWRCOM, { RA,RS,SH,MBE,ME } }, - -{ "rotlwi", MME(21,31,0), MMBME_MASK, PPCCOM, { RA, RS, SH } }, -{ "clrlwi", MME(21,31,0), MSHME_MASK, PPCCOM, { RA, RS, MB } }, -{ "rlwinm", M(21,0), M_MASK, PPCCOM, { RA,RS,SH,MBE,ME } }, -{ "rlinm", M(21,0), M_MASK, PWRCOM, { RA,RS,SH,MBE,ME } }, -{ "rotlwi.", MME(21,31,1), MMBME_MASK, PPCCOM, { RA,RS,SH } }, -{ "clrlwi.", MME(21,31,1), MSHME_MASK, PPCCOM, { RA, RS, MB } }, -{ "rlwinm.", M(21,1), M_MASK, PPCCOM, { RA,RS,SH,MBE,ME } }, -{ "rlinm.", M(21,1), M_MASK, PWRCOM, { RA,RS,SH,MBE,ME } }, - -{ "rlmi", M(22,0), M_MASK, M601, { RA,RS,RB,MBE,ME } }, -{ "rlmi.", M(22,1), M_MASK, M601, { 
RA,RS,RB,MBE,ME } }, - -{ "be", B(22,0,0), B_MASK, BOOKE64, { LI } }, -{ "bel", B(22,0,1), B_MASK, BOOKE64, { LI } }, -{ "bea", B(22,1,0), B_MASK, BOOKE64, { LIA } }, -{ "bela", B(22,1,1), B_MASK, BOOKE64, { LIA } }, - -{ "rotlw", MME(23,31,0), MMBME_MASK, PPCCOM, { RA, RS, RB } }, -{ "rlwnm", M(23,0), M_MASK, PPCCOM, { RA,RS,RB,MBE,ME } }, -{ "rlnm", M(23,0), M_MASK, PWRCOM, { RA,RS,RB,MBE,ME } }, -{ "rotlw.", MME(23,31,1), MMBME_MASK, PPCCOM, { RA, RS, RB } }, -{ "rlwnm.", M(23,1), M_MASK, PPCCOM, { RA,RS,RB,MBE,ME } }, -{ "rlnm.", M(23,1), M_MASK, PWRCOM, { RA,RS,RB,MBE,ME } }, - -{ "nop", OP(24), 0xffffffff, PPCCOM, { 0 } }, -{ "ori", OP(24), OP_MASK, PPCCOM, { RA, RS, UI } }, -{ "oril", OP(24), OP_MASK, PWRCOM, { RA, RS, UI } }, - -{ "oris", OP(25), OP_MASK, PPCCOM, { RA, RS, UI } }, -{ "oriu", OP(25), OP_MASK, PWRCOM, { RA, RS, UI } }, - -{ "xori", OP(26), OP_MASK, PPCCOM, { RA, RS, UI } }, -{ "xoril", OP(26), OP_MASK, PWRCOM, { RA, RS, UI } }, - -{ "xoris", OP(27), OP_MASK, PPCCOM, { RA, RS, UI } }, -{ "xoriu", OP(27), OP_MASK, PWRCOM, { RA, RS, UI } }, - -{ "andi.", OP(28), OP_MASK, PPCCOM, { RA, RS, UI } }, -{ "andil.", OP(28), OP_MASK, PWRCOM, { RA, RS, UI } }, - -{ "andis.", OP(29), OP_MASK, PPCCOM, { RA, RS, UI } }, -{ "andiu.", OP(29), OP_MASK, PWRCOM, { RA, RS, UI } }, - -{ "rotldi", MD(30,0,0), MDMB_MASK, PPC64, { RA, RS, SH6 } }, -{ "clrldi", MD(30,0,0), MDSH_MASK, PPC64, { RA, RS, MB6 } }, -{ "rldicl", MD(30,0,0), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, -{ "rotldi.", MD(30,0,1), MDMB_MASK, PPC64, { RA, RS, SH6 } }, -{ "clrldi.", MD(30,0,1), MDSH_MASK, PPC64, { RA, RS, MB6 } }, -{ "rldicl.", MD(30,0,1), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, - -{ "rldicr", MD(30,1,0), MD_MASK, PPC64, { RA, RS, SH6, ME6 } }, -{ "rldicr.", MD(30,1,1), MD_MASK, PPC64, { RA, RS, SH6, ME6 } }, - -{ "rldic", MD(30,2,0), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, -{ "rldic.", MD(30,2,1), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, - -{ "rldimi", MD(30,3,0), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, -{ "rldimi.", MD(30,3,1), MD_MASK, PPC64, { RA, RS, SH6, MB6 } }, - -{ "rotld", MDS(30,8,0), MDSMB_MASK, PPC64, { RA, RS, RB } }, -{ "rldcl", MDS(30,8,0), MDS_MASK, PPC64, { RA, RS, RB, MB6 } }, -{ "rotld.", MDS(30,8,1), MDSMB_MASK, PPC64, { RA, RS, RB } }, -{ "rldcl.", MDS(30,8,1), MDS_MASK, PPC64, { RA, RS, RB, MB6 } }, - -{ "rldcr", MDS(30,9,0), MDS_MASK, PPC64, { RA, RS, RB, ME6 } }, -{ "rldcr.", MDS(30,9,1), MDS_MASK, PPC64, { RA, RS, RB, ME6 } }, - -{ "cmpw", XOPL(31,0,0), XCMPL_MASK, PPCCOM, { OBF, RA, RB } }, -{ "cmpd", XOPL(31,0,1), XCMPL_MASK, PPC64, { OBF, RA, RB } }, -{ "cmp", X(31,0), XCMP_MASK, PPC, { BF, L, RA, RB } }, -{ "cmp", X(31,0), XCMPL_MASK, PWRCOM, { BF, RA, RB } }, - -{ "twlgt", XTO(31,4,TOLGT), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tlgt", XTO(31,4,TOLGT), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twllt", XTO(31,4,TOLLT), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tllt", XTO(31,4,TOLLT), XTO_MASK, PWRCOM, { RA, RB } }, -{ "tweq", XTO(31,4,TOEQ), XTO_MASK, PPCCOM, { RA, RB } }, -{ "teq", XTO(31,4,TOEQ), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twlge", XTO(31,4,TOLGE), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tlge", XTO(31,4,TOLGE), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twlnl", XTO(31,4,TOLNL), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tlnl", XTO(31,4,TOLNL), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twlle", XTO(31,4,TOLLE), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tlle", XTO(31,4,TOLLE), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twlng", XTO(31,4,TOLNG), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tlng", XTO(31,4,TOLNG), XTO_MASK, PWRCOM, { RA, 
RB } }, -{ "twgt", XTO(31,4,TOGT), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tgt", XTO(31,4,TOGT), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twge", XTO(31,4,TOGE), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tge", XTO(31,4,TOGE), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twnl", XTO(31,4,TONL), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tnl", XTO(31,4,TONL), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twlt", XTO(31,4,TOLT), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tlt", XTO(31,4,TOLT), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twle", XTO(31,4,TOLE), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tle", XTO(31,4,TOLE), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twng", XTO(31,4,TONG), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tng", XTO(31,4,TONG), XTO_MASK, PWRCOM, { RA, RB } }, -{ "twne", XTO(31,4,TONE), XTO_MASK, PPCCOM, { RA, RB } }, -{ "tne", XTO(31,4,TONE), XTO_MASK, PWRCOM, { RA, RB } }, -{ "trap", XTO(31,4,TOU), 0xffffffff, PPCCOM, { 0 } }, -{ "tw", X(31,4), X_MASK, PPCCOM, { TO, RA, RB } }, -{ "t", X(31,4), X_MASK, PWRCOM, { TO, RA, RB } }, - -{ "subfc", XO(31,8,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "sf", XO(31,8,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "subc", XO(31,8,0,0), XO_MASK, PPC, { RT, RB, RA } }, -{ "subfc.", XO(31,8,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "sf.", XO(31,8,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "subc.", XO(31,8,0,1), XO_MASK, PPCCOM, { RT, RB, RA } }, -{ "subfco", XO(31,8,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "sfo", XO(31,8,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "subco", XO(31,8,1,0), XO_MASK, PPC, { RT, RB, RA } }, -{ "subfco.", XO(31,8,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "sfo.", XO(31,8,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "subco.", XO(31,8,1,1), XO_MASK, PPC, { RT, RB, RA } }, - -{ "mulhdu", XO(31,9,0,0), XO_MASK, PPC64, { RT, RA, RB } }, -{ "mulhdu.", XO(31,9,0,1), XO_MASK, PPC64, { RT, RA, RB } }, - -{ "addc", XO(31,10,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "a", XO(31,10,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "addc.", XO(31,10,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "a.", XO(31,10,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "addco", XO(31,10,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "ao", XO(31,10,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "addco.", XO(31,10,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "ao.", XO(31,10,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, - -{ "mulhwu", XO(31,11,0,0), XO_MASK, PPC, { RT, RA, RB } }, -{ "mulhwu.", XO(31,11,0,1), XO_MASK, PPC, { RT, RA, RB } }, - -{ "isellt", X(31,15), X_MASK, PPCISEL, { RT, RA, RB } }, -{ "iselgt", X(31,47), X_MASK, PPCISEL, { RT, RA, RB } }, -{ "iseleq", X(31,79), X_MASK, PPCISEL, { RT, RA, RB } }, -{ "isel", XISEL(31,15), XISEL_MASK, PPCISEL, { RT, RA, RB, CRB } }, - -{ "mfocrf", XFXM(31,19,0,1), XFXFXM_MASK, COM, { RT, FXM } }, -{ "mfcr", X(31,19), XRARB_MASK, NOPOWER4 | COM, { RT } }, -{ "mfcr", X(31,19), XFXFXM_MASK, POWER4, { RT, FXM4 } }, - -{ "lwarx", X(31,20), XEH_MASK, PPC, { RT, RA0, RB, EH } }, - -{ "ldx", X(31,21), X_MASK, PPC64, { RT, RA0, RB } }, - -{ "icbt", X(31,22), X_MASK, BOOKE|PPCE300, { CT, RA, RB } }, -{ "icbt", X(31,262), XRT_MASK, PPC403, { RA, RB } }, - -{ "lwzx", X(31,23), X_MASK, PPCCOM, { RT, RA0, RB } }, -{ "lx", X(31,23), X_MASK, PWRCOM, { RT, RA, RB } }, - -{ "slw", XRC(31,24,0), X_MASK, PPCCOM, { RA, RS, RB } }, -{ "sl", XRC(31,24,0), X_MASK, PWRCOM, { RA, RS, RB } }, -{ "slw.", XRC(31,24,1), X_MASK, PPCCOM, { RA, RS, RB } }, -{ "sl.", XRC(31,24,1), X_MASK, PWRCOM, { RA, RS, RB } }, - -{ "cntlzw", XRC(31,26,0), XRB_MASK, PPCCOM, { RA, RS } }, -{ "cntlz", XRC(31,26,0), XRB_MASK, PWRCOM, { 
RA, RS } }, -{ "cntlzw.", XRC(31,26,1), XRB_MASK, PPCCOM, { RA, RS } }, -{ "cntlz.", XRC(31,26,1), XRB_MASK, PWRCOM, { RA, RS } }, - -{ "sld", XRC(31,27,0), X_MASK, PPC64, { RA, RS, RB } }, -{ "sld.", XRC(31,27,1), X_MASK, PPC64, { RA, RS, RB } }, - -{ "and", XRC(31,28,0), X_MASK, COM, { RA, RS, RB } }, -{ "and.", XRC(31,28,1), X_MASK, COM, { RA, RS, RB } }, - -{ "maskg", XRC(31,29,0), X_MASK, M601, { RA, RS, RB } }, -{ "maskg.", XRC(31,29,1), X_MASK, M601, { RA, RS, RB } }, - -{ "icbte", X(31,30), X_MASK, BOOKE64, { CT, RA, RB } }, - -{ "lwzxe", X(31,31), X_MASK, BOOKE64, { RT, RA0, RB } }, - -{ "cmplw", XOPL(31,32,0), XCMPL_MASK, PPCCOM, { OBF, RA, RB } }, -{ "cmpld", XOPL(31,32,1), XCMPL_MASK, PPC64, { OBF, RA, RB } }, -{ "cmpl", X(31,32), XCMP_MASK, PPC, { BF, L, RA, RB } }, -{ "cmpl", X(31,32), XCMPL_MASK, PWRCOM, { BF, RA, RB } }, - -{ "subf", XO(31,40,0,0), XO_MASK, PPC, { RT, RA, RB } }, -{ "sub", XO(31,40,0,0), XO_MASK, PPC, { RT, RB, RA } }, -{ "subf.", XO(31,40,0,1), XO_MASK, PPC, { RT, RA, RB } }, -{ "sub.", XO(31,40,0,1), XO_MASK, PPC, { RT, RB, RA } }, -{ "subfo", XO(31,40,1,0), XO_MASK, PPC, { RT, RA, RB } }, -{ "subo", XO(31,40,1,0), XO_MASK, PPC, { RT, RB, RA } }, -{ "subfo.", XO(31,40,1,1), XO_MASK, PPC, { RT, RA, RB } }, -{ "subo.", XO(31,40,1,1), XO_MASK, PPC, { RT, RB, RA } }, +{"attn", X(0,256), X_MASK, POWER4|PPCA2, PPC476|PPCVLE, {0}}, +{"tdlgti", OPTO(2,TOLGT), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdllti", OPTO(2,TOLLT), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdeqi", OPTO(2,TOEQ), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdlgei", OPTO(2,TOLGE), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdlnli", OPTO(2,TOLNL), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdllei", OPTO(2,TOLLE), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdlngi", OPTO(2,TOLNG), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdgti", OPTO(2,TOGT), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdgei", OPTO(2,TOGE), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdnli", OPTO(2,TONL), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdlti", OPTO(2,TOLT), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdlei", OPTO(2,TOLE), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdngi", OPTO(2,TONG), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdnei", OPTO(2,TONE), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdui", OPTO(2,TOU), OPTO_MASK, PPC64, PPCVLE, {RA, SI}}, +{"tdi", OP(2), OP_MASK, PPC64, PPCVLE, {TO, RA, SI}}, + +{"twlgti", OPTO(3,TOLGT), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tlgti", OPTO(3,TOLGT), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twllti", OPTO(3,TOLLT), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tllti", OPTO(3,TOLLT), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"tweqi", OPTO(3,TOEQ), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"teqi", OPTO(3,TOEQ), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twlgei", OPTO(3,TOLGE), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tlgei", OPTO(3,TOLGE), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twlnli", OPTO(3,TOLNL), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tlnli", OPTO(3,TOLNL), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twllei", OPTO(3,TOLLE), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tllei", OPTO(3,TOLLE), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twlngi", OPTO(3,TOLNG), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tlngi", OPTO(3,TOLNG), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twgti", OPTO(3,TOGT), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tgti", OPTO(3,TOGT), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twgei", OPTO(3,TOGE), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tgei", OPTO(3,TOGE), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twnli", OPTO(3,TONL), 
OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tnli", OPTO(3,TONL), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twlti", OPTO(3,TOLT), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tlti", OPTO(3,TOLT), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twlei", OPTO(3,TOLE), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tlei", OPTO(3,TOLE), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twngi", OPTO(3,TONG), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tngi", OPTO(3,TONG), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twnei", OPTO(3,TONE), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tnei", OPTO(3,TONE), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twui", OPTO(3,TOU), OPTO_MASK, PPCCOM, PPCVLE, {RA, SI}}, +{"tui", OPTO(3,TOU), OPTO_MASK, PWRCOM, PPCVLE, {RA, SI}}, +{"twi", OP(3), OP_MASK, PPCCOM, PPCVLE, {TO, RA, SI}}, +{"ti", OP(3), OP_MASK, PWRCOM, PPCVLE, {TO, RA, SI}}, + +{"ps_cmpu0", X (4, 0), XBF_MASK, PPCPS, 0, {BF, FRA, FRB}}, +{"vaddubm", VX (4, 0), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmul10cuq", VX (4, 1), VXVB_MASK, PPCVEC3, 0, {VD, VA}}, +{"vmaxub", VX (4, 2), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vrlb", VX (4, 4), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpequb", VXR(4, 6,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpneb", VXR(4, 7,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vmuloub", VX (4, 8), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vaddfp", VX (4, 10), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"psq_lx", XW (4, 6,0), XW_MASK, PPCPS, 0, {FRT,RA,RB,PSWM,PSQM}}, +{"vmrghb", VX (4, 12), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"psq_stx", XW (4, 7,0), XW_MASK, PPCPS, 0, {FRS,RA,RB,PSWM,PSQM}}, +{"vpkuhum", VX (4, 14), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"mulhhwu", XRC(4, 8,0), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"mulhhwu.", XRC(4, 8,1), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_sum0", A (4, 10,0), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"ps_sum0.", A (4, 10,1), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"ps_sum1", A (4, 11,0), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"ps_sum1.", A (4, 11,1), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"ps_muls0", A (4, 12,0), AFRB_MASK, PPCPS, 0, {FRT, FRA, FRC}}, +{"machhwu", XO (4, 12,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_muls0.", A (4, 12,1), AFRB_MASK, PPCPS, 0, {FRT, FRA, FRC}}, +{"machhwu.", XO (4, 12,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_muls1", A (4, 13,0), AFRB_MASK, PPCPS, 0, {FRT, FRA, FRC}}, +{"ps_muls1.", A (4, 13,1), AFRB_MASK, PPCPS, 0, {FRT, FRA, FRC}}, +{"ps_madds0", A (4, 14,0), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"ps_madds0.", A (4, 14,1), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"ps_madds1", A (4, 15,0), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"ps_madds1.", A (4, 15,1), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"vmhaddshs", VXA(4, 32), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"vmhraddshs", VXA(4, 33), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"vmladduhm", VXA(4, 34), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"vmsumudm", VXA(4, 35), VXA_MASK, PPCVEC3, 0, {VD, VA, VB, VC}}, +{"ps_div", A (4, 18,0), AFRC_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"vmsumubm", VXA(4, 36), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"ps_div.", A (4, 18,1), AFRC_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"vmsummbm", VXA(4, 37), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"vmsumuhm", VXA(4, 38), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"vmsumuhs", VXA(4, 39), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"ps_sub", A (4, 20,0), AFRC_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"vmsumshm", VXA(4, 40), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"ps_sub.", A (4, 20,1), AFRC_MASK, PPCPS, 0, {FRT, FRA, FRB}}, 
+{"vmsumshs", VXA(4, 41), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"ps_add", A (4, 21,0), AFRC_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"vsel", VXA(4, 42), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"ps_add.", A (4, 21,1), AFRC_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"vperm", VXA(4, 43), VXA_MASK, PPCVEC, 0, {VD, VA, VB, VC}}, +{"vsldoi", VXA(4, 44), VXASHB_MASK, PPCVEC, 0, {VD, VA, VB, SHB}}, +{"vpermxor", VXA(4, 45), VXA_MASK, PPCVEC2, 0, {VD, VA, VB, VC}}, +{"ps_sel", A (4, 23,0), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"vmaddfp", VXA(4, 46), VXA_MASK, PPCVEC, 0, {VD, VA, VC, VB}}, +{"ps_sel.", A (4, 23,1), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"vnmsubfp", VXA(4, 47), VXA_MASK, PPCVEC, 0, {VD, VA, VC, VB}}, +{"ps_res", A (4, 24,0), AFRAFRC_MASK, PPCPS, 0, {FRT, FRB}}, +{"maddhd", VXA(4, 48), VXA_MASK, POWER9, 0, {RT, RA, RB, RC}}, +{"ps_res.", A (4, 24,1), AFRAFRC_MASK, PPCPS, 0, {FRT, FRB}}, +{"maddhdu", VXA(4, 49), VXA_MASK, POWER9, 0, {RT, RA, RB, RC}}, +{"ps_mul", A (4, 25,0), AFRB_MASK, PPCPS, 0, {FRT, FRA, FRC}}, +{"ps_mul.", A (4, 25,1), AFRB_MASK, PPCPS, 0, {FRT, FRA, FRC}}, +{"maddld", VXA(4, 51), VXA_MASK, POWER9, 0, {RT, RA, RB, RC}}, +{"ps_rsqrte", A (4, 26,0), AFRAFRC_MASK, PPCPS, 0, {FRT, FRB}}, +{"ps_rsqrte.", A (4, 26,1), AFRAFRC_MASK, PPCPS, 0, {FRT, FRB}}, +{"ps_msub", A (4, 28,0), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"ps_msub.", A (4, 28,1), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"ps_madd", A (4, 29,0), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"ps_madd.", A (4, 29,1), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"vpermr", VXA(4, 59), VXA_MASK, PPCVEC3, 0, {VD, VA, VB, VC}}, +{"ps_nmsub", A (4, 30,0), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"vaddeuqm", VXA(4, 60), VXA_MASK, PPCVEC2, 0, {VD, VA, VB, VC}}, +{"ps_nmsub.", A (4, 30,1), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"vaddecuq", VXA(4, 61), VXA_MASK, PPCVEC2, 0, {VD, VA, VB, VC}}, +{"ps_nmadd", A (4, 31,0), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"vsubeuqm", VXA(4, 62), VXA_MASK, PPCVEC2, 0, {VD, VA, VB, VC}}, +{"ps_nmadd.", A (4, 31,1), A_MASK, PPCPS, 0, {FRT, FRA, FRC, FRB}}, +{"vsubecuq", VXA(4, 63), VXA_MASK, PPCVEC2, 0, {VD, VA, VB, VC}}, +{"ps_cmpo0", X (4, 32), XBF_MASK, PPCPS, 0, {BF, FRA, FRB}}, +{"vadduhm", VX (4, 64), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmul10ecuq", VX (4, 65), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vmaxuh", VX (4, 66), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vrlh", VX (4, 68), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpequh", VXR(4, 70,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpneh", VXR(4, 71,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vmulouh", VX (4, 72), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vsubfp", VX (4, 74), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"psq_lux", XW (4, 38,0), XW_MASK, PPCPS, 0, {FRT,RA,RB,PSWM,PSQM}}, +{"vmrghh", VX (4, 76), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"psq_stux", XW (4, 39,0), XW_MASK, PPCPS, 0, {FRS,RA,RB,PSWM,PSQM}}, +{"vpkuwum", VX (4, 78), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"ps_neg", XRC(4, 40,0), XRA_MASK, PPCPS, 0, {FRT, FRB}}, +{"mulhhw", XRC(4, 40,0), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_neg.", XRC(4, 40,1), XRA_MASK, PPCPS, 0, {FRT, FRB}}, +{"mulhhw.", XRC(4, 40,1), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"machhw", XO (4, 44,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"machhw.", XO (4, 44,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmachhw", XO (4, 46,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmachhw.", XO (4, 46,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_cmpu1", X (4, 64), XBF_MASK, PPCPS, 0, {BF, FRA, FRB}}, +{"vadduwm", VX (4, 
128), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmaxuw", VX (4, 130), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vrlw", VX (4, 132), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vrlwmi", VX (4, 133), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vcmpequw", VXR(4, 134,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpnew", VXR(4, 135,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vmulouw", VX (4, 136), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vmuluwm", VX (4, 137), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vmrghw", VX (4, 140), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vpkuhus", VX (4, 142), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"ps_mr", XRC(4, 72,0), XRA_MASK, PPCPS, 0, {FRT, FRB}}, +{"ps_mr.", XRC(4, 72,1), XRA_MASK, PPCPS, 0, {FRT, FRB}}, +{"machhwsu", XO (4, 76,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"machhwsu.", XO (4, 76,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_cmpo1", X (4, 96), XBF_MASK, PPCPS, 0, {BF, FRA, FRB}}, +{"vaddudm", VX (4, 192), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vmaxud", VX (4, 194), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vrld", VX (4, 196), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vrldmi", VX (4, 197), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vcmpeqfp", VXR(4, 198,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpequd", VXR(4, 199,0), VXR_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vpkuwus", VX (4, 206), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"machhws", XO (4, 108,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"machhws.", XO (4, 108,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmachhws", XO (4, 110,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmachhws.", XO (4, 110,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vadduqm", VX (4, 256), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vmaxsb", VX (4, 258), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vslb", VX (4, 260), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpnezb", VXR(4, 263,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vmulosb", VX (4, 264), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vrefp", VX (4, 266), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"vmrglb", VX (4, 268), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vpkshus", VX (4, 270), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"ps_nabs", XRC(4, 136,0), XRA_MASK, PPCPS, 0, {FRT, FRB}}, +{"mulchwu", XRC(4, 136,0), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_nabs.", XRC(4, 136,1), XRA_MASK, PPCPS, 0, {FRT, FRB}}, +{"mulchwu.", XRC(4, 136,1), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"macchwu", XO (4, 140,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"macchwu.", XO (4, 140,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vaddcuq", VX (4, 320), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vmaxsh", VX (4, 322), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vslh", VX (4, 324), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpnezh", VXR(4, 327,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vmulosh", VX (4, 328), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vrsqrtefp", VX (4, 330), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"vmrglh", VX (4, 332), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vpkswus", VX (4, 334), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"mulchw", XRC(4, 168,0), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"mulchw.", XRC(4, 168,1), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"macchw", XO (4, 172,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"macchw.", XO (4, 172,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmacchw", XO (4, 174,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmacchw.", XO (4, 174,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vaddcuw", VX (4, 384), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmaxsw", VX (4, 386), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vslw", VX (4, 388), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vrlwnm", VX (4, 389), VX_MASK, PPCVEC3, 0, {VD, 
VA, VB}}, +{"vcmpnezw", VXR(4, 391,0), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vmulosw", VX (4, 392), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vexptefp", VX (4, 394), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"vmrglw", VX (4, 396), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vpkshss", VX (4, 398), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"macchwsu", XO (4, 204,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"macchwsu.", XO (4, 204,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vmaxsd", VX (4, 450), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vsl", VX (4, 452), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vrldnm", VX (4, 453), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vcmpgefp", VXR(4, 454,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vlogefp", VX (4, 458), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"vpkswss", VX (4, 462), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"macchws", XO (4, 236,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"macchws.", XO (4, 236,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmacchws", XO (4, 238,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmacchws.", XO (4, 238,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evaddw", VX (4, 512), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vaddubs", VX (4, 512), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmul10uq", VX (4, 513), VXVB_MASK, PPCVEC3, 0, {VD, VA}}, +{"evaddiw", VX (4, 514), VX_MASK, PPCSPE, 0, {RS, RB, UIMM}}, +{"vminub", VX (4, 514), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evsubfw", VX (4, 516), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evsubw", VX (4, 516), VX_MASK, PPCSPE, 0, {RS, RB, RA}}, +{"vsrb", VX (4, 516), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evsubifw", VX (4, 518), VX_MASK, PPCSPE, 0, {RS, UIMM, RB}}, +{"evsubiw", VX (4, 518), VX_MASK, PPCSPE, 0, {RS, RB, UIMM}}, +{"vcmpgtub", VXR(4, 518,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evabs", VX (4, 520), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"vmuleub", VX (4, 520), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evneg", VX (4, 521), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"evextsb", VX (4, 522), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"vrfin", VX (4, 522), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"evextsh", VX (4, 523), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"evrndw", VX (4, 524), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"vspltb", VX (4, 524), VXUIMM4_MASK, PPCVEC, 0, {VD, VB, UIMM4}}, +{"vextractub", VX (4, 525), VXUIMM4_MASK, PPCVEC3, 0, {VD, VB, UIMM4}}, +{"evcntlzw", VX (4, 525), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"evcntlsw", VX (4, 526), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"vupkhsb", VX (4, 526), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"brinc", VX (4, 527), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"ps_abs", XRC(4, 264,0), XRA_MASK, PPCPS, 0, {FRT, FRB}}, +{"ps_abs.", XRC(4, 264,1), XRA_MASK, PPCPS, 0, {FRT, FRB}}, +{"evand", VX (4, 529), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evandc", VX (4, 530), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evxor", VX (4, 534), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmr", VX (4, 535), VX_MASK, PPCSPE, 0, {RS, RA, BBA}}, +{"evor", VX (4, 535), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evnor", VX (4, 536), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evnot", VX (4, 536), VX_MASK, PPCSPE, 0, {RS, RA, BBA}}, +{"get", APU(4, 268,0), APU_RA_MASK, PPC405, 0, {RT, FSL}}, +{"eveqv", VX (4, 537), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evorc", VX (4, 539), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evnand", VX (4, 542), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evsrwu", VX (4, 544), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evsrws", VX (4, 545), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evsrwiu", VX (4, 546), VX_MASK, PPCSPE, 0, {RS, RA, EVUIMM}}, +{"evsrwis", VX (4, 547), VX_MASK, PPCSPE, 0, {RS, RA, EVUIMM}}, 
+{"evslw", VX (4, 548), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evslwi", VX (4, 550), VX_MASK, PPCSPE, 0, {RS, RA, EVUIMM}}, +{"evrlw", VX (4, 552), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evsplati", VX (4, 553), VX_MASK, PPCSPE, 0, {RS, SIMM}}, +{"evrlwi", VX (4, 554), VX_MASK, PPCSPE, 0, {RS, RA, EVUIMM}}, +{"evsplatfi", VX (4, 555), VX_MASK, PPCSPE, 0, {RS, SIMM}}, +{"evmergehi", VX (4, 556), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmergelo", VX (4, 557), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmergehilo", VX (4, 558), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmergelohi", VX (4, 559), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evcmpgtu", VX (4, 560), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"evcmpgts", VX (4, 561), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"evcmpltu", VX (4, 562), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"evcmplts", VX (4, 563), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"evcmpeq", VX (4, 564), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"cget", APU(4, 284,0), APU_RA_MASK, PPC405, 0, {RT, FSL}}, +{"vadduhs", VX (4, 576), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmul10euq", VX (4, 577), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vminuh", VX (4, 578), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vsrh", VX (4, 580), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpgtuh", VXR(4, 582,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmuleuh", VX (4, 584), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vrfiz", VX (4, 586), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"vsplth", VX (4, 588), VXUIMM3_MASK, PPCVEC, 0, {VD, VB, UIMM3}}, +{"vextractuh", VX (4, 589), VXUIMM4_MASK, PPCVEC3, 0, {VD, VB, UIMM4}}, +{"vupkhsh", VX (4, 590), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"nget", APU(4, 300,0), APU_RA_MASK, PPC405, 0, {RT, FSL}}, +{"evsel", EVSEL(4,79), EVSEL_MASK, PPCSPE, 0, {RS, RA, RB, CRFS}}, +{"ncget", APU(4, 316,0), APU_RA_MASK, PPC405, 0, {RT, FSL}}, +{"evfsadd", VX (4, 640), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vadduws", VX (4, 640), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evfssub", VX (4, 641), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vminuw", VX (4, 642), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evfsabs", VX (4, 644), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"vsrw", VX (4, 644), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evfsnabs", VX (4, 645), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"evfsneg", VX (4, 646), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"vcmpgtuw", VXR(4, 646,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmuleuw", VX (4, 648), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evfsmul", VX (4, 648), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evfsdiv", VX (4, 649), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vrfip", VX (4, 650), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"evfscmpgt", VX (4, 652), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"vspltw", VX (4, 652), VXUIMM2_MASK, PPCVEC, 0, {VD, VB, UIMM2}}, +{"vextractuw", VX (4, 653), VXUIMM4_MASK, PPCVEC3, 0, {VD, VB, UIMM4}}, +{"evfscmplt", VX (4, 653), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"evfscmpeq", VX (4, 654), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"vupklsb", VX (4, 654), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"evfscfui", VX (4, 656), VX_MASK, PPCSPE, 0, {RS, RB}}, +{"evfscfsi", VX (4, 657), VX_MASK, PPCSPE, 0, {RS, RB}}, +{"evfscfuf", VX (4, 658), VX_MASK, PPCSPE, 0, {RS, RB}}, +{"evfscfsf", VX (4, 659), VX_MASK, PPCSPE, 0, {RS, RB}}, +{"evfsctui", VX (4, 660), VX_MASK, PPCSPE, 0, {RS, RB}}, +{"evfsctsi", VX (4, 661), VX_MASK, PPCSPE, 0, {RS, RB}}, +{"evfsctuf", VX (4, 662), VX_MASK, PPCSPE, 0, {RS, RB}}, +{"evfsctsf", VX (4, 663), VX_MASK, PPCSPE, 0, {RS, RB}}, +{"evfsctuiz", VX (4, 664), VX_MASK, PPCSPE, 0, {RS, RB}}, +{"put", APU(4, 332,0), APU_RT_MASK, 
PPC405, 0, {RA, FSL}}, +{"evfsctsiz", VX (4, 666), VX_MASK, PPCSPE, 0, {RS, RB}}, +{"evfststgt", VX (4, 668), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"evfststlt", VX (4, 669), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"evfststeq", VX (4, 670), VX_MASK, PPCSPE, 0, {CRFD, RA, RB}}, +{"cput", APU(4, 348,0), APU_RT_MASK, PPC405, 0, {RA, FSL}}, +{"efsadd", VX (4, 704), VX_MASK, PPCEFS, 0, {RS, RA, RB}}, +{"efssub", VX (4, 705), VX_MASK, PPCEFS, 0, {RS, RA, RB}}, +{"vminud", VX (4, 706), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"efsabs", VX (4, 708), VX_MASK, PPCEFS, 0, {RS, RA}}, +{"vsr", VX (4, 708), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"efsnabs", VX (4, 709), VX_MASK, PPCEFS, 0, {RS, RA}}, +{"efsneg", VX (4, 710), VX_MASK, PPCEFS, 0, {RS, RA}}, +{"vcmpgtfp", VXR(4, 710,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpgtud", VXR(4, 711,0), VXR_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"efsmul", VX (4, 712), VX_MASK, PPCEFS, 0, {RS, RA, RB}}, +{"efsdiv", VX (4, 713), VX_MASK, PPCEFS, 0, {RS, RA, RB}}, +{"vrfim", VX (4, 714), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"efscmpgt", VX (4, 716), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"vextractd", VX (4, 717), VXUIMM4_MASK, PPCVEC3, 0, {VD, VB, UIMM4}}, +{"efscmplt", VX (4, 717), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"efscmpeq", VX (4, 718), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"vupklsh", VX (4, 718), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"efscfd", VX (4, 719), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efscfui", VX (4, 720), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efscfsi", VX (4, 721), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efscfuf", VX (4, 722), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efscfsf", VX (4, 723), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efsctui", VX (4, 724), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efsctsi", VX (4, 725), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efsctuf", VX (4, 726), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efsctsf", VX (4, 727), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efsctuiz", VX (4, 728), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"nput", APU(4, 364,0), APU_RT_MASK, PPC405, 0, {RA, FSL}}, +{"efsctsiz", VX (4, 730), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efststgt", VX (4, 732), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"efststlt", VX (4, 733), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"efststeq", VX (4, 734), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"efdadd", VX (4, 736), VX_MASK, PPCEFS, 0, {RS, RA, RB}}, +{"efdsub", VX (4, 737), VX_MASK, PPCEFS, 0, {RS, RA, RB}}, +{"efdcfuid", VX (4, 738), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdcfsid", VX (4, 739), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdabs", VX (4, 740), VX_MASK, PPCEFS, 0, {RS, RA}}, +{"efdnabs", VX (4, 741), VX_MASK, PPCEFS, 0, {RS, RA}}, +{"efdneg", VX (4, 742), VX_MASK, PPCEFS, 0, {RS, RA}}, +{"efdmul", VX (4, 744), VX_MASK, PPCEFS, 0, {RS, RA, RB}}, +{"efddiv", VX (4, 745), VX_MASK, PPCEFS, 0, {RS, RA, RB}}, +{"efdctuidz", VX (4, 746), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdctsidz", VX (4, 747), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdcmpgt", VX (4, 748), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"efdcmplt", VX (4, 749), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"efdcmpeq", VX (4, 750), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"efdcfs", VX (4, 751), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdcfui", VX (4, 752), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdcfsi", VX (4, 753), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdcfuf", VX (4, 754), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdcfsf", VX (4, 755), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdctui", VX (4, 756), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdctsi", VX (4, 757), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdctuf", VX (4, 758), VX_MASK, PPCEFS, 0, {RS, RB}}, 
+{"efdctsf", VX (4, 759), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdctuiz", VX (4, 760), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"ncput", APU(4, 380,0), APU_RT_MASK, PPC405, 0, {RA, FSL}}, +{"efdctsiz", VX (4, 762), VX_MASK, PPCEFS, 0, {RS, RB}}, +{"efdtstgt", VX (4, 764), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"efdtstlt", VX (4, 765), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"efdtsteq", VX (4, 766), VX_MASK, PPCEFS, 0, {CRFD, RA, RB}}, +{"evlddx", VX (4, 768), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vaddsbs", VX (4, 768), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evldd", VX (4, 769), VX_MASK, PPCSPE, 0, {RS, EVUIMM_8, RA}}, +{"evldwx", VX (4, 770), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vminsb", VX (4, 770), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evldw", VX (4, 771), VX_MASK, PPCSPE, 0, {RS, EVUIMM_8, RA}}, +{"evldhx", VX (4, 772), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vsrab", VX (4, 772), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evldh", VX (4, 773), VX_MASK, PPCSPE, 0, {RS, EVUIMM_8, RA}}, +{"vcmpgtsb", VXR(4, 774,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evlhhesplatx",VX (4, 776), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vmulesb", VX (4, 776), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evlhhesplat", VX (4, 777), VX_MASK, PPCSPE, 0, {RS, EVUIMM_2, RA}}, +{"vcfux", VX (4, 778), VX_MASK, PPCVEC, 0, {VD, VB, UIMM}}, +{"vcuxwfp", VX (4, 778), VX_MASK, PPCVEC, 0, {VD, VB, UIMM}}, +{"evlhhousplatx",VX(4, 780), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vspltisb", VX (4, 780), VXVB_MASK, PPCVEC, 0, {VD, SIMM}}, +{"vinsertb", VX (4, 781), VXUIMM4_MASK, PPCVEC3, 0, {VD, VB, UIMM4}}, +{"evlhhousplat",VX (4, 781), VX_MASK, PPCSPE, 0, {RS, EVUIMM_2, RA}}, +{"evlhhossplatx",VX(4, 782), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vpkpx", VX (4, 782), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evlhhossplat",VX (4, 783), VX_MASK, PPCSPE, 0, {RS, EVUIMM_2, RA}}, +{"mullhwu", XRC(4, 392,0), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"evlwhex", VX (4, 784), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"mullhwu.", XRC(4, 392,1), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"evlwhe", VX (4, 785), VX_MASK, PPCSPE, 0, {RS, EVUIMM_4, RA}}, +{"evlwhoux", VX (4, 788), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evlwhou", VX (4, 789), VX_MASK, PPCSPE, 0, {RS, EVUIMM_4, RA}}, +{"evlwhosx", VX (4, 790), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evlwhos", VX (4, 791), VX_MASK, PPCSPE, 0, {RS, EVUIMM_4, RA}}, +{"maclhwu", XO (4, 396,0,0),XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evlwwsplatx", VX (4, 792), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"maclhwu.", XO (4, 396,0,1),XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evlwwsplat", VX (4, 793), VX_MASK, PPCSPE, 0, {RS, EVUIMM_4, RA}}, +{"evlwhsplatx", VX (4, 796), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evlwhsplat", VX (4, 797), VX_MASK, PPCSPE, 0, {RS, EVUIMM_4, RA}}, +{"evstddx", VX (4, 800), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evstdd", VX (4, 801), VX_MASK, PPCSPE, 0, {RS, EVUIMM_8, RA}}, +{"evstdwx", VX (4, 802), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evstdw", VX (4, 803), VX_MASK, PPCSPE, 0, {RS, EVUIMM_8, RA}}, +{"evstdhx", VX (4, 804), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evstdh", VX (4, 805), VX_MASK, PPCSPE, 0, {RS, EVUIMM_8, RA}}, +{"evstwhex", VX (4, 816), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evstwhe", VX (4, 817), VX_MASK, PPCSPE, 0, {RS, EVUIMM_4, RA}}, +{"evstwhox", VX (4, 820), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evstwho", VX (4, 821), VX_MASK, PPCSPE, 0, {RS, EVUIMM_4, RA}}, +{"evstwwex", VX (4, 824), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evstwwe", VX (4, 825), VX_MASK, PPCSPE, 0, {RS, EVUIMM_4, RA}}, +{"evstwwox", VX (4, 828), VX_MASK, PPCSPE, 0, 
{RS, RA, RB}}, +{"evstwwo", VX (4, 829), VX_MASK, PPCSPE, 0, {RS, EVUIMM_4, RA}}, +{"vaddshs", VX (4, 832), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"bcdcpsgn.", VX (4, 833), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vminsh", VX (4, 834), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vsrah", VX (4, 836), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpgtsh", VXR(4, 838,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmulesh", VX (4, 840), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcfsx", VX (4, 842), VX_MASK, PPCVEC, 0, {VD, VB, UIMM}}, +{"vcsxwfp", VX (4, 842), VX_MASK, PPCVEC, 0, {VD, VB, UIMM}}, +{"vspltish", VX (4, 844), VXVB_MASK, PPCVEC, 0, {VD, SIMM}}, +{"vinserth", VX (4, 845), VXUIMM4_MASK, PPCVEC3, 0, {VD, VB, UIMM4}}, +{"vupkhpx", VX (4, 846), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"mullhw", XRC(4, 424,0), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"mullhw.", XRC(4, 424,1), X_MASK, MULHW, 0, {RT, RA, RB}}, +{"maclhw", XO (4, 428,0,0),XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"maclhw.", XO (4, 428,0,1),XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmaclhw", XO (4, 430,0,0),XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmaclhw.", XO (4, 430,0,1),XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vaddsws", VX (4, 896), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vminsw", VX (4, 898), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vsraw", VX (4, 900), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpgtsw", VXR(4, 902,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmulesw", VX (4, 904), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vctuxs", VX (4, 906), VX_MASK, PPCVEC, 0, {VD, VB, UIMM}}, +{"vcfpuxws", VX (4, 906), VX_MASK, PPCVEC, 0, {VD, VB, UIMM}}, +{"vspltisw", VX (4, 908), VXVB_MASK, PPCVEC, 0, {VD, SIMM}}, +{"vinsertw", VX (4, 909), VXUIMM4_MASK, PPCVEC3, 0, {VD, VB, UIMM4}}, +{"maclhwsu", XO (4, 460,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"maclhwsu.", XO (4, 460,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vminsd", VX (4, 962), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vsrad", VX (4, 964), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vcmpbfp", VXR(4, 966,0), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpgtsd", VXR(4, 967,0), VXR_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vctsxs", VX (4, 970), VX_MASK, PPCVEC, 0, {VD, VB, UIMM}}, +{"vcfpsxws", VX (4, 970), VX_MASK, PPCVEC, 0, {VD, VB, UIMM}}, +{"vinsertd", VX (4, 973), VXUIMM4_MASK, PPCVEC3, 0, {VD, VB, UIMM4}}, +{"vupklpx", VX (4, 974), VXVA_MASK, PPCVEC, 0, {VD, VB}}, +{"maclhws", XO (4, 492,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"maclhws.", XO (4, 492,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmaclhws", XO (4, 494,0,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmaclhws.", XO (4, 494,0,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vsububm", VX (4,1024), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"bcdadd.", VX (4,1025), VXPS_MASK, PPCVEC2, 0, {VD, VA, VB, PS}}, +{"vavgub", VX (4,1026), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vabsdub", VX (4,1027), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmhessf", VX (4,1027), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vand", VX (4,1028), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpequb.", VXR(4, 6,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpneb.", VXR(4, 7,1), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"udi0fcm.", APU(4, 515,0), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"udi0fcm", APU(4, 515,1), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"evmhossf", VX (4,1031), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vpmsumb", VX (4,1032), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmheumi", VX (4,1032), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhesmi", VX (4,1033), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vmaxfp", VX (4,1034), VX_MASK, PPCVEC, 
0, {VD, VA, VB}}, +{"evmhesmf", VX (4,1035), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhoumi", VX (4,1036), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vslo", VX (4,1036), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evmhosmi", VX (4,1037), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhosmf", VX (4,1039), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"machhwuo", XO (4, 12,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"machhwuo.", XO (4, 12,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_merge00", XOPS(4,528,0), XOPS_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"ps_merge00.", XOPS(4,528,1), XOPS_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"evmhessfa", VX (4,1059), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhossfa", VX (4,1063), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmheumia", VX (4,1064), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhesmia", VX (4,1065), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhesmfa", VX (4,1067), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhoumia", VX (4,1068), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhosmia", VX (4,1069), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhosmfa", VX (4,1071), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vsubuhm", VX (4,1088), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"bcdsub.", VX (4,1089), VXPS_MASK, PPCVEC2, 0, {VD, VA, VB, PS}}, +{"vavguh", VX (4,1090), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vabsduh", VX (4,1091), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vandc", VX (4,1092), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpequh.", VXR(4, 70,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi1fcm.", APU(4, 547,0), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"udi1fcm", APU(4, 547,1), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"vcmpneh.", VXR(4, 71,1), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"evmwhssf", VX (4,1095), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vpmsumh", VX (4,1096), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmwlumi", VX (4,1096), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vminfp", VX (4,1098), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evmwhumi", VX (4,1100), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vsro", VX (4,1100), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evmwhsmi", VX (4,1101), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vpkudum", VX (4,1102), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmwhsmf", VX (4,1103), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwssf", VX (4,1107), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"machhwo", XO (4, 44,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evmwumi", VX (4,1112), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"machhwo.", XO (4, 44,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evmwsmi", VX (4,1113), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwsmf", VX (4,1115), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"nmachhwo", XO (4, 46,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmachhwo.", XO (4, 46,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_merge01", XOPS(4,560,0), XOPS_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"ps_merge01.", XOPS(4,560,1), XOPS_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"evmwhssfa", VX (4,1127), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwlumia", VX (4,1128), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwhumia", VX (4,1132), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwhsmia", VX (4,1133), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwhsmfa", VX (4,1135), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwssfa", VX (4,1139), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwumia", VX (4,1144), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwsmia", VX (4,1145), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwsmfa", VX (4,1147), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vsubuwm", VX (4,1152), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"bcdus.", VX (4,1153), VX_MASK, 
PPCVEC3, 0, {VD, VA, VB}}, +{"vavguw", VX (4,1154), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vabsduw", VX (4,1155), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vmr", VX (4,1156), VX_MASK, PPCVEC, 0, {VD, VA, VBA}}, +{"vor", VX (4,1156), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vcmpnew.", VXR(4, 135,1), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vpmsumw", VX (4,1160), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vcmpequw.", VXR(4, 134,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi2fcm.", APU(4, 579,0), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"udi2fcm", APU(4, 579,1), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"machhwsuo", XO (4, 76,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"machhwsuo.", XO (4, 76,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_merge10", XOPS(4,592,0), XOPS_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"ps_merge10.", XOPS(4,592,1), XOPS_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"vsubudm", VX (4,1216), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evaddusiaaw", VX (4,1216), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"bcds.", VX (4,1217), VXPS_MASK, PPCVEC3, 0, {VD, VA, VB, PS}}, +{"evaddssiaaw", VX (4,1217), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"evsubfusiaaw",VX (4,1218), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"evsubfssiaaw",VX (4,1219), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"evmra", VX (4,1220), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"vxor", VX (4,1220), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evdivws", VX (4,1222), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vcmpeqfp.", VXR(4, 198,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi3fcm.", APU(4, 611,0), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"vcmpequd.", VXR(4, 199,1), VXR_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"udi3fcm", APU(4, 611,1), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"evdivwu", VX (4,1223), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vpmsumd", VX (4,1224), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evaddumiaaw", VX (4,1224), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"evaddsmiaaw", VX (4,1225), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"evsubfumiaaw",VX (4,1226), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"evsubfsmiaaw",VX (4,1227), VX_MASK, PPCSPE, 0, {RS, RA}}, +{"vpkudus", VX (4,1230), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"machhwso", XO (4, 108,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"machhwso.", XO (4, 108,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmachhwso", XO (4, 110,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmachhwso.", XO (4, 110,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"ps_merge11", XOPS(4,624,0), XOPS_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"ps_merge11.", XOPS(4,624,1), XOPS_MASK, PPCPS, 0, {FRT, FRA, FRB}}, +{"vsubuqm", VX (4,1280), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmheusiaaw", VX (4,1280), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"bcdtrunc.", VX (4,1281), VXPS_MASK, PPCVEC3, 0, {VD, VA, VB, PS}}, +{"evmhessiaaw", VX (4,1281), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vavgsb", VX (4,1282), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evmhessfaaw", VX (4,1283), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhousiaaw", VX (4,1284), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vnot", VX (4,1284), VX_MASK, PPCVEC, 0, {VD, VA, VBA}}, +{"vnor", VX (4,1284), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evmhossiaaw", VX (4,1285), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"udi4fcm.", APU(4, 643,0), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"udi4fcm", APU(4, 643,1), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"vcmpnezb.", VXR(4, 263,1), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"evmhossfaaw", VX (4,1287), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmheumiaaw", VX (4,1288), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, 
+{"vcipher", VX (4,1288), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vcipherlast", VX (4,1289), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmhesmiaaw", VX (4,1289), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhesmfaaw", VX (4,1291), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vgbbd", VX (4,1292), VXVA_MASK, PPCVEC2, 0, {VD, VB}}, +{"evmhoumiaaw", VX (4,1292), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhosmiaaw", VX (4,1293), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhosmfaaw", VX (4,1295), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"macchwuo", XO (4, 140,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"macchwuo.", XO (4, 140,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evmhegumiaa", VX (4,1320), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhegsmiaa", VX (4,1321), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhegsmfaa", VX (4,1323), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhogumiaa", VX (4,1324), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhogsmiaa", VX (4,1325), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhogsmfaa", VX (4,1327), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vsubcuq", VX (4,1344), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmwlusiaaw", VX (4,1344), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"bcdutrunc.", VX (4,1345), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"evmwlssiaaw", VX (4,1345), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vavgsh", VX (4,1346), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vorc", VX (4,1348), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"udi5fcm.", APU(4, 675,0), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"udi5fcm", APU(4, 675,1), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"vcmpnezh.", VXR(4, 327,1), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vncipher", VX (4,1352), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmwlumiaaw", VX (4,1352), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vncipherlast",VX (4,1353), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmwlsmiaaw", VX (4,1353), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vbpermq", VX (4,1356), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vpksdus", VX (4,1358), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmwssfaa", VX (4,1363), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"macchwo", XO (4, 172,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evmwumiaa", VX (4,1368), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"macchwo.", XO (4, 172,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evmwsmiaa", VX (4,1369), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwsmfaa", VX (4,1371), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"nmacchwo", XO (4, 174,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmacchwo.", XO (4, 174,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evmheusianw", VX (4,1408), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vsubcuw", VX (4,1408), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evmhessianw", VX (4,1409), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"bcdctsq.", VXVA(4,1409,0), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"bcdcfsq.", VXVA(4,1409,2), VXVAPS_MASK, PPCVEC3, 0, {VD, VB, PS}}, +{"bcdctz.", VXVA(4,1409,4), VXVAPS_MASK, PPCVEC3, 0, {VD, VB, PS}}, +{"bcdctn.", VXVA(4,1409,5), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"bcdcfz.", VXVA(4,1409,6), VXVAPS_MASK, PPCVEC3, 0, {VD, VB, PS}}, +{"bcdcfn.", VXVA(4,1409,7), VXVAPS_MASK, PPCVEC3, 0, {VD, VB, PS}}, +{"bcdsetsgn.", VXVA(4,1409,31), VXVAPS_MASK, PPCVEC3, 0, {VD, VB, PS}}, +{"vavgsw", VX (4,1410), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"evmhessfanw", VX (4,1411), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vnand", VX (4,1412), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmhousianw", VX (4,1412), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhossianw", VX (4,1413), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"udi6fcm.", APU(4, 707,0), APU_MASK, 
PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"udi6fcm", APU(4, 707,1), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"vcmpnezw.", VXR(4, 391,1), VXR_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"evmhossfanw", VX (4,1415), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmheumianw", VX (4,1416), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhesmianw", VX (4,1417), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhesmfanw", VX (4,1419), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhoumianw", VX (4,1420), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhosmianw", VX (4,1421), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhosmfanw", VX (4,1423), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"macchwsuo", XO (4, 204,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"macchwsuo.", XO (4, 204,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evmhegumian", VX (4,1448), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhegsmian", VX (4,1449), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhegsmfan", VX (4,1451), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhogumian", VX (4,1452), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhogsmian", VX (4,1453), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmhogsmfan", VX (4,1455), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwlusianw", VX (4,1472), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"bcdsr.", VX (4,1473), VXPS_MASK, PPCVEC3, 0, {VD, VA, VB, PS}}, +{"evmwlssianw", VX (4,1473), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vsld", VX (4,1476), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vcmpgefp.", VXR(4, 454,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi7fcm.", APU(4, 739,0), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"udi7fcm", APU(4, 739,1), APU_MASK, PPC405|PPC440, PPC476, {URT, URA, URB}}, +{"vsbox", VX (4,1480), VXVB_MASK, PPCVEC2, 0, {VD, VA}}, +{"evmwlumianw", VX (4,1480), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwlsmianw", VX (4,1481), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"vbpermd", VX (4,1484), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vpksdss", VX (4,1486), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"evmwssfan", VX (4,1491), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"macchwso", XO (4, 236,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evmwumian", VX (4,1496), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"macchwso.", XO (4, 236,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"evmwsmian", VX (4,1497), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"evmwsmfan", VX (4,1499), VX_MASK, PPCSPE, 0, {RS, RA, RB}}, +{"nmacchwso", XO (4, 238,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmacchwso.", XO (4, 238,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vsububs", VX (4,1536), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vclzlsbb", VXVA(4,1538,0), VXVA_MASK, PPCVEC3, 0, {RT, VB}}, +{"vctzlsbb", VXVA(4,1538,1), VXVA_MASK, PPCVEC3, 0, {RT, VB}}, +{"vnegw", VXVA(4,1538,6), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vnegd", VXVA(4,1538,7), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vprtybw", VXVA(4,1538,8), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vprtybd", VXVA(4,1538,9), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vprtybq", VXVA(4,1538,10), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vextsb2w", VXVA(4,1538,16), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vextsh2w", VXVA(4,1538,17), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vextsb2d", VXVA(4,1538,24), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vextsh2d", VXVA(4,1538,25), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vextsw2d", VXVA(4,1538,26), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vctzb", VXVA(4,1538,28), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vctzh", VXVA(4,1538,29), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vctzw", VXVA(4,1538,30), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, +{"vctzd", VXVA(4,1538,31), VXVA_MASK, PPCVEC3, 0, {VD, VB}}, 
+{"mfvscr", VX (4,1540), VXVAVB_MASK, PPCVEC, 0, {VD}}, +{"vcmpgtub.", VXR(4, 518,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi8fcm.", APU(4, 771,0), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"udi8fcm", APU(4, 771,1), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"vsum4ubs", VX (4,1544), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vextublx", VX (4,1549), VX_MASK, PPCVEC3, 0, {RT, RA, VB}}, +{"vsubuhs", VX (4,1600), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"mtvscr", VX (4,1604), VXVDVA_MASK, PPCVEC, 0, {VB}}, +{"vcmpgtuh.", VXR(4, 582,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vsum4shs", VX (4,1608), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi9fcm.", APU(4, 804,0), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"udi9fcm", APU(4, 804,1), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"vextuhlx", VX (4,1613), VX_MASK, PPCVEC3, 0, {RT, RA, VB}}, +{"vupkhsw", VX (4,1614), VXVA_MASK, PPCVEC2, 0, {VD, VB}}, +{"vsubuws", VX (4,1664), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vshasigmaw", VX (4,1666), VX_MASK, PPCVEC2, 0, {VD, VA, ST, SIX}}, +{"veqv", VX (4,1668), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vcmpgtuw.", VXR(4, 646,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi10fcm.", APU(4, 835,0), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"udi10fcm", APU(4, 835,1), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"vsum2sws", VX (4,1672), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmrgow", VX (4,1676), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vextuwlx", VX (4,1677), VX_MASK, PPCVEC3, 0, {RT, RA, VB}}, +{"vshasigmad", VX (4,1730), VX_MASK, PPCVEC2, 0, {VD, VA, ST, SIX}}, +{"vsrd", VX (4,1732), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vcmpgtfp.", VXR(4, 710,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi11fcm.", APU(4, 867,0), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"vcmpgtud.", VXR(4, 711,1), VXR_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"udi11fcm", APU(4, 867,1), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"vupklsw", VX (4,1742), VXVA_MASK, PPCVEC2, 0, {VD, VB}}, +{"vsubsbs", VX (4,1792), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vclzb", VX (4,1794), VXVA_MASK, PPCVEC2, 0, {VD, VB}}, +{"vpopcntb", VX (4,1795), VXVA_MASK, PPCVEC2, 0, {VD, VB}}, +{"vsrv", VX (4,1796), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vcmpgtsb.", VXR(4, 774,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi12fcm.", APU(4, 899,0), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"udi12fcm", APU(4, 899,1), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"vsum4sbs", VX (4,1800), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vextubrx", VX (4,1805), VX_MASK, PPCVEC3, 0, {RT, RA, VB}}, +{"maclhwuo", XO (4, 396,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"maclhwuo.", XO (4, 396,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vsubshs", VX (4,1856), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vclzh", VX (4,1858), VXVA_MASK, PPCVEC2, 0, {VD, VB}}, +{"vpopcnth", VX (4,1859), VXVA_MASK, PPCVEC2, 0, {VD, VB}}, +{"vslv", VX (4,1860), VX_MASK, PPCVEC3, 0, {VD, VA, VB}}, +{"vcmpgtsh.", VXR(4, 838,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vextuhrx", VX (4,1869), VX_MASK, PPCVEC3, 0, {RT, RA, VB}}, +{"udi13fcm.", APU(4, 931,0), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"udi13fcm", APU(4, 931,1), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"maclhwo", XO (4, 428,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"maclhwo.", XO (4, 428,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmaclhwo", XO (4, 430,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmaclhwo.", XO (4, 430,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vsubsws", VX (4,1920), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vclzw", VX (4,1922), VXVA_MASK, PPCVEC2, 0, 
{VD, VB}}, +{"vpopcntw", VX (4,1923), VXVA_MASK, PPCVEC2, 0, {VD, VB}}, +{"vcmpgtsw.", VXR(4, 902,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi14fcm.", APU(4, 963,0), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"udi14fcm", APU(4, 963,1), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"vsumsws", VX (4,1928), VX_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"vmrgew", VX (4,1932), VX_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"vextuwrx", VX (4,1933), VX_MASK, PPCVEC3, 0, {RT, RA, VB}}, +{"maclhwsuo", XO (4, 460,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"maclhwsuo.", XO (4, 460,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"vclzd", VX (4,1986), VXVA_MASK, PPCVEC2, 0, {VD, VB}}, +{"vpopcntd", VX (4,1987), VXVA_MASK, PPCVEC2, 0, {VD, VB}}, +{"vcmpbfp.", VXR(4, 966,1), VXR_MASK, PPCVEC, 0, {VD, VA, VB}}, +{"udi15fcm.", APU(4, 995,0), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"vcmpgtsd.", VXR(4, 967,1), VXR_MASK, PPCVEC2, 0, {VD, VA, VB}}, +{"udi15fcm", APU(4, 995,1), APU_MASK, PPC440, PPC476, {URT, URA, URB}}, +{"maclhwso", XO (4, 492,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"maclhwso.", XO (4, 492,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmaclhwso", XO (4, 494,1,0), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"nmaclhwso.", XO (4, 494,1,1), XO_MASK, MULHW, 0, {RT, RA, RB}}, +{"dcbz_l", X (4,1014), XRT_MASK, PPCPS, 0, {RA, RB}}, + +{"mulli", OP(7), OP_MASK, PPCCOM, PPCVLE, {RT, RA, SI}}, +{"muli", OP(7), OP_MASK, PWRCOM, PPCVLE, {RT, RA, SI}}, + +{"subfic", OP(8), OP_MASK, PPCCOM, PPCVLE, {RT, RA, SI}}, +{"sfi", OP(8), OP_MASK, PWRCOM, PPCVLE, {RT, RA, SI}}, + +{"dozi", OP(9), OP_MASK, M601, PPCVLE, {RT, RA, SI}}, + +{"cmplwi", OPL(10,0), OPL_MASK, PPCCOM, PPCVLE, {OBF, RA, UISIGNOPT}}, +{"cmpldi", OPL(10,1), OPL_MASK, PPC64, PPCVLE, {OBF, RA, UISIGNOPT}}, +{"cmpli", OP(10), OP_MASK, PPC, PPCVLE, {BF, L32OPT, RA, UISIGNOPT}}, +{"cmpli", OP(10), OP_MASK, PWRCOM, PPC|PPCVLE, {BF, RA, UISIGNOPT}}, + +{"cmpwi", OPL(11,0), OPL_MASK, PPCCOM, PPCVLE, {OBF, RA, SI}}, +{"cmpdi", OPL(11,1), OPL_MASK, PPC64, PPCVLE, {OBF, RA, SI}}, +{"cmpi", OP(11), OP_MASK, PPC, PPCVLE, {BF, L32OPT, RA, SI}}, +{"cmpi", OP(11), OP_MASK, PWRCOM, PPC|PPCVLE, {BF, RA, SI}}, + +{"addic", OP(12), OP_MASK, PPCCOM, PPCVLE, {RT, RA, SI}}, +{"ai", OP(12), OP_MASK, PWRCOM, PPCVLE, {RT, RA, SI}}, +{"subic", OP(12), OP_MASK, PPCCOM, PPCVLE, {RT, RA, NSI}}, + +{"addic.", OP(13), OP_MASK, PPCCOM, PPCVLE, {RT, RA, SI}}, +{"ai.", OP(13), OP_MASK, PWRCOM, PPCVLE, {RT, RA, SI}}, +{"subic.", OP(13), OP_MASK, PPCCOM, PPCVLE, {RT, RA, NSI}}, + +{"li", OP(14), DRA_MASK, PPCCOM, PPCVLE, {RT, SI}}, +{"lil", OP(14), DRA_MASK, PWRCOM, PPCVLE, {RT, SI}}, +{"addi", OP(14), OP_MASK, PPCCOM, PPCVLE, {RT, RA0, SI}}, +{"cal", OP(14), OP_MASK, PWRCOM, PPCVLE, {RT, D, RA0}}, +{"subi", OP(14), OP_MASK, PPCCOM, PPCVLE, {RT, RA0, NSI}}, +{"la", OP(14), OP_MASK, PPCCOM, PPCVLE, {RT, D, RA0}}, + +{"lis", OP(15), DRA_MASK, PPCCOM, PPCVLE, {RT, SISIGNOPT}}, +{"liu", OP(15), DRA_MASK, PWRCOM, PPCVLE, {RT, SISIGNOPT}}, +{"addis", OP(15), OP_MASK, PPCCOM, PPCVLE, {RT, RA0, SISIGNOPT}}, +{"cau", OP(15), OP_MASK, PWRCOM, PPCVLE, {RT, RA0, SISIGNOPT}}, +{"subis", OP(15), OP_MASK, PPCCOM, PPCVLE, {RT, RA0, NSISIGNOPT}}, + +{"bdnz-", BBO(16,BODNZ,0,0), BBOATBI_MASK, PPCCOM, PPCVLE, {BDM}}, +{"bdnz+", BBO(16,BODNZ,0,0), BBOATBI_MASK, PPCCOM, PPCVLE, {BDP}}, +{"bdnz", BBO(16,BODNZ,0,0), BBOATBI_MASK, PPCCOM, PPCVLE, {BD}}, +{"bdn", BBO(16,BODNZ,0,0), BBOATBI_MASK, PWRCOM, PPCVLE, {BD}}, +{"bdnzl-", BBO(16,BODNZ,0,1), BBOATBI_MASK, PPCCOM, PPCVLE, {BDM}}, +{"bdnzl+", BBO(16,BODNZ,0,1), 
BBOATBI_MASK, PPCCOM, PPCVLE, {BDP}}, +{"bdnzl", BBO(16,BODNZ,0,1), BBOATBI_MASK, PPCCOM, PPCVLE, {BD}}, +{"bdnl", BBO(16,BODNZ,0,1), BBOATBI_MASK, PWRCOM, PPCVLE, {BD}}, +{"bdnza-", BBO(16,BODNZ,1,0), BBOATBI_MASK, PPCCOM, PPCVLE, {BDMA}}, +{"bdnza+", BBO(16,BODNZ,1,0), BBOATBI_MASK, PPCCOM, PPCVLE, {BDPA}}, +{"bdnza", BBO(16,BODNZ,1,0), BBOATBI_MASK, PPCCOM, PPCVLE, {BDA}}, +{"bdna", BBO(16,BODNZ,1,0), BBOATBI_MASK, PWRCOM, PPCVLE, {BDA}}, +{"bdnzla-", BBO(16,BODNZ,1,1), BBOATBI_MASK, PPCCOM, PPCVLE, {BDMA}}, +{"bdnzla+", BBO(16,BODNZ,1,1), BBOATBI_MASK, PPCCOM, PPCVLE, {BDPA}}, +{"bdnzla", BBO(16,BODNZ,1,1), BBOATBI_MASK, PPCCOM, PPCVLE, {BDA}}, +{"bdnla", BBO(16,BODNZ,1,1), BBOATBI_MASK, PWRCOM, PPCVLE, {BDA}}, +{"bdz-", BBO(16,BODZ,0,0), BBOATBI_MASK, PPCCOM, PPCVLE, {BDM}}, +{"bdz+", BBO(16,BODZ,0,0), BBOATBI_MASK, PPCCOM, PPCVLE, {BDP}}, +{"bdz", BBO(16,BODZ,0,0), BBOATBI_MASK, COM, PPCVLE, {BD}}, +{"bdzl-", BBO(16,BODZ,0,1), BBOATBI_MASK, PPCCOM, PPCVLE, {BDM}}, +{"bdzl+", BBO(16,BODZ,0,1), BBOATBI_MASK, PPCCOM, PPCVLE, {BDP}}, +{"bdzl", BBO(16,BODZ,0,1), BBOATBI_MASK, COM, PPCVLE, {BD}}, +{"bdza-", BBO(16,BODZ,1,0), BBOATBI_MASK, PPCCOM, PPCVLE, {BDMA}}, +{"bdza+", BBO(16,BODZ,1,0), BBOATBI_MASK, PPCCOM, PPCVLE, {BDPA}}, +{"bdza", BBO(16,BODZ,1,0), BBOATBI_MASK, COM, PPCVLE, {BDA}}, +{"bdzla-", BBO(16,BODZ,1,1), BBOATBI_MASK, PPCCOM, PPCVLE, {BDMA}}, +{"bdzla+", BBO(16,BODZ,1,1), BBOATBI_MASK, PPCCOM, PPCVLE, {BDPA}}, +{"bdzla", BBO(16,BODZ,1,1), BBOATBI_MASK, COM, PPCVLE, {BDA}}, + +{"bge-", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bge+", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bge", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bnl-", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bnl+", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bnl", BBOCB(16,BOF,CBLT,0,0), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bgel-", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bgel+", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bgel", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bnll-", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bnll+", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bnll", BBOCB(16,BOF,CBLT,0,1), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bgea-", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bgea+", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bgea", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bnla-", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bnla+", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bnla", BBOCB(16,BOF,CBLT,1,0), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bgela-", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bgela+", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bgela", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bnlla-", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bnlla+", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bnlla", BBOCB(16,BOF,CBLT,1,1), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"ble-", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"ble+", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"ble", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, COM, PPCVLE, {CR, 
BD}}, +{"bng-", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bng+", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bng", BBOCB(16,BOF,CBGT,0,0), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"blel-", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"blel+", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"blel", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bngl-", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bngl+", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bngl", BBOCB(16,BOF,CBGT,0,1), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"blea-", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"blea+", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"blea", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bnga-", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bnga+", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bnga", BBOCB(16,BOF,CBGT,1,0), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"blela-", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"blela+", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"blela", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bngla-", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bngla+", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bngla", BBOCB(16,BOF,CBGT,1,1), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bne-", BBOCB(16,BOF,CBEQ,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bne+", BBOCB(16,BOF,CBEQ,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bne", BBOCB(16,BOF,CBEQ,0,0), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bnel-", BBOCB(16,BOF,CBEQ,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bnel+", BBOCB(16,BOF,CBEQ,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bnel", BBOCB(16,BOF,CBEQ,0,1), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bnea-", BBOCB(16,BOF,CBEQ,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bnea+", BBOCB(16,BOF,CBEQ,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bnea", BBOCB(16,BOF,CBEQ,1,0), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bnela-", BBOCB(16,BOF,CBEQ,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bnela+", BBOCB(16,BOF,CBEQ,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bnela", BBOCB(16,BOF,CBEQ,1,1), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bns-", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bns+", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bns", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bnu-", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bnu+", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bnu", BBOCB(16,BOF,CBSO,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BD}}, +{"bnsl-", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bnsl+", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bnsl", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bnul-", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bnul+", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bnul", BBOCB(16,BOF,CBSO,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BD}}, +{"bnsa-", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bnsa+", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, 
PPCVLE, {CR, BDPA}}, +{"bnsa", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bnua-", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bnua+", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bnua", BBOCB(16,BOF,CBSO,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDA}}, +{"bnsla-", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bnsla+", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bnsla", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bnula-", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bnula+", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bnula", BBOCB(16,BOF,CBSO,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDA}}, + +{"blt-", BBOCB(16,BOT,CBLT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"blt+", BBOCB(16,BOT,CBLT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"blt", BBOCB(16,BOT,CBLT,0,0), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bltl-", BBOCB(16,BOT,CBLT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bltl+", BBOCB(16,BOT,CBLT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bltl", BBOCB(16,BOT,CBLT,0,1), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"blta-", BBOCB(16,BOT,CBLT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"blta+", BBOCB(16,BOT,CBLT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"blta", BBOCB(16,BOT,CBLT,1,0), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bltla-", BBOCB(16,BOT,CBLT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bltla+", BBOCB(16,BOT,CBLT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bltla", BBOCB(16,BOT,CBLT,1,1), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bgt-", BBOCB(16,BOT,CBGT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bgt+", BBOCB(16,BOT,CBGT,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bgt", BBOCB(16,BOT,CBGT,0,0), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bgtl-", BBOCB(16,BOT,CBGT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bgtl+", BBOCB(16,BOT,CBGT,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bgtl", BBOCB(16,BOT,CBGT,0,1), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bgta-", BBOCB(16,BOT,CBGT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bgta+", BBOCB(16,BOT,CBGT,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bgta", BBOCB(16,BOT,CBGT,1,0), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bgtla-", BBOCB(16,BOT,CBGT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bgtla+", BBOCB(16,BOT,CBGT,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bgtla", BBOCB(16,BOT,CBGT,1,1), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"beq-", BBOCB(16,BOT,CBEQ,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"beq+", BBOCB(16,BOT,CBEQ,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"beq", BBOCB(16,BOT,CBEQ,0,0), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"beql-", BBOCB(16,BOT,CBEQ,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"beql+", BBOCB(16,BOT,CBEQ,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"beql", BBOCB(16,BOT,CBEQ,0,1), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"beqa-", BBOCB(16,BOT,CBEQ,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"beqa+", BBOCB(16,BOT,CBEQ,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"beqa", BBOCB(16,BOT,CBEQ,1,0), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"beqla-", BBOCB(16,BOT,CBEQ,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"beqla+", BBOCB(16,BOT,CBEQ,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"beqla", BBOCB(16,BOT,CBEQ,1,1), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bso-", 
BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bso+", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bso", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bun-", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bun+", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bun", BBOCB(16,BOT,CBSO,0,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BD}}, +{"bsol-", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bsol+", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bsol", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, COM, PPCVLE, {CR, BD}}, +{"bunl-", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDM}}, +{"bunl+", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDP}}, +{"bunl", BBOCB(16,BOT,CBSO,0,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BD}}, +{"bsoa-", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bsoa+", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bsoa", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"buna-", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"buna+", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"buna", BBOCB(16,BOT,CBSO,1,0), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDA}}, +{"bsola-", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bsola+", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bsola", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, COM, PPCVLE, {CR, BDA}}, +{"bunla-", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDMA}}, +{"bunla+", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDPA}}, +{"bunla", BBOCB(16,BOT,CBSO,1,1), BBOATCB_MASK, PPCCOM, PPCVLE, {CR, BDA}}, + +{"bdnzf-", BBO(16,BODNZF,0,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDM}}, +{"bdnzf+", BBO(16,BODNZF,0,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDP}}, +{"bdnzf", BBO(16,BODNZF,0,0), BBOY_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bdnzfl-", BBO(16,BODNZF,0,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDM}}, +{"bdnzfl+", BBO(16,BODNZF,0,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDP}}, +{"bdnzfl", BBO(16,BODNZF,0,1), BBOY_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bdnzfa-", BBO(16,BODNZF,1,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDMA}}, +{"bdnzfa+", BBO(16,BODNZF,1,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDPA}}, +{"bdnzfa", BBO(16,BODNZF,1,0), BBOY_MASK, PPCCOM, PPCVLE, {BI, BDA}}, +{"bdnzfla-", BBO(16,BODNZF,1,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDMA}}, +{"bdnzfla+", BBO(16,BODNZF,1,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDPA}}, +{"bdnzfla", BBO(16,BODNZF,1,1), BBOY_MASK, PPCCOM, PPCVLE, {BI, BDA}}, +{"bdzf-", BBO(16,BODZF,0,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDM}}, +{"bdzf+", BBO(16,BODZF,0,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDP}}, +{"bdzf", BBO(16,BODZF,0,0), BBOY_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bdzfl-", BBO(16,BODZF,0,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDM}}, +{"bdzfl+", BBO(16,BODZF,0,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDP}}, +{"bdzfl", BBO(16,BODZF,0,1), BBOY_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bdzfa-", BBO(16,BODZF,1,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDMA}}, +{"bdzfa+", BBO(16,BODZF,1,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDPA}}, +{"bdzfa", BBO(16,BODZF,1,0), BBOY_MASK, PPCCOM, PPCVLE, {BI, BDA}}, +{"bdzfla-", BBO(16,BODZF,1,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDMA}}, +{"bdzfla+", BBO(16,BODZF,1,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, 
{BI, BDPA}}, +{"bdzfla", BBO(16,BODZF,1,1), BBOY_MASK, PPCCOM, PPCVLE, {BI, BDA}}, + +{"bf-", BBO(16,BOF,0,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDM}}, +{"bf+", BBO(16,BOF,0,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDP}}, +{"bf", BBO(16,BOF,0,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bbf", BBO(16,BOF,0,0), BBOAT_MASK, PWRCOM, PPCVLE, {BI, BD}}, +{"bfl-", BBO(16,BOF,0,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDM}}, +{"bfl+", BBO(16,BOF,0,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDP}}, +{"bfl", BBO(16,BOF,0,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bbfl", BBO(16,BOF,0,1), BBOAT_MASK, PWRCOM, PPCVLE, {BI, BD}}, +{"bfa-", BBO(16,BOF,1,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDMA}}, +{"bfa+", BBO(16,BOF,1,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDPA}}, +{"bfa", BBO(16,BOF,1,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDA}}, +{"bbfa", BBO(16,BOF,1,0), BBOAT_MASK, PWRCOM, PPCVLE, {BI, BDA}}, +{"bfla-", BBO(16,BOF,1,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDMA}}, +{"bfla+", BBO(16,BOF,1,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDPA}}, +{"bfla", BBO(16,BOF,1,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDA}}, +{"bbfla", BBO(16,BOF,1,1), BBOAT_MASK, PWRCOM, PPCVLE, {BI, BDA}}, + +{"bdnzt-", BBO(16,BODNZT,0,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDM}}, +{"bdnzt+", BBO(16,BODNZT,0,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDP}}, +{"bdnzt", BBO(16,BODNZT,0,0), BBOY_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bdnztl-", BBO(16,BODNZT,0,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDM}}, +{"bdnztl+", BBO(16,BODNZT,0,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDP}}, +{"bdnztl", BBO(16,BODNZT,0,1), BBOY_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bdnzta-", BBO(16,BODNZT,1,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDMA}}, +{"bdnzta+", BBO(16,BODNZT,1,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDPA}}, +{"bdnzta", BBO(16,BODNZT,1,0), BBOY_MASK, PPCCOM, PPCVLE, {BI, BDA}}, +{"bdnztla-", BBO(16,BODNZT,1,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDMA}}, +{"bdnztla+", BBO(16,BODNZT,1,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDPA}}, +{"bdnztla", BBO(16,BODNZT,1,1), BBOY_MASK, PPCCOM, PPCVLE, {BI, BDA}}, +{"bdzt-", BBO(16,BODZT,0,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDM}}, +{"bdzt+", BBO(16,BODZT,0,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDP}}, +{"bdzt", BBO(16,BODZT,0,0), BBOY_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bdztl-", BBO(16,BODZT,0,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDM}}, +{"bdztl+", BBO(16,BODZT,0,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDP}}, +{"bdztl", BBO(16,BODZT,0,1), BBOY_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bdzta-", BBO(16,BODZT,1,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDMA}}, +{"bdzta+", BBO(16,BODZT,1,0), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDPA}}, +{"bdzta", BBO(16,BODZT,1,0), BBOY_MASK, PPCCOM, PPCVLE, {BI, BDA}}, +{"bdztla-", BBO(16,BODZT,1,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDMA}}, +{"bdztla+", BBO(16,BODZT,1,1), BBOY_MASK, PPCCOM, ISA_V2|PPCVLE, {BI, BDPA}}, +{"bdztla", BBO(16,BODZT,1,1), BBOY_MASK, PPCCOM, PPCVLE, {BI, BDA}}, + +{"bt-", BBO(16,BOT,0,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDM}}, +{"bt+", BBO(16,BOT,0,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDP}}, +{"bt", BBO(16,BOT,0,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bbt", BBO(16,BOT,0,0), BBOAT_MASK, PWRCOM, PPCVLE, {BI, BD}}, +{"btl-", BBO(16,BOT,0,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDM}}, +{"btl+", BBO(16,BOT,0,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDP}}, +{"btl", BBO(16,BOT,0,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BD}}, +{"bbtl", BBO(16,BOT,0,1), BBOAT_MASK, PWRCOM, PPCVLE, {BI, BD}}, +{"bta-", BBO(16,BOT,1,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, 
BDMA}}, +{"bta+", BBO(16,BOT,1,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDPA}}, +{"bta", BBO(16,BOT,1,0), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDA}}, +{"bbta", BBO(16,BOT,1,0), BBOAT_MASK, PWRCOM, PPCVLE, {BI, BDA}}, +{"btla-", BBO(16,BOT,1,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDMA}}, +{"btla+", BBO(16,BOT,1,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDPA}}, +{"btla", BBO(16,BOT,1,1), BBOAT_MASK, PPCCOM, PPCVLE, {BI, BDA}}, +{"bbtla", BBO(16,BOT,1,1), BBOAT_MASK, PWRCOM, PPCVLE, {BI, BDA}}, + +{"bc-", B(16,0,0), B_MASK, PPCCOM, PPCVLE, {BOE, BI, BDM}}, +{"bc+", B(16,0,0), B_MASK, PPCCOM, PPCVLE, {BOE, BI, BDP}}, +{"bc", B(16,0,0), B_MASK, COM, PPCVLE, {BO, BI, BD}}, +{"bcl-", B(16,0,1), B_MASK, PPCCOM, PPCVLE, {BOE, BI, BDM}}, +{"bcl+", B(16,0,1), B_MASK, PPCCOM, PPCVLE, {BOE, BI, BDP}}, +{"bcl", B(16,0,1), B_MASK, COM, PPCVLE, {BO, BI, BD}}, +{"bca-", B(16,1,0), B_MASK, PPCCOM, PPCVLE, {BOE, BI, BDMA}}, +{"bca+", B(16,1,0), B_MASK, PPCCOM, PPCVLE, {BOE, BI, BDPA}}, +{"bca", B(16,1,0), B_MASK, COM, PPCVLE, {BO, BI, BDA}}, +{"bcla-", B(16,1,1), B_MASK, PPCCOM, PPCVLE, {BOE, BI, BDMA}}, +{"bcla+", B(16,1,1), B_MASK, PPCCOM, PPCVLE, {BOE, BI, BDPA}}, +{"bcla", B(16,1,1), B_MASK, COM, PPCVLE, {BO, BI, BDA}}, + +{"svc", SC(17,0,0), SC_MASK, POWER, PPCVLE, {SVC_LEV, FL1, FL2}}, +{"svcl", SC(17,0,1), SC_MASK, POWER, PPCVLE, {SVC_LEV, FL1, FL2}}, +{"sc", SC(17,1,0), SC_MASK, PPC, PPCVLE, {LEV}}, +{"svca", SC(17,1,0), SC_MASK, PWRCOM, PPCVLE, {SV}}, +{"svcla", SC(17,1,1), SC_MASK, POWER, PPCVLE, {SV}}, + +{"b", B(18,0,0), B_MASK, COM, PPCVLE, {LI}}, +{"bl", B(18,0,1), B_MASK, COM, PPCVLE, {LI}}, +{"ba", B(18,1,0), B_MASK, COM, PPCVLE, {LIA}}, +{"bla", B(18,1,1), B_MASK, COM, PPCVLE, {LIA}}, + +{"mcrf", XL(19,0), XLBB_MASK|(3<<21)|(3<<16), COM, PPCVLE, {BF, BFA}}, + +{"addpcis", DX(19,2), DX_MASK, POWER9, PPCVLE, {RT, DXD}}, +{"subpcis", DX(19,2), DX_MASK, POWER9, PPCVLE, {RT, NDXD}}, + +{"bdnzlr", XLO(19,BODNZ,16,0), XLBOBIBB_MASK, PPCCOM, PPCVLE, {0}}, +{"bdnzlr-", XLO(19,BODNZ,16,0), XLBOBIBB_MASK, PPCCOM, ISA_V2|PPCVLE, {0}}, +{"bdnzlrl", XLO(19,BODNZ,16,1), XLBOBIBB_MASK, PPCCOM, PPCVLE, {0}}, +{"bdnzlrl-", XLO(19,BODNZ,16,1), XLBOBIBB_MASK, PPCCOM, ISA_V2|PPCVLE, {0}}, +{"bdnzlr+", XLO(19,BODNZP,16,0), XLBOBIBB_MASK, PPCCOM, ISA_V2|PPCVLE, {0}}, +{"bdnzlrl+", XLO(19,BODNZP,16,1), XLBOBIBB_MASK, PPCCOM, ISA_V2|PPCVLE, {0}}, +{"bdzlr", XLO(19,BODZ,16,0), XLBOBIBB_MASK, PPCCOM, PPCVLE, {0}}, +{"bdzlr-", XLO(19,BODZ,16,0), XLBOBIBB_MASK, PPCCOM, ISA_V2|PPCVLE, {0}}, +{"bdzlrl", XLO(19,BODZ,16,1), XLBOBIBB_MASK, PPCCOM, PPCVLE, {0}}, +{"bdzlrl-", XLO(19,BODZ,16,1), XLBOBIBB_MASK, PPCCOM, ISA_V2|PPCVLE, {0}}, +{"bdzlr+", XLO(19,BODZP,16,0), XLBOBIBB_MASK, PPCCOM, ISA_V2|PPCVLE, {0}}, +{"bdzlrl+", XLO(19,BODZP,16,1), XLBOBIBB_MASK, PPCCOM, ISA_V2|PPCVLE, {0}}, +{"blr", XLO(19,BOU,16,0), XLBOBIBB_MASK, PPCCOM, PPCVLE, {0}}, +{"br", XLO(19,BOU,16,0), XLBOBIBB_MASK, PWRCOM, PPCVLE, {0}}, +{"blrl", XLO(19,BOU,16,1), XLBOBIBB_MASK, PPCCOM, PPCVLE, {0}}, +{"brl", XLO(19,BOU,16,1), XLBOBIBB_MASK, PWRCOM, PPCVLE, {0}}, +{"bdnzlr-", XLO(19,BODNZM4,16,0), XLBOBIBB_MASK, ISA_V2, PPCVLE, {0}}, +{"bdnzlrl-", XLO(19,BODNZM4,16,1), XLBOBIBB_MASK, ISA_V2, PPCVLE, {0}}, +{"bdnzlr+", XLO(19,BODNZP4,16,0), XLBOBIBB_MASK, ISA_V2, PPCVLE, {0}}, +{"bdnzlrl+", XLO(19,BODNZP4,16,1), XLBOBIBB_MASK, ISA_V2, PPCVLE, {0}}, +{"bdzlr-", XLO(19,BODZM4,16,0), XLBOBIBB_MASK, ISA_V2, PPCVLE, {0}}, +{"bdzlrl-", XLO(19,BODZM4,16,1), XLBOBIBB_MASK, ISA_V2, PPCVLE, {0}}, +{"bdzlr+", XLO(19,BODZP4,16,0), XLBOBIBB_MASK, ISA_V2, PPCVLE, {0}}, 
+{"bdzlrl+", XLO(19,BODZP4,16,1), XLBOBIBB_MASK, ISA_V2, PPCVLE, {0}}, + +{"bgelr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bgelr-", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bger", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bnllr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnllr-", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnlr", XLOCB(19,BOF,CBLT,16,0), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bgelrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bgelrl-", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgerl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bnllrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnllrl-", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnlrl", XLOCB(19,BOF,CBLT,16,1), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"blelr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"blelr-", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bler", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bnglr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnglr-", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bngr", XLOCB(19,BOF,CBGT,16,0), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"blelrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"blelrl-", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"blerl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bnglrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnglrl-", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bngrl", XLOCB(19,BOF,CBGT,16,1), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bnelr", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnelr-", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bner", XLOCB(19,BOF,CBEQ,16,0), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bnelrl", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnelrl-", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnerl", XLOCB(19,BOF,CBEQ,16,1), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bnslr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnslr-", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnsr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bnulr", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnulr-", XLOCB(19,BOF,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnslrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnslrl-", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnsrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bnulrl", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnulrl-", XLOCB(19,BOF,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgelr+", XLOCB(19,BOFP,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnllr+", XLOCB(19,BOFP,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgelrl+", XLOCB(19,BOFP,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnllrl+", XLOCB(19,BOFP,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"blelr+", XLOCB(19,BOFP,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, 
ISA_V2|PPCVLE, {CR}}, +{"bnglr+", XLOCB(19,BOFP,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"blelrl+", XLOCB(19,BOFP,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnglrl+", XLOCB(19,BOFP,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnelr+", XLOCB(19,BOFP,CBEQ,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnelrl+", XLOCB(19,BOFP,CBEQ,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnslr+", XLOCB(19,BOFP,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnulr+", XLOCB(19,BOFP,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnslrl+", XLOCB(19,BOFP,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnulrl+", XLOCB(19,BOFP,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgelr-", XLOCB(19,BOFM4,CBLT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnllr-", XLOCB(19,BOFM4,CBLT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgelrl-", XLOCB(19,BOFM4,CBLT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnllrl-", XLOCB(19,BOFM4,CBLT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"blelr-", XLOCB(19,BOFM4,CBGT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnglr-", XLOCB(19,BOFM4,CBGT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"blelrl-", XLOCB(19,BOFM4,CBGT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnglrl-", XLOCB(19,BOFM4,CBGT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnelr-", XLOCB(19,BOFM4,CBEQ,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnelrl-", XLOCB(19,BOFM4,CBEQ,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnslr-", XLOCB(19,BOFM4,CBSO,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnulr-", XLOCB(19,BOFM4,CBSO,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnslrl-", XLOCB(19,BOFM4,CBSO,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnulrl-", XLOCB(19,BOFM4,CBSO,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgelr+", XLOCB(19,BOFP4,CBLT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnllr+", XLOCB(19,BOFP4,CBLT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgelrl+", XLOCB(19,BOFP4,CBLT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnllrl+", XLOCB(19,BOFP4,CBLT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"blelr+", XLOCB(19,BOFP4,CBGT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnglr+", XLOCB(19,BOFP4,CBGT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"blelrl+", XLOCB(19,BOFP4,CBGT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnglrl+", XLOCB(19,BOFP4,CBGT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnelr+", XLOCB(19,BOFP4,CBEQ,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnelrl+", XLOCB(19,BOFP4,CBEQ,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnslr+", XLOCB(19,BOFP4,CBSO,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnulr+", XLOCB(19,BOFP4,CBSO,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnslrl+", XLOCB(19,BOFP4,CBSO,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnulrl+", XLOCB(19,BOFP4,CBSO,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bltlr", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bltlr-", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bltr", XLOCB(19,BOT,CBLT,16,0), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bltlrl", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bltlrl-", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bltrl", XLOCB(19,BOT,CBLT,16,1), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bgtlr", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bgtlr-", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, 
{CR}}, +{"bgtr", XLOCB(19,BOT,CBGT,16,0), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bgtlrl", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bgtlrl-", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgtrl", XLOCB(19,BOT,CBGT,16,1), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"beqlr", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"beqlr-", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"beqr", XLOCB(19,BOT,CBEQ,16,0), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"beqlrl", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"beqlrl-", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"beqrl", XLOCB(19,BOT,CBEQ,16,1), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bsolr", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bsolr-", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bsor", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bunlr", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bunlr-", XLOCB(19,BOT,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bsolrl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bsolrl-", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bsorl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PWRCOM, PPCVLE, {CR}}, +{"bunlrl", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bunlrl-", XLOCB(19,BOT,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bltlr+", XLOCB(19,BOTP,CBLT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bltlrl+", XLOCB(19,BOTP,CBLT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgtlr+", XLOCB(19,BOTP,CBGT,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgtlrl+", XLOCB(19,BOTP,CBGT,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"beqlr+", XLOCB(19,BOTP,CBEQ,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"beqlrl+", XLOCB(19,BOTP,CBEQ,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bsolr+", XLOCB(19,BOTP,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bunlr+", XLOCB(19,BOTP,CBSO,16,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bsolrl+", XLOCB(19,BOTP,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bunlrl+", XLOCB(19,BOTP,CBSO,16,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bltlr-", XLOCB(19,BOTM4,CBLT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bltlrl-", XLOCB(19,BOTM4,CBLT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgtlr-", XLOCB(19,BOTM4,CBGT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgtlrl-", XLOCB(19,BOTM4,CBGT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"beqlr-", XLOCB(19,BOTM4,CBEQ,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"beqlrl-", XLOCB(19,BOTM4,CBEQ,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bsolr-", XLOCB(19,BOTM4,CBSO,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bunlr-", XLOCB(19,BOTM4,CBSO,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bsolrl-", XLOCB(19,BOTM4,CBSO,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bunlrl-", XLOCB(19,BOTM4,CBSO,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bltlr+", XLOCB(19,BOTP4,CBLT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bltlrl+", XLOCB(19,BOTP4,CBLT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgtlr+", XLOCB(19,BOTP4,CBGT,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgtlrl+", XLOCB(19,BOTP4,CBGT,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"beqlr+", XLOCB(19,BOTP4,CBEQ,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, 
+{"beqlrl+", XLOCB(19,BOTP4,CBEQ,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bsolr+", XLOCB(19,BOTP4,CBSO,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bunlr+", XLOCB(19,BOTP4,CBSO,16,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bsolrl+", XLOCB(19,BOTP4,CBSO,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bunlrl+", XLOCB(19,BOTP4,CBSO,16,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, + +{"bdnzflr", XLO(19,BODNZF,16,0), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bdnzflr-", XLO(19,BODNZF,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdnzflrl", XLO(19,BODNZF,16,1), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bdnzflrl-",XLO(19,BODNZF,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdnzflr+", XLO(19,BODNZFP,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdnzflrl+",XLO(19,BODNZFP,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdzflr", XLO(19,BODZF,16,0), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bdzflr-", XLO(19,BODZF,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdzflrl", XLO(19,BODZF,16,1), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bdzflrl-", XLO(19,BODZF,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdzflr+", XLO(19,BODZFP,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdzflrl+", XLO(19,BODZFP,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bflr", XLO(19,BOF,16,0), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bflr-", XLO(19,BOF,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bbfr", XLO(19,BOF,16,0), XLBOBB_MASK, PWRCOM, PPCVLE, {BI}}, +{"bflrl", XLO(19,BOF,16,1), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bflrl-", XLO(19,BOF,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bbfrl", XLO(19,BOF,16,1), XLBOBB_MASK, PWRCOM, PPCVLE, {BI}}, +{"bflr+", XLO(19,BOFP,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bflrl+", XLO(19,BOFP,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bflr-", XLO(19,BOFM4,16,0), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"bflrl-", XLO(19,BOFM4,16,1), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"bflr+", XLO(19,BOFP4,16,0), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"bflrl+", XLO(19,BOFP4,16,1), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"bdnztlr", XLO(19,BODNZT,16,0), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bdnztlr-", XLO(19,BODNZT,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdnztlrl", XLO(19,BODNZT,16,1), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bdnztlrl-", XLO(19,BODNZT,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdnztlr+", XLO(19,BODNZTP,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdnztlrl+", XLO(19,BODNZTP,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdztlr", XLO(19,BODZT,16,0), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bdztlr-", XLO(19,BODZT,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdztlrl", XLO(19,BODZT,16,1), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bdztlrl-", XLO(19,BODZT,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdztlr+", XLO(19,BODZTP,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bdztlrl+", XLO(19,BODZTP,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"btlr", XLO(19,BOT,16,0), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"btlr-", XLO(19,BOT,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bbtr", XLO(19,BOT,16,0), XLBOBB_MASK, PWRCOM, PPCVLE, {BI}}, +{"btlrl", XLO(19,BOT,16,1), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"btlrl-", XLO(19,BOT,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bbtrl", XLO(19,BOT,16,1), XLBOBB_MASK, PWRCOM, PPCVLE, {BI}}, +{"btlr+", XLO(19,BOTP,16,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"btlrl+", XLO(19,BOTP,16,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, 
{BI}}, +{"btlr-", XLO(19,BOTM4,16,0), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"btlrl-", XLO(19,BOTM4,16,1), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"btlr+", XLO(19,BOTP4,16,0), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"btlrl+", XLO(19,BOTP4,16,1), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, + +{"bclr-", XLYLK(19,16,0,0), XLYBB_MASK, PPCCOM, PPCVLE, {BOE, BI}}, +{"bclrl-", XLYLK(19,16,0,1), XLYBB_MASK, PPCCOM, PPCVLE, {BOE, BI}}, +{"bclr+", XLYLK(19,16,1,0), XLYBB_MASK, PPCCOM, PPCVLE, {BOE, BI}}, +{"bclrl+", XLYLK(19,16,1,1), XLYBB_MASK, PPCCOM, PPCVLE, {BOE, BI}}, +{"bclr", XLLK(19,16,0), XLBH_MASK, PPCCOM, PPCVLE, {BO, BI, BH}}, +{"bcr", XLLK(19,16,0), XLBB_MASK, PWRCOM, PPCVLE, {BO, BI}}, +{"bclrl", XLLK(19,16,1), XLBH_MASK, PPCCOM, PPCVLE, {BO, BI, BH}}, +{"bcrl", XLLK(19,16,1), XLBB_MASK, PWRCOM, PPCVLE, {BO, BI}}, + +{"rfid", XL(19,18), 0xffffffff, PPC64, PPCVLE, {0}}, + +{"crnot", XL(19,33), XL_MASK, PPCCOM, PPCVLE, {BT, BA, BBA}}, +{"crnor", XL(19,33), XL_MASK, COM, PPCVLE, {BT, BA, BB}}, +{"rfmci", X(19,38), 0xffffffff, PPCRFMCI|PPCA2|PPC476, PPCVLE, {0}}, + +{"rfdi", XL(19,39), 0xffffffff, E500MC, PPCVLE, {0}}, +{"rfi", XL(19,50), 0xffffffff, COM, PPCVLE, {0}}, +{"rfci", XL(19,51), 0xffffffff, PPC403|BOOKE|PPCE300|PPCA2|PPC476, PPCVLE, {0}}, + +{"rfsvc", XL(19,82), 0xffffffff, POWER, PPCVLE, {0}}, + +{"rfgi", XL(19,102), 0xffffffff, E500MC|PPCA2, PPCVLE, {0}}, + +{"crandc", XL(19,129), XL_MASK, COM, PPCVLE, {BT, BA, BB}}, + +{"rfebb", XL(19,146), XLS_MASK, POWER8, PPCVLE, {SXL}}, + +{"isync", XL(19,150), 0xffffffff, PPCCOM, PPCVLE, {0}}, +{"ics", XL(19,150), 0xffffffff, PWRCOM, PPCVLE, {0}}, + +{"crclr", XL(19,193), XL_MASK, PPCCOM, PPCVLE, {BT, BAT, BBA}}, +{"crxor", XL(19,193), XL_MASK, COM, PPCVLE, {BT, BA, BB}}, + +{"dnh", X(19,198), X_MASK, E500MC, PPCVLE, {DUI, DUIS}}, + +{"crnand", XL(19,225), XL_MASK, COM, PPCVLE, {BT, BA, BB}}, + +{"crand", XL(19,257), XL_MASK, COM, PPCVLE, {BT, BA, BB}}, + +{"hrfid", XL(19,274), 0xffffffff, POWER5|CELL, PPC476|PPCVLE, {0}}, + +{"crset", XL(19,289), XL_MASK, PPCCOM, PPCVLE, {BT, BAT, BBA}}, +{"creqv", XL(19,289), XL_MASK, COM, PPCVLE, {BT, BA, BB}}, + +{"urfid", XL(19,306), 0xffffffff, POWER9, PPCVLE, {0}}, +{"stop", XL(19,370), 0xffffffff, POWER9, PPCVLE, {0}}, + +{"doze", XL(19,402), 0xffffffff, POWER6, POWER9|PPCVLE, {0}}, + +{"crorc", XL(19,417), XL_MASK, COM, PPCVLE, {BT, BA, BB}}, + +{"nap", XL(19,434), 0xffffffff, POWER6, POWER9|PPCVLE, {0}}, + +{"crmove", XL(19,449), XL_MASK, PPCCOM, PPCVLE, {BT, BA, BBA}}, +{"cror", XL(19,449), XL_MASK, COM, PPCVLE, {BT, BA, BB}}, + +{"sleep", XL(19,466), 0xffffffff, POWER6, POWER9|PPCVLE, {0}}, +{"rvwinkle", XL(19,498), 0xffffffff, POWER6, POWER9|PPCVLE, {0}}, + +{"bctr", XLO(19,BOU,528,0), XLBOBIBB_MASK, COM, PPCVLE, {0}}, +{"bctrl", XLO(19,BOU,528,1), XLBOBIBB_MASK, COM, PPCVLE, {0}}, + +{"bgectr", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bgectr-", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnlctr", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnlctr-", XLOCB(19,BOF,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgectrl", XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bgectrl-",XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnlctrl", XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnlctrl-",XLOCB(19,BOF,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"blectr", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"blectr-", 
XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bngctr", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bngctr-", XLOCB(19,BOF,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"blectrl", XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"blectrl-",XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bngctrl", XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bngctrl-",XLOCB(19,BOF,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnectr", XLOCB(19,BOF,CBEQ,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnectr-", XLOCB(19,BOF,CBEQ,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnectrl", XLOCB(19,BOF,CBEQ,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnectrl-",XLOCB(19,BOF,CBEQ,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnsctr", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnsctr-", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnuctr", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnuctr-", XLOCB(19,BOF,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnsctrl", XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnsctrl-",XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnuctrl", XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bnuctrl-",XLOCB(19,BOF,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgectr+", XLOCB(19,BOFP,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnlctr+", XLOCB(19,BOFP,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgectrl+",XLOCB(19,BOFP,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnlctrl+",XLOCB(19,BOFP,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"blectr+", XLOCB(19,BOFP,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bngctr+", XLOCB(19,BOFP,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"blectrl+",XLOCB(19,BOFP,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bngctrl+",XLOCB(19,BOFP,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnectr+", XLOCB(19,BOFP,CBEQ,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnectrl+",XLOCB(19,BOFP,CBEQ,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnsctr+", XLOCB(19,BOFP,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnuctr+", XLOCB(19,BOFP,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnsctrl+",XLOCB(19,BOFP,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bnuctrl+",XLOCB(19,BOFP,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgectr-", XLOCB(19,BOFM4,CBLT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnlctr-", XLOCB(19,BOFM4,CBLT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgectrl-",XLOCB(19,BOFM4,CBLT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnlctrl-",XLOCB(19,BOFM4,CBLT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"blectr-", XLOCB(19,BOFM4,CBGT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bngctr-", XLOCB(19,BOFM4,CBGT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"blectrl-",XLOCB(19,BOFM4,CBGT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bngctrl-",XLOCB(19,BOFM4,CBGT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnectr-", XLOCB(19,BOFM4,CBEQ,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnectrl-",XLOCB(19,BOFM4,CBEQ,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnsctr-", XLOCB(19,BOFM4,CBSO,528,0), XLBOCBBB_MASK, 
ISA_V2, PPCVLE, {CR}}, +{"bnuctr-", XLOCB(19,BOFM4,CBSO,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnsctrl-",XLOCB(19,BOFM4,CBSO,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnuctrl-",XLOCB(19,BOFM4,CBSO,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgectr+", XLOCB(19,BOFP4,CBLT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnlctr+", XLOCB(19,BOFP4,CBLT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgectrl+",XLOCB(19,BOFP4,CBLT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnlctrl+",XLOCB(19,BOFP4,CBLT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"blectr+", XLOCB(19,BOFP4,CBGT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bngctr+", XLOCB(19,BOFP4,CBGT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"blectrl+",XLOCB(19,BOFP4,CBGT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bngctrl+",XLOCB(19,BOFP4,CBGT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnectr+", XLOCB(19,BOFP4,CBEQ,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnectrl+",XLOCB(19,BOFP4,CBEQ,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnsctr+", XLOCB(19,BOFP4,CBSO,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnuctr+", XLOCB(19,BOFP4,CBSO,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnsctrl+",XLOCB(19,BOFP4,CBSO,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bnuctrl+",XLOCB(19,BOFP4,CBSO,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bltctr", XLOCB(19,BOT,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bltctr-", XLOCB(19,BOT,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bltctrl", XLOCB(19,BOT,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bltctrl-",XLOCB(19,BOT,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgtctr", XLOCB(19,BOT,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bgtctr-", XLOCB(19,BOT,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgtctrl", XLOCB(19,BOT,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bgtctrl-",XLOCB(19,BOT,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"beqctr", XLOCB(19,BOT,CBEQ,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"beqctr-", XLOCB(19,BOT,CBEQ,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"beqctrl", XLOCB(19,BOT,CBEQ,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"beqctrl-",XLOCB(19,BOT,CBEQ,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bsoctr", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bsoctr-", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bunctr", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bunctr-", XLOCB(19,BOT,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bsoctrl", XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bsoctrl-",XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bunctrl", XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, PPCVLE, {CR}}, +{"bunctrl-",XLOCB(19,BOT,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bltctr+", XLOCB(19,BOTP,CBLT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bltctrl+",XLOCB(19,BOTP,CBLT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgtctr+", XLOCB(19,BOTP,CBGT,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bgtctrl+",XLOCB(19,BOTP,CBGT,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"beqctr+", XLOCB(19,BOTP,CBEQ,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"beqctrl+",XLOCB(19,BOTP,CBEQ,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bsoctr+", XLOCB(19,BOTP,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, 
ISA_V2|PPCVLE, {CR}}, +{"bunctr+", XLOCB(19,BOTP,CBSO,528,0), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bsoctrl+",XLOCB(19,BOTP,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bunctrl+",XLOCB(19,BOTP,CBSO,528,1), XLBOCBBB_MASK, PPCCOM, ISA_V2|PPCVLE, {CR}}, +{"bltctr-", XLOCB(19,BOTM4,CBLT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bltctrl-",XLOCB(19,BOTM4,CBLT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgtctr-", XLOCB(19,BOTM4,CBGT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgtctrl-",XLOCB(19,BOTM4,CBGT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"beqctr-", XLOCB(19,BOTM4,CBEQ,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"beqctrl-",XLOCB(19,BOTM4,CBEQ,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bsoctr-", XLOCB(19,BOTM4,CBSO,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bunctr-", XLOCB(19,BOTM4,CBSO,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bsoctrl-",XLOCB(19,BOTM4,CBSO,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bunctrl-",XLOCB(19,BOTM4,CBSO,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bltctr+", XLOCB(19,BOTP4,CBLT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bltctrl+",XLOCB(19,BOTP4,CBLT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgtctr+", XLOCB(19,BOTP4,CBGT,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bgtctrl+",XLOCB(19,BOTP4,CBGT,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"beqctr+", XLOCB(19,BOTP4,CBEQ,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"beqctrl+",XLOCB(19,BOTP4,CBEQ,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bsoctr+", XLOCB(19,BOTP4,CBSO,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bunctr+", XLOCB(19,BOTP4,CBSO,528,0), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bsoctrl+",XLOCB(19,BOTP4,CBSO,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, +{"bunctrl+",XLOCB(19,BOTP4,CBSO,528,1), XLBOCBBB_MASK, ISA_V2, PPCVLE, {CR}}, + +{"bfctr", XLO(19,BOF,528,0), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bfctr-", XLO(19,BOF,528,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bfctrl", XLO(19,BOF,528,1), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"bfctrl-", XLO(19,BOF,528,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bfctr+", XLO(19,BOFP,528,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bfctrl+", XLO(19,BOFP,528,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"bfctr-", XLO(19,BOFM4,528,0), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"bfctrl-", XLO(19,BOFM4,528,1), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"bfctr+", XLO(19,BOFP4,528,0), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"bfctrl+", XLO(19,BOFP4,528,1), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"btctr", XLO(19,BOT,528,0), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"btctr-", XLO(19,BOT,528,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"btctrl", XLO(19,BOT,528,1), XLBOBB_MASK, PPCCOM, PPCVLE, {BI}}, +{"btctrl-", XLO(19,BOT,528,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"btctr+", XLO(19,BOTP,528,0), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"btctrl+", XLO(19,BOTP,528,1), XLBOBB_MASK, PPCCOM, ISA_V2|PPCVLE, {BI}}, +{"btctr-", XLO(19,BOTM4,528,0), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"btctrl-", XLO(19,BOTM4,528,1), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"btctr+", XLO(19,BOTP4,528,0), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, +{"btctrl+", XLO(19,BOTP4,528,1), XLBOBB_MASK, ISA_V2, PPCVLE, {BI}}, + +{"bcctr-", XLYLK(19,528,0,0), XLYBB_MASK, PPCCOM, PPCVLE, {BOE, BI}}, +{"bcctrl-", XLYLK(19,528,0,1), XLYBB_MASK, PPCCOM, PPCVLE, {BOE, BI}}, +{"bcctr+", XLYLK(19,528,1,0), XLYBB_MASK, PPCCOM, PPCVLE, {BOE, BI}}, +{"bcctrl+", XLYLK(19,528,1,1), XLYBB_MASK, PPCCOM, 
PPCVLE, {BOE, BI}}, +{"bcctr", XLLK(19,528,0), XLBH_MASK, PPCCOM, PPCVLE, {BO, BI, BH}}, +{"bcc", XLLK(19,528,0), XLBB_MASK, PWRCOM, PPCVLE, {BO, BI}}, +{"bcctrl", XLLK(19,528,1), XLBH_MASK, PPCCOM, PPCVLE, {BO, BI, BH}}, +{"bccl", XLLK(19,528,1), XLBB_MASK, PWRCOM, PPCVLE, {BO, BI}}, + +{"bctar-", XLYLK(19,560,0,0), XLYBB_MASK, POWER8, PPCVLE, {BOE, BI}}, +{"bctarl-", XLYLK(19,560,0,1), XLYBB_MASK, POWER8, PPCVLE, {BOE, BI}}, +{"bctar+", XLYLK(19,560,1,0), XLYBB_MASK, POWER8, PPCVLE, {BOE, BI}}, +{"bctarl+", XLYLK(19,560,1,1), XLYBB_MASK, POWER8, PPCVLE, {BOE, BI}}, +{"bctar", XLLK(19,560,0), XLBH_MASK, POWER8, PPCVLE, {BO, BI, BH}}, +{"bctarl", XLLK(19,560,1), XLBH_MASK, POWER8, PPCVLE, {BO, BI, BH}}, + +{"rlwimi", M(20,0), M_MASK, PPCCOM, PPCVLE, {RA, RS, SH, MBE, ME}}, +{"rlimi", M(20,0), M_MASK, PWRCOM, PPCVLE, {RA, RS, SH, MBE, ME}}, + +{"rlwimi.", M(20,1), M_MASK, PPCCOM, PPCVLE, {RA, RS, SH, MBE, ME}}, +{"rlimi.", M(20,1), M_MASK, PWRCOM, PPCVLE, {RA, RS, SH, MBE, ME}}, + +{"rotlwi", MME(21,31,0), MMBME_MASK, PPCCOM, PPCVLE, {RA, RS, SH}}, +{"clrlwi", MME(21,31,0), MSHME_MASK, PPCCOM, PPCVLE, {RA, RS, MB}}, +{"rlwinm", M(21,0), M_MASK, PPCCOM, PPCVLE, {RA, RS, SH, MBE, ME}}, +{"rlinm", M(21,0), M_MASK, PWRCOM, PPCVLE, {RA, RS, SH, MBE, ME}}, +{"rotlwi.", MME(21,31,1), MMBME_MASK, PPCCOM, PPCVLE, {RA, RS, SH}}, +{"clrlwi.", MME(21,31,1), MSHME_MASK, PPCCOM, PPCVLE, {RA, RS, MB}}, +{"rlwinm.", M(21,1), M_MASK, PPCCOM, PPCVLE, {RA, RS, SH, MBE, ME}}, +{"rlinm.", M(21,1), M_MASK, PWRCOM, PPCVLE, {RA, RS, SH, MBE, ME}}, + +{"rlmi", M(22,0), M_MASK, M601, PPCVLE, {RA, RS, RB, MBE, ME}}, +{"rlmi.", M(22,1), M_MASK, M601, PPCVLE, {RA, RS, RB, MBE, ME}}, + +{"rotlw", MME(23,31,0), MMBME_MASK, PPCCOM, PPCVLE, {RA, RS, RB}}, +{"rlwnm", M(23,0), M_MASK, PPCCOM, PPCVLE, {RA, RS, RB, MBE, ME}}, +{"rlnm", M(23,0), M_MASK, PWRCOM, PPCVLE, {RA, RS, RB, MBE, ME}}, +{"rotlw.", MME(23,31,1), MMBME_MASK, PPCCOM, PPCVLE, {RA, RS, RB}}, +{"rlwnm.", M(23,1), M_MASK, PPCCOM, PPCVLE, {RA, RS, RB, MBE, ME}}, +{"rlnm.", M(23,1), M_MASK, PWRCOM, PPCVLE, {RA, RS, RB, MBE, ME}}, + +{"nop", OP(24), 0xffffffff, PPCCOM, PPCVLE, {0}}, +{"ori", OP(24), OP_MASK, PPCCOM, PPCVLE, {RA, RS, UI}}, +{"oril", OP(24), OP_MASK, PWRCOM, PPCVLE, {RA, RS, UI}}, + +{"oris", OP(25), OP_MASK, PPCCOM, PPCVLE, {RA, RS, UI}}, +{"oriu", OP(25), OP_MASK, PWRCOM, PPCVLE, {RA, RS, UI}}, + +{"xnop", OP(26), 0xffffffff, PPCCOM, PPCVLE, {0}}, +{"xori", OP(26), OP_MASK, PPCCOM, PPCVLE, {RA, RS, UI}}, +{"xoril", OP(26), OP_MASK, PWRCOM, PPCVLE, {RA, RS, UI}}, + +{"xoris", OP(27), OP_MASK, PPCCOM, PPCVLE, {RA, RS, UI}}, +{"xoriu", OP(27), OP_MASK, PWRCOM, PPCVLE, {RA, RS, UI}}, + +{"andi.", OP(28), OP_MASK, PPCCOM, PPCVLE, {RA, RS, UI}}, +{"andil.", OP(28), OP_MASK, PWRCOM, PPCVLE, {RA, RS, UI}}, + +{"andis.", OP(29), OP_MASK, PPCCOM, PPCVLE, {RA, RS, UI}}, +{"andiu.", OP(29), OP_MASK, PWRCOM, PPCVLE, {RA, RS, UI}}, + +{"rotldi", MD(30,0,0), MDMB_MASK, PPC64, PPCVLE, {RA, RS, SH6}}, +{"clrldi", MD(30,0,0), MDSH_MASK, PPC64, PPCVLE, {RA, RS, MB6}}, +{"rldicl", MD(30,0,0), MD_MASK, PPC64, PPCVLE, {RA, RS, SH6, MB6}}, +{"rotldi.", MD(30,0,1), MDMB_MASK, PPC64, PPCVLE, {RA, RS, SH6}}, +{"clrldi.", MD(30,0,1), MDSH_MASK, PPC64, PPCVLE, {RA, RS, MB6}}, +{"rldicl.", MD(30,0,1), MD_MASK, PPC64, PPCVLE, {RA, RS, SH6, MB6}}, + +{"rldicr", MD(30,1,0), MD_MASK, PPC64, PPCVLE, {RA, RS, SH6, ME6}}, +{"rldicr.", MD(30,1,1), MD_MASK, PPC64, PPCVLE, {RA, RS, SH6, ME6}}, + +{"rldic", MD(30,2,0), MD_MASK, PPC64, PPCVLE, {RA, RS, SH6, MB6}}, +{"rldic.", MD(30,2,1), 
MD_MASK, PPC64, PPCVLE, {RA, RS, SH6, MB6}}, + +{"rldimi", MD(30,3,0), MD_MASK, PPC64, PPCVLE, {RA, RS, SH6, MB6}}, +{"rldimi.", MD(30,3,1), MD_MASK, PPC64, PPCVLE, {RA, RS, SH6, MB6}}, + +{"rotld", MDS(30,8,0), MDSMB_MASK, PPC64, PPCVLE, {RA, RS, RB}}, +{"rldcl", MDS(30,8,0), MDS_MASK, PPC64, PPCVLE, {RA, RS, RB, MB6}}, +{"rotld.", MDS(30,8,1), MDSMB_MASK, PPC64, PPCVLE, {RA, RS, RB}}, +{"rldcl.", MDS(30,8,1), MDS_MASK, PPC64, PPCVLE, {RA, RS, RB, MB6}}, + +{"rldcr", MDS(30,9,0), MDS_MASK, PPC64, PPCVLE, {RA, RS, RB, ME6}}, +{"rldcr.", MDS(30,9,1), MDS_MASK, PPC64, PPCVLE, {RA, RS, RB, ME6}}, + +{"cmpw", XOPL(31,0,0), XCMPL_MASK, PPCCOM, 0, {OBF, RA, RB}}, +{"cmpd", XOPL(31,0,1), XCMPL_MASK, PPC64, 0, {OBF, RA, RB}}, +{"cmp", X(31,0), XCMP_MASK, PPC, 0, {BF, L32OPT, RA, RB}}, +{"cmp", X(31,0), XCMPL_MASK, PWRCOM, PPC, {BF, RA, RB}}, + +{"twlgt", XTO(31,4,TOLGT), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tlgt", XTO(31,4,TOLGT), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twllt", XTO(31,4,TOLLT), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tllt", XTO(31,4,TOLLT), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"tweq", XTO(31,4,TOEQ), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"teq", XTO(31,4,TOEQ), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twlge", XTO(31,4,TOLGE), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tlge", XTO(31,4,TOLGE), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twlnl", XTO(31,4,TOLNL), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tlnl", XTO(31,4,TOLNL), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twlle", XTO(31,4,TOLLE), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tlle", XTO(31,4,TOLLE), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twlng", XTO(31,4,TOLNG), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tlng", XTO(31,4,TOLNG), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twgt", XTO(31,4,TOGT), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tgt", XTO(31,4,TOGT), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twge", XTO(31,4,TOGE), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tge", XTO(31,4,TOGE), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twnl", XTO(31,4,TONL), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tnl", XTO(31,4,TONL), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twlt", XTO(31,4,TOLT), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tlt", XTO(31,4,TOLT), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twle", XTO(31,4,TOLE), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tle", XTO(31,4,TOLE), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twng", XTO(31,4,TONG), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tng", XTO(31,4,TONG), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"twne", XTO(31,4,TONE), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tne", XTO(31,4,TONE), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"trap", XTO(31,4,TOU), 0xffffffff, PPCCOM, 0, {0}}, +{"twu", XTO(31,4,TOU), XTO_MASK, PPCCOM, 0, {RA, RB}}, +{"tu", XTO(31,4,TOU), XTO_MASK, PWRCOM, 0, {RA, RB}}, +{"tw", X(31,4), X_MASK, PPCCOM, 0, {TO, RA, RB}}, +{"t", X(31,4), X_MASK, PWRCOM, 0, {TO, RA, RB}}, + +{"lvsl", X(31,6), X_MASK, PPCVEC, 0, {VD, RA0, RB}}, +{"lvebx", X(31,7), X_MASK, PPCVEC, 0, {VD, RA0, RB}}, +{"lbfcmx", APU(31,7,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, + +{"subfc", XO(31,8,0,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"sf", XO(31,8,0,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"subc", XO(31,8,0,0), XO_MASK, PPCCOM, 0, {RT, RB, RA}}, +{"subfc.", XO(31,8,0,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"sf.", XO(31,8,0,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"subc.", XO(31,8,0,1), XO_MASK, PPCCOM, 0, {RT, RB, RA}}, + +{"mulhdu", XO(31,9,0,0), XO_MASK, PPC64, 0, {RT, RA, RB}}, +{"mulhdu.", XO(31,9,0,1), XO_MASK, PPC64, 0, {RT, RA, RB}}, + +{"addc", XO(31,10,0,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"a", XO(31,10,0,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"addc.", XO(31,10,0,1), 
XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"a.", XO(31,10,0,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, + +{"mulhwu", XO(31,11,0,0), XO_MASK, PPC, 0, {RT, RA, RB}}, +{"mulhwu.", XO(31,11,0,1), XO_MASK, PPC, 0, {RT, RA, RB}}, + +{"lxsiwzx", X(31,12), XX1_MASK, PPCVSX2, 0, {XT6, RA0, RB}}, + +{"isellt", X(31,15), X_MASK, PPCISEL, 0, {RT, RA0, RB}}, + +{"tlbilxlpid", XTO(31,18,0), XTO_MASK, E500MC|PPCA2, 0, {0}}, +{"tlbilxpid", XTO(31,18,1), XTO_MASK, E500MC|PPCA2, 0, {0}}, +{"tlbilxva", XTO(31,18,3), XTO_MASK, E500MC|PPCA2, 0, {RA0, RB}}, +{"tlbilx", X(31,18), X_MASK, E500MC|PPCA2, 0, {T, RA0, RB}}, + +{"mfcr", XFXM(31,19,0,0), XFXFXM_MASK, COM, 0, {RT, FXM4}}, +{"mfocrf", XFXM(31,19,0,1), XFXFXM_MASK, COM, 0, {RT, FXM}}, + +{"lwarx", X(31,20), XEH_MASK, PPC, 0, {RT, RA0, RB, EH}}, + +{"ldx", X(31,21), X_MASK, PPC64, 0, {RT, RA0, RB}}, + +{"icbt", X(31,22), X_MASK, BOOKE|PPCE300|PPCA2|PPC476, 0, {CT, RA0, RB}}, + +{"lwzx", X(31,23), X_MASK, PPCCOM, 0, {RT, RA0, RB}}, +{"lx", X(31,23), X_MASK, PWRCOM, 0, {RT, RA, RB}}, + +{"slw", XRC(31,24,0), X_MASK, PPCCOM, 0, {RA, RS, RB}}, +{"sl", XRC(31,24,0), X_MASK, PWRCOM, 0, {RA, RS, RB}}, +{"slw.", XRC(31,24,1), X_MASK, PPCCOM, 0, {RA, RS, RB}}, +{"sl.", XRC(31,24,1), X_MASK, PWRCOM, 0, {RA, RS, RB}}, + +{"cntlzw", XRC(31,26,0), XRB_MASK, PPCCOM, 0, {RA, RS}}, +{"cntlz", XRC(31,26,0), XRB_MASK, PWRCOM, 0, {RA, RS}}, +{"cntlzw.", XRC(31,26,1), XRB_MASK, PPCCOM, 0, {RA, RS}}, +{"cntlz.", XRC(31,26,1), XRB_MASK, PWRCOM, 0, {RA, RS}}, + +{"sld", XRC(31,27,0), X_MASK, PPC64, 0, {RA, RS, RB}}, +{"sld.", XRC(31,27,1), X_MASK, PPC64, 0, {RA, RS, RB}}, + +{"and", XRC(31,28,0), X_MASK, COM, 0, {RA, RS, RB}}, +{"and.", XRC(31,28,1), X_MASK, COM, 0, {RA, RS, RB}}, + +{"maskg", XRC(31,29,0), X_MASK, M601, PPCA2, {RA, RS, RB}}, +{"maskg.", XRC(31,29,1), X_MASK, M601, PPCA2, {RA, RS, RB}}, + +{"ldepx", X(31,29), X_MASK, E500MC|PPCA2, 0, {RT, RA0, RB}}, + +{"waitasec", X(31,30), XRTRARB_MASK, POWER8, POWER9, {0}}, +{"wait", X(31,30), XWC_MASK, POWER9, 0, {WC}}, + +{"lwepx", X(31,31), X_MASK, E500MC|PPCA2, 0, {RT, RA0, RB}}, + +{"cmplw", XOPL(31,32,0), XCMPL_MASK, PPCCOM, 0, {OBF, RA, RB}}, +{"cmpld", XOPL(31,32,1), XCMPL_MASK, PPC64, 0, {OBF, RA, RB}}, +{"cmpl", X(31,32), XCMP_MASK, PPC, 0, {BF, L32OPT, RA, RB}}, +{"cmpl", X(31,32), XCMPL_MASK, PWRCOM, PPC, {BF, RA, RB}}, + +{"lvsr", X(31,38), X_MASK, PPCVEC, 0, {VD, RA0, RB}}, +{"lvehx", X(31,39), X_MASK, PPCVEC, 0, {VD, RA0, RB}}, +{"lhfcmx", APU(31,39,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, + +{"mviwsplt", X(31,46), X_MASK, PPCVEC2, 0, {VD, RA, RB}}, + +{"iselgt", X(31,47), X_MASK, PPCISEL, 0, {RT, RA0, RB}}, + +{"lvewx", X(31,71), X_MASK, PPCVEC, 0, {VD, RA0, RB}}, + +{"addg6s", XO(31,74,0,0), XO_MASK, POWER6, 0, {RT, RA, RB}}, + +{"lxsiwax", X(31,76), XX1_MASK, PPCVSX2, 0, {XT6, RA0, RB}}, + +{"iseleq", X(31,79), X_MASK, PPCISEL, 0, {RT, RA0, RB}}, + +{"isel", XISEL(31,15), XISEL_MASK, PPCISEL|TITAN, 0, {RT, RA0, RB, CRB}}, + +{"subf", XO(31,40,0,0), XO_MASK, PPC, 0, {RT, RA, RB}}, +{"sub", XO(31,40,0,0), XO_MASK, PPC, 0, {RT, RB, RA}}, +{"subf.", XO(31,40,0,1), XO_MASK, PPC, 0, {RT, RA, RB}}, +{"sub.", XO(31,40,0,1), XO_MASK, PPC, 0, {RT, RB, RA}}, + +{"mfvsrd", X(31,51), XX1RB_MASK, PPCVSX2, 0, {RA, XS6}}, +{"mffprd", X(31,51), XX1RB_MASK|1, PPCVSX2, 0, {RA, FRS}}, +{"mfvrd", X(31,51)|1, XX1RB_MASK|1, PPCVSX2, 0, {RA, VS}}, +{"eratilx", X(31,51), X_MASK, PPCA2, 0, {ERAT_T, RA, RB}}, + +{"lbarx", X(31,52), XEH_MASK, POWER8|E6500, 0, {RT, RA0, RB, EH}}, + +{"ldux", X(31,53), X_MASK, PPC64, 0, {RT, RAL, RB}}, + 
+{"dcbst", X(31,54), XRT_MASK, PPC, 0, {RA0, RB}}, + +{"lwzux", X(31,55), X_MASK, PPCCOM, 0, {RT, RAL, RB}}, +{"lux", X(31,55), X_MASK, PWRCOM, 0, {RT, RA, RB}}, + +{"cntlzd", XRC(31,58,0), XRB_MASK, PPC64, 0, {RA, RS}}, +{"cntlzd.", XRC(31,58,1), XRB_MASK, PPC64, 0, {RA, RS}}, + +{"andc", XRC(31,60,0), X_MASK, COM, 0, {RA, RS, RB}}, +{"andc.", XRC(31,60,1), X_MASK, COM, 0, {RA, RS, RB}}, + +{"waitrsv", X(31,62)|(1<<21), 0xffffffff, E500MC|PPCA2, 0, {0}}, +{"waitimpl", X(31,62)|(2<<21), 0xffffffff, E500MC|PPCA2, 0, {0}}, +{"wait", X(31,62), XWC_MASK, E500MC|PPCA2, 0, {WC}}, + +{"dcbstep", XRT(31,63,0), XRT_MASK, E500MC|PPCA2, 0, {RA0, RB}}, + +{"tdlgt", XTO(31,68,TOLGT), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdllt", XTO(31,68,TOLLT), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdeq", XTO(31,68,TOEQ), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdlge", XTO(31,68,TOLGE), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdlnl", XTO(31,68,TOLNL), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdlle", XTO(31,68,TOLLE), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdlng", XTO(31,68,TOLNG), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdgt", XTO(31,68,TOGT), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdge", XTO(31,68,TOGE), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdnl", XTO(31,68,TONL), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdlt", XTO(31,68,TOLT), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdle", XTO(31,68,TOLE), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdng", XTO(31,68,TONG), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdne", XTO(31,68,TONE), XTO_MASK, PPC64, 0, {RA, RB}}, +{"tdu", XTO(31,68,TOU), XTO_MASK, PPC64, 0, {RA, RB}}, +{"td", X(31,68), X_MASK, PPC64, 0, {TO, RA, RB}}, + +{"lwfcmx", APU(31,71,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, +{"mulhd", XO(31,73,0,0), XO_MASK, PPC64, 0, {RT, RA, RB}}, +{"mulhd.", XO(31,73,0,1), XO_MASK, PPC64, 0, {RT, RA, RB}}, + +{"mulhw", XO(31,75,0,0), XO_MASK, PPC, 0, {RT, RA, RB}}, +{"mulhw.", XO(31,75,0,1), XO_MASK, PPC, 0, {RT, RA, RB}}, + +{"dlmzb", XRC(31,78,0), X_MASK, PPC403|PPC440|TITAN, 0, {RA, RS, RB}}, +{"dlmzb.", XRC(31,78,1), X_MASK, PPC403|PPC440|TITAN, 0, {RA, RS, RB}}, + +{"mtsrd", X(31,82), XRB_MASK|(1<<20), PPC64, 0, {SR, RS}}, + +{"mfmsr", X(31,83), XRARB_MASK, COM, 0, {RT}}, + +{"ldarx", X(31,84), XEH_MASK, PPC64, 0, {RT, RA0, RB, EH}}, + +{"dcbfl", XOPL(31,86,1), XRT_MASK, POWER5, PPC476, {RA0, RB}}, +{"dcbf", X(31,86), XLRT_MASK, PPC, 0, {RA0, RB, L2OPT}}, + +{"lbzx", X(31,87), X_MASK, COM, 0, {RT, RA0, RB}}, -{ "ldux", X(31,53), X_MASK, PPC64, { RT, RAL, RB } }, +{"lbepx", X(31,95), X_MASK, E500MC|PPCA2, 0, {RT, RA0, RB}}, -{ "dcbst", X(31,54), XRT_MASK, PPC, { RA, RB } }, +{"dni", XRC(31,97,1), XRB_MASK, E6500, 0, {DUI, DCTL}}, -{ "lwzux", X(31,55), X_MASK, PPCCOM, { RT, RAL, RB } }, -{ "lux", X(31,55), X_MASK, PWRCOM, { RT, RA, RB } }, +{"lvx", X(31,103), X_MASK, PPCVEC, 0, {VD, RA0, RB}}, +{"lqfcmx", APU(31,103,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "dcbste", X(31,62), XRT_MASK, BOOKE64, { RA, RB } }, +{"neg", XO(31,104,0,0), XORB_MASK, COM, 0, {RT, RA}}, +{"neg.", XO(31,104,0,1), XORB_MASK, COM, 0, {RT, RA}}, -{ "lwzuxe", X(31,63), X_MASK, BOOKE64, { RT, RAL, RB } }, +{"mul", XO(31,107,0,0), XO_MASK, M601, 0, {RT, RA, RB}}, +{"mul.", XO(31,107,0,1), XO_MASK, M601, 0, {RT, RA, RB}}, -{ "cntlzd", XRC(31,58,0), XRB_MASK, PPC64, { RA, RS } }, -{ "cntlzd.", XRC(31,58,1), XRB_MASK, PPC64, { RA, RS } }, +{"mvidsplt", X(31,110), X_MASK, PPCVEC2, 0, {VD, RA, RB}}, -{ "andc", XRC(31,60,0), X_MASK, COM, { RA, RS, RB } }, -{ "andc.", XRC(31,60,1), X_MASK, COM, { RA, RS, RB } }, +{"mtsrdin", X(31,114), XRA_MASK, PPC64, 0, {RS, RB}}, -{ "tdlgt", 
XTO(31,68,TOLGT), XTO_MASK, PPC64, { RA, RB } }, -{ "tdllt", XTO(31,68,TOLLT), XTO_MASK, PPC64, { RA, RB } }, -{ "tdeq", XTO(31,68,TOEQ), XTO_MASK, PPC64, { RA, RB } }, -{ "tdlge", XTO(31,68,TOLGE), XTO_MASK, PPC64, { RA, RB } }, -{ "tdlnl", XTO(31,68,TOLNL), XTO_MASK, PPC64, { RA, RB } }, -{ "tdlle", XTO(31,68,TOLLE), XTO_MASK, PPC64, { RA, RB } }, -{ "tdlng", XTO(31,68,TOLNG), XTO_MASK, PPC64, { RA, RB } }, -{ "tdgt", XTO(31,68,TOGT), XTO_MASK, PPC64, { RA, RB } }, -{ "tdge", XTO(31,68,TOGE), XTO_MASK, PPC64, { RA, RB } }, -{ "tdnl", XTO(31,68,TONL), XTO_MASK, PPC64, { RA, RB } }, -{ "tdlt", XTO(31,68,TOLT), XTO_MASK, PPC64, { RA, RB } }, -{ "tdle", XTO(31,68,TOLE), XTO_MASK, PPC64, { RA, RB } }, -{ "tdng", XTO(31,68,TONG), XTO_MASK, PPC64, { RA, RB } }, -{ "tdne", XTO(31,68,TONE), XTO_MASK, PPC64, { RA, RB } }, -{ "td", X(31,68), X_MASK, PPC64, { TO, RA, RB } }, +{"mffprwz", X(31,115), XX1RB_MASK|1, PPCVSX2, 0, {RA, FRS}}, +{"mfvrwz", X(31,115)|1, XX1RB_MASK|1, PPCVSX2, 0, {RA, VS}}, +{"mfvsrwz", X(31,115), XX1RB_MASK, PPCVSX2, 0, {RA, XS6}}, -{ "mulhd", XO(31,73,0,0), XO_MASK, PPC64, { RT, RA, RB } }, -{ "mulhd.", XO(31,73,0,1), XO_MASK, PPC64, { RT, RA, RB } }, +{"lharx", X(31,116), XEH_MASK, POWER8|E6500, 0, {RT, RA0, RB, EH}}, -{ "mulhw", XO(31,75,0,0), XO_MASK, PPC, { RT, RA, RB } }, -{ "mulhw.", XO(31,75,0,1), XO_MASK, PPC, { RT, RA, RB } }, +{"clf", X(31,118), XTO_MASK, POWER, 0, {RA, RB}}, -{ "dlmzb", XRC(31,78,0), X_MASK, PPC403|PPC440, { RA, RS, RB } }, -{ "dlmzb.", XRC(31,78,1), X_MASK, PPC403|PPC440, { RA, RS, RB } }, +{"lbzux", X(31,119), X_MASK, COM, 0, {RT, RAL, RB}}, -{ "mtsrd", X(31,82), XRB_MASK|(1<<20), PPC64, { SR, RS } }, +{"popcntb", X(31,122), XRB_MASK, POWER5, 0, {RA, RS}}, -{ "mfmsr", X(31,83), XRARB_MASK, COM, { RT } }, +{"not", XRC(31,124,0), X_MASK, COM, 0, {RA, RS, RBS}}, +{"nor", XRC(31,124,0), X_MASK, COM, 0, {RA, RS, RB}}, +{"not.", XRC(31,124,1), X_MASK, COM, 0, {RA, RS, RBS}}, +{"nor.", XRC(31,124,1), X_MASK, COM, 0, {RA, RS, RB}}, -{ "ldarx", X(31,84), XEH_MASK, PPC64, { RT, RA0, RB, EH } }, +{"dcbfep", XRT(31,127,0), XRT_MASK, E500MC|PPCA2, 0, {RA0, RB}}, -{ "dcbfl", XOPL(31,86,1), XRT_MASK, POWER5, { RA, RB } }, -{ "dcbf", X(31,86), XLRT_MASK, PPC, { RA, RB, L } }, +{"setb", X(31,128), XRB_MASK|(3<<16), POWER9, 0, {RT, BFA}}, -{ "lbzx", X(31,87), X_MASK, COM, { RT, RA0, RB } }, +{"wrtee", X(31,131), XRARB_MASK, PPC403|BOOKE|PPCA2|PPC476, 0, {RS}}, -{ "dcbfe", X(31,94), XRT_MASK, BOOKE64, { RA, RB } }, +{"dcbtstls", X(31,134), X_MASK, PPCCHLK|PPC476|TITAN, 0, {CT, RA0, RB}}, -{ "lbzxe", X(31,95), X_MASK, BOOKE64, { RT, RA0, RB } }, +{"stvebx", X(31,135), X_MASK, PPCVEC, 0, {VS, RA0, RB}}, +{"stbfcmx", APU(31,135,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "neg", XO(31,104,0,0), XORB_MASK, COM, { RT, RA } }, -{ "neg.", XO(31,104,0,1), XORB_MASK, COM, { RT, RA } }, -{ "nego", XO(31,104,1,0), XORB_MASK, COM, { RT, RA } }, -{ "nego.", XO(31,104,1,1), XORB_MASK, COM, { RT, RA } }, +{"subfe", XO(31,136,0,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"sfe", XO(31,136,0,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"subfe.", XO(31,136,0,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"sfe.", XO(31,136,0,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "mul", XO(31,107,0,0), XO_MASK, M601, { RT, RA, RB } }, -{ "mul.", XO(31,107,0,1), XO_MASK, M601, { RT, RA, RB } }, -{ "mulo", XO(31,107,1,0), XO_MASK, M601, { RT, RA, RB } }, -{ "mulo.", XO(31,107,1,1), XO_MASK, M601, { RT, RA, RB } }, +{"adde", XO(31,138,0,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"ae", XO(31,138,0,0), XO_MASK, 
PWRCOM, 0, {RT, RA, RB}}, +{"adde.", XO(31,138,0,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"ae.", XO(31,138,0,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "mtsrdin", X(31,114), XRA_MASK, PPC64, { RS, RB } }, +{"stxsiwx", X(31,140), XX1_MASK, PPCVSX2, 0, {XS6, RA0, RB}}, -{ "clf", X(31,118), XTO_MASK, POWER, { RA, RB } }, +{"msgsndp", XRTRA(31,142,0,0), XRTRA_MASK, POWER8, 0, {RB}}, +{"dcbtstlse", X(31,142), X_MASK, PPCCHLK, E500MC, {CT, RA0, RB}}, -{ "lbzux", X(31,119), X_MASK, COM, { RT, RAL, RB } }, +{"mtcr", XFXM(31,144,0xff,0), XRARB_MASK, COM, 0, {RS}}, +{"mtcrf", XFXM(31,144,0,0), XFXFXM_MASK, COM, 0, {FXM, RS}}, +{"mtocrf", XFXM(31,144,0,1), XFXFXM_MASK, COM, 0, {FXM, RS}}, -{ "popcntb", X(31,122), XRB_MASK, POWER5, { RA, RS } }, +{"mtmsr", X(31,146), XRLARB_MASK, COM, 0, {RS, A_L}}, -{ "not", XRC(31,124,0), X_MASK, COM, { RA, RS, RBS } }, -{ "nor", XRC(31,124,0), X_MASK, COM, { RA, RS, RB } }, -{ "not.", XRC(31,124,1), X_MASK, COM, { RA, RS, RBS } }, -{ "nor.", XRC(31,124,1), X_MASK, COM, { RA, RS, RB } }, +{"mtsle", X(31,147), XRTLRARB_MASK, POWER8, 0, {L}}, -{ "lwarxe", X(31,126), X_MASK, BOOKE64, { RT, RA0, RB } }, +{"eratsx", XRC(31,147,0), X_MASK, PPCA2, 0, {RT, RA0, RB}}, +{"eratsx.", XRC(31,147,1), X_MASK, PPCA2, 0, {RT, RA0, RB}}, -{ "lbzuxe", X(31,127), X_MASK, BOOKE64, { RT, RAL, RB } }, +{"stdx", X(31,149), X_MASK, PPC64, 0, {RS, RA0, RB}}, -{ "wrtee", X(31,131), XRARB_MASK, PPC403 | BOOKE, { RS } }, +{"stwcx.", XRC(31,150,1), X_MASK, PPC, 0, {RS, RA0, RB}}, -{ "dcbtstls",X(31,134), X_MASK, PPCCHLK, { CT, RA, RB }}, +{"stwx", X(31,151), X_MASK, PPCCOM, 0, {RS, RA0, RB}}, +{"stx", X(31,151), X_MASK, PWRCOM, 0, {RS, RA, RB}}, -{ "subfe", XO(31,136,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "sfe", XO(31,136,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "subfe.", XO(31,136,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "sfe.", XO(31,136,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "subfeo", XO(31,136,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "sfeo", XO(31,136,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "subfeo.", XO(31,136,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "sfeo.", XO(31,136,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, +{"slq", XRC(31,152,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"slq.", XRC(31,152,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "adde", XO(31,138,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "ae", XO(31,138,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "adde.", XO(31,138,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "ae.", XO(31,138,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "addeo", XO(31,138,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "aeo", XO(31,138,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "addeo.", XO(31,138,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "aeo.", XO(31,138,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, +{"sle", XRC(31,153,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"sle.", XRC(31,153,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "dcbtstlse",X(31,142),X_MASK, PPCCHLK64, { CT, RA, RB }}, +{"prtyw", X(31,154), XRB_MASK, POWER6|PPCA2|PPC476, 0, {RA, RS}}, -{ "mtocrf", XFXM(31,144,0,1), XFXFXM_MASK, COM, { FXM, RS } }, -{ "mtcr", XFXM(31,144,0xff,0), XRARB_MASK, COM, { RS }}, -{ "mtcrf", X(31,144), XFXFXM_MASK, COM, { FXM, RS } }, +{"stdepx", X(31,157), X_MASK, E500MC|PPCA2, 0, {RS, RA0, RB}}, -{ "mtmsr", X(31,146), XRARB_MASK, COM, { RS } }, +{"stwepx", X(31,159), X_MASK, E500MC|PPCA2, 0, {RS, RA0, RB}}, -{ "stdx", X(31,149), X_MASK, PPC64, { RS, RA0, RB } }, +{"wrteei", X(31,163), XE_MASK, PPC403|BOOKE|PPCA2|PPC476, 0, {E}}, -{ "stwcx.", XRC(31,150,1), X_MASK, PPC, { RS, RA0, 
RB } }, +{"dcbtls", X(31,166), X_MASK, PPCCHLK|PPC476|TITAN, 0, {CT, RA0, RB}}, -{ "stwx", X(31,151), X_MASK, PPCCOM, { RS, RA0, RB } }, -{ "stx", X(31,151), X_MASK, PWRCOM, { RS, RA, RB } }, +{"stvehx", X(31,167), X_MASK, PPCVEC, 0, {VS, RA0, RB}}, +{"sthfcmx", APU(31,167,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "stwcxe.", XRC(31,158,1), X_MASK, BOOKE64, { RS, RA0, RB } }, +{"addex", ZRC(31,170,0), Z2_MASK, POWER9, 0, {RT, RA, RB, CY}}, -{ "stwxe", X(31,159), X_MASK, BOOKE64, { RS, RA0, RB } }, +{"msgclrp", XRTRA(31,174,0,0), XRTRA_MASK, POWER8, 0, {RB}}, +{"dcbtlse", X(31,174), X_MASK, PPCCHLK, E500MC, {CT, RA0, RB}}, -{ "slq", XRC(31,152,0), X_MASK, M601, { RA, RS, RB } }, -{ "slq.", XRC(31,152,1), X_MASK, M601, { RA, RS, RB } }, +{"mtmsrd", X(31,178), XRLARB_MASK, PPC64, 0, {RS, A_L}}, -{ "sle", XRC(31,153,0), X_MASK, M601, { RA, RS, RB } }, -{ "sle.", XRC(31,153,1), X_MASK, M601, { RA, RS, RB } }, +{"mtvsrd", X(31,179), XX1RB_MASK, PPCVSX2, 0, {XT6, RA}}, +{"mtfprd", X(31,179), XX1RB_MASK|1, PPCVSX2, 0, {FRT, RA}}, +{"mtvrd", X(31,179)|1, XX1RB_MASK|1, PPCVSX2, 0, {VD, RA}}, +{"eratre", X(31,179), X_MASK, PPCA2, 0, {RT, RA, WS}}, -{ "prtyw", X(31,154), XRB_MASK, POWER6, { RA, RS } }, +{"stdux", X(31,181), X_MASK, PPC64, 0, {RS, RAS, RB}}, -{ "wrteei", X(31,163), XE_MASK, PPC403 | BOOKE, { E } }, +{"stqcx.", XRC(31,182,1), X_MASK, POWER8, 0, {RSQ, RA0, RB}}, +{"wchkall", X(31,182), X_MASK, PPCA2, 0, {OBF}}, -{ "dcbtls", X(31,166), X_MASK, PPCCHLK, { CT, RA, RB }}, -{ "dcbtlse", X(31,174), X_MASK, PPCCHLK64, { CT, RA, RB }}, +{"stwux", X(31,183), X_MASK, PPCCOM, 0, {RS, RAS, RB}}, +{"stux", X(31,183), X_MASK, PWRCOM, 0, {RS, RA0, RB}}, -{ "mtmsrd", X(31,178), XRLARB_MASK, PPC64, { RS, A_L } }, +{"sliq", XRC(31,184,0), X_MASK, M601, 0, {RA, RS, SH}}, +{"sliq.", XRC(31,184,1), X_MASK, M601, 0, {RA, RS, SH}}, -{ "stdux", X(31,181), X_MASK, PPC64, { RS, RAS, RB } }, +{"prtyd", X(31,186), XRB_MASK, POWER6|PPCA2, 0, {RA, RS}}, -{ "stwux", X(31,183), X_MASK, PPCCOM, { RS, RAS, RB } }, -{ "stux", X(31,183), X_MASK, PWRCOM, { RS, RA0, RB } }, +{"cmprb", X(31,192), XCMP_MASK, POWER9, 0, {BF, L, RA, RB}}, -{ "sliq", XRC(31,184,0), X_MASK, M601, { RA, RS, SH } }, -{ "sliq.", XRC(31,184,1), X_MASK, M601, { RA, RS, SH } }, +{"icblq.", XRC(31,198,1), X_MASK, E6500, 0, {CT, RA0, RB}}, -{ "prtyd", X(31,186), XRB_MASK, POWER6, { RA, RS } }, +{"stvewx", X(31,199), X_MASK, PPCVEC, 0, {VS, RA0, RB}}, +{"stwfcmx", APU(31,199,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "stwuxe", X(31,191), X_MASK, BOOKE64, { RS, RAS, RB } }, +{"subfze", XO(31,200,0,0), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"sfze", XO(31,200,0,0), XORB_MASK, PWRCOM, 0, {RT, RA}}, +{"subfze.", XO(31,200,0,1), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"sfze.", XO(31,200,0,1), XORB_MASK, PWRCOM, 0, {RT, RA}}, -{ "subfze", XO(31,200,0,0), XORB_MASK, PPCCOM, { RT, RA } }, -{ "sfze", XO(31,200,0,0), XORB_MASK, PWRCOM, { RT, RA } }, -{ "subfze.", XO(31,200,0,1), XORB_MASK, PPCCOM, { RT, RA } }, -{ "sfze.", XO(31,200,0,1), XORB_MASK, PWRCOM, { RT, RA } }, -{ "subfzeo", XO(31,200,1,0), XORB_MASK, PPCCOM, { RT, RA } }, -{ "sfzeo", XO(31,200,1,0), XORB_MASK, PWRCOM, { RT, RA } }, -{ "subfzeo.",XO(31,200,1,1), XORB_MASK, PPCCOM, { RT, RA } }, -{ "sfzeo.", XO(31,200,1,1), XORB_MASK, PWRCOM, { RT, RA } }, +{"addze", XO(31,202,0,0), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"aze", XO(31,202,0,0), XORB_MASK, PWRCOM, 0, {RT, RA}}, +{"addze.", XO(31,202,0,1), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"aze.", XO(31,202,0,1), XORB_MASK, PWRCOM, 0, {RT, RA}}, -{ "addze", 
XO(31,202,0,0), XORB_MASK, PPCCOM, { RT, RA } }, -{ "aze", XO(31,202,0,0), XORB_MASK, PWRCOM, { RT, RA } }, -{ "addze.", XO(31,202,0,1), XORB_MASK, PPCCOM, { RT, RA } }, -{ "aze.", XO(31,202,0,1), XORB_MASK, PWRCOM, { RT, RA } }, -{ "addzeo", XO(31,202,1,0), XORB_MASK, PPCCOM, { RT, RA } }, -{ "azeo", XO(31,202,1,0), XORB_MASK, PWRCOM, { RT, RA } }, -{ "addzeo.", XO(31,202,1,1), XORB_MASK, PPCCOM, { RT, RA } }, -{ "azeo.", XO(31,202,1,1), XORB_MASK, PWRCOM, { RT, RA } }, +{"msgsnd", XRTRA(31,206,0,0), XRTRA_MASK, E500MC|PPCA2|POWER8, 0, {RB}}, -{ "mtsr", X(31,210), XRB_MASK|(1<<20), COM32, { SR, RS } }, +{"mtsr", X(31,210), XRB_MASK|(1<<20), COM, NON32, {SR, RS}}, -{ "stdcx.", XRC(31,214,1), X_MASK, PPC64, { RS, RA0, RB } }, +{"mtfprwa", X(31,211), XX1RB_MASK|1, PPCVSX2, 0, {FRT, RA}}, +{"mtvrwa", X(31,211)|1, XX1RB_MASK|1, PPCVSX2, 0, {VD, RA}}, +{"mtvsrwa", X(31,211), XX1RB_MASK, PPCVSX2, 0, {XT6, RA}}, +{"eratwe", X(31,211), X_MASK, PPCA2, 0, {RS, RA, WS}}, -{ "stbx", X(31,215), X_MASK, COM, { RS, RA0, RB } }, +{"ldawx.", XRC(31,212,1), X_MASK, PPCA2, 0, {RT, RA0, RB}}, -{ "sllq", XRC(31,216,0), X_MASK, M601, { RA, RS, RB } }, -{ "sllq.", XRC(31,216,1), X_MASK, M601, { RA, RS, RB } }, +{"stdcx.", XRC(31,214,1), X_MASK, PPC64, 0, {RS, RA0, RB}}, -{ "sleq", XRC(31,217,0), X_MASK, M601, { RA, RS, RB } }, -{ "sleq.", XRC(31,217,1), X_MASK, M601, { RA, RS, RB } }, +{"stbx", X(31,215), X_MASK, COM, 0, {RS, RA0, RB}}, -{ "stbxe", X(31,223), X_MASK, BOOKE64, { RS, RA0, RB } }, +{"sllq", XRC(31,216,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"sllq.", XRC(31,216,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "icblc", X(31,230), X_MASK, PPCCHLK, { CT, RA, RB }}, +{"sleq", XRC(31,217,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"sleq.", XRC(31,217,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "subfme", XO(31,232,0,0), XORB_MASK, PPCCOM, { RT, RA } }, -{ "sfme", XO(31,232,0,0), XORB_MASK, PWRCOM, { RT, RA } }, -{ "subfme.", XO(31,232,0,1), XORB_MASK, PPCCOM, { RT, RA } }, -{ "sfme.", XO(31,232,0,1), XORB_MASK, PWRCOM, { RT, RA } }, -{ "subfmeo", XO(31,232,1,0), XORB_MASK, PPCCOM, { RT, RA } }, -{ "sfmeo", XO(31,232,1,0), XORB_MASK, PWRCOM, { RT, RA } }, -{ "subfmeo.",XO(31,232,1,1), XORB_MASK, PPCCOM, { RT, RA } }, -{ "sfmeo.", XO(31,232,1,1), XORB_MASK, PWRCOM, { RT, RA } }, +{"stbepx", X(31,223), X_MASK, E500MC|PPCA2, 0, {RS, RA0, RB}}, -{ "mulld", XO(31,233,0,0), XO_MASK, PPC64, { RT, RA, RB } }, -{ "mulld.", XO(31,233,0,1), XO_MASK, PPC64, { RT, RA, RB } }, -{ "mulldo", XO(31,233,1,0), XO_MASK, PPC64, { RT, RA, RB } }, -{ "mulldo.", XO(31,233,1,1), XO_MASK, PPC64, { RT, RA, RB } }, +{"cmpeqb", X(31,224), XCMPL_MASK, POWER9, 0, {BF, RA, RB}}, -{ "addme", XO(31,234,0,0), XORB_MASK, PPCCOM, { RT, RA } }, -{ "ame", XO(31,234,0,0), XORB_MASK, PWRCOM, { RT, RA } }, -{ "addme.", XO(31,234,0,1), XORB_MASK, PPCCOM, { RT, RA } }, -{ "ame.", XO(31,234,0,1), XORB_MASK, PWRCOM, { RT, RA } }, -{ "addmeo", XO(31,234,1,0), XORB_MASK, PPCCOM, { RT, RA } }, -{ "ameo", XO(31,234,1,0), XORB_MASK, PWRCOM, { RT, RA } }, -{ "addmeo.", XO(31,234,1,1), XORB_MASK, PPCCOM, { RT, RA } }, -{ "ameo.", XO(31,234,1,1), XORB_MASK, PWRCOM, { RT, RA } }, +{"icblc", X(31,230), X_MASK, PPCCHLK|PPC476|TITAN, 0, {CT, RA0, RB}}, -{ "mullw", XO(31,235,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "muls", XO(31,235,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "mullw.", XO(31,235,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "muls.", XO(31,235,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "mullwo", XO(31,235,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "mulso", 
XO(31,235,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "mullwo.", XO(31,235,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "mulso.", XO(31,235,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, +{"stvx", X(31,231), X_MASK, PPCVEC, 0, {VS, RA0, RB}}, +{"stqfcmx", APU(31,231,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "icblce", X(31,238), X_MASK, PPCCHLK64, { CT, RA, RB }}, -{ "mtsrin", X(31,242), XRA_MASK, PPC32, { RS, RB } }, -{ "mtsri", X(31,242), XRA_MASK, POWER32, { RS, RB } }, +{"subfme", XO(31,232,0,0), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"sfme", XO(31,232,0,0), XORB_MASK, PWRCOM, 0, {RT, RA}}, +{"subfme.", XO(31,232,0,1), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"sfme.", XO(31,232,0,1), XORB_MASK, PWRCOM, 0, {RT, RA}}, -{ "dcbtst", X(31,246), X_MASK, PPC, { CT, RA, RB } }, +{"mulld", XO(31,233,0,0), XO_MASK, PPC64, 0, {RT, RA, RB}}, +{"mulld.", XO(31,233,0,1), XO_MASK, PPC64, 0, {RT, RA, RB}}, -{ "stbux", X(31,247), X_MASK, COM, { RS, RAS, RB } }, - -{ "slliq", XRC(31,248,0), X_MASK, M601, { RA, RS, SH } }, -{ "slliq.", XRC(31,248,1), X_MASK, M601, { RA, RS, SH } }, - -{ "dcbtste", X(31,253), X_MASK, BOOKE64, { CT, RA, RB } }, - -{ "stbuxe", X(31,255), X_MASK, BOOKE64, { RS, RAS, RB } }, - -{ "mfdcrx", X(31,259), X_MASK, BOOKE, { RS, RA } }, - -{ "doz", XO(31,264,0,0), XO_MASK, M601, { RT, RA, RB } }, -{ "doz.", XO(31,264,0,1), XO_MASK, M601, { RT, RA, RB } }, -{ "dozo", XO(31,264,1,0), XO_MASK, M601, { RT, RA, RB } }, -{ "dozo.", XO(31,264,1,1), XO_MASK, M601, { RT, RA, RB } }, - -{ "add", XO(31,266,0,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "cax", XO(31,266,0,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "add.", XO(31,266,0,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "cax.", XO(31,266,0,1), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "addo", XO(31,266,1,0), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "caxo", XO(31,266,1,0), XO_MASK, PWRCOM, { RT, RA, RB } }, -{ "addo.", XO(31,266,1,1), XO_MASK, PPCCOM, { RT, RA, RB } }, -{ "caxo.", XO(31,266,1,1), XO_MASK, PWRCOM, { RT, RA, RB } }, - -{ "tlbiel", X(31,274), XRTLRA_MASK, POWER4, { RB, L } }, - -{ "mfapidi", X(31,275), X_MASK, BOOKE, { RT, RA } }, - -{ "lscbx", XRC(31,277,0), X_MASK, M601, { RT, RA, RB } }, -{ "lscbx.", XRC(31,277,1), X_MASK, M601, { RT, RA, RB } }, - -{ "dcbt", X(31,278), X_MASK, PPC, { CT, RA, RB } }, - -{ "lhzx", X(31,279), X_MASK, COM, { RT, RA0, RB } }, - -{ "eqv", XRC(31,284,0), X_MASK, COM, { RA, RS, RB } }, -{ "eqv.", XRC(31,284,1), X_MASK, COM, { RA, RS, RB } }, - -{ "dcbte", X(31,286), X_MASK, BOOKE64, { CT, RA, RB } }, - -{ "lhzxe", X(31,287), X_MASK, BOOKE64, { RT, RA0, RB } }, - -{ "tlbie", X(31,306), XRTLRA_MASK, PPC, { RB, L } }, -{ "tlbi", X(31,306), XRT_MASK, POWER, { RA0, RB } }, - -{ "eciwx", X(31,310), X_MASK, PPC, { RT, RA, RB } }, - -{ "lhzux", X(31,311), X_MASK, COM, { RT, RAL, RB } }, - -{ "xor", XRC(31,316,0), X_MASK, COM, { RA, RS, RB } }, -{ "xor.", XRC(31,316,1), X_MASK, COM, { RA, RS, RB } }, - -{ "lhzuxe", X(31,319), X_MASK, BOOKE64, { RT, RAL, RB } }, - -{ "mfexisr", XSPR(31,323,64), XSPR_MASK, PPC403, { RT } }, -{ "mfexier", XSPR(31,323,66), XSPR_MASK, PPC403, { RT } }, -{ "mfbr0", XSPR(31,323,128), XSPR_MASK, PPC403, { RT } }, -{ "mfbr1", XSPR(31,323,129), XSPR_MASK, PPC403, { RT } }, -{ "mfbr2", XSPR(31,323,130), XSPR_MASK, PPC403, { RT } }, -{ "mfbr3", XSPR(31,323,131), XSPR_MASK, PPC403, { RT } }, -{ "mfbr4", XSPR(31,323,132), XSPR_MASK, PPC403, { RT } }, -{ "mfbr5", XSPR(31,323,133), XSPR_MASK, PPC403, { RT } }, -{ "mfbr6", XSPR(31,323,134), XSPR_MASK, PPC403, { RT } }, -{ "mfbr7", XSPR(31,323,135), XSPR_MASK, 
PPC403, { RT } }, -{ "mfbear", XSPR(31,323,144), XSPR_MASK, PPC403, { RT } }, -{ "mfbesr", XSPR(31,323,145), XSPR_MASK, PPC403, { RT } }, -{ "mfiocr", XSPR(31,323,160), XSPR_MASK, PPC403, { RT } }, -{ "mfdmacr0", XSPR(31,323,192), XSPR_MASK, PPC403, { RT } }, -{ "mfdmact0", XSPR(31,323,193), XSPR_MASK, PPC403, { RT } }, -{ "mfdmada0", XSPR(31,323,194), XSPR_MASK, PPC403, { RT } }, -{ "mfdmasa0", XSPR(31,323,195), XSPR_MASK, PPC403, { RT } }, -{ "mfdmacc0", XSPR(31,323,196), XSPR_MASK, PPC403, { RT } }, -{ "mfdmacr1", XSPR(31,323,200), XSPR_MASK, PPC403, { RT } }, -{ "mfdmact1", XSPR(31,323,201), XSPR_MASK, PPC403, { RT } }, -{ "mfdmada1", XSPR(31,323,202), XSPR_MASK, PPC403, { RT } }, -{ "mfdmasa1", XSPR(31,323,203), XSPR_MASK, PPC403, { RT } }, -{ "mfdmacc1", XSPR(31,323,204), XSPR_MASK, PPC403, { RT } }, -{ "mfdmacr2", XSPR(31,323,208), XSPR_MASK, PPC403, { RT } }, -{ "mfdmact2", XSPR(31,323,209), XSPR_MASK, PPC403, { RT } }, -{ "mfdmada2", XSPR(31,323,210), XSPR_MASK, PPC403, { RT } }, -{ "mfdmasa2", XSPR(31,323,211), XSPR_MASK, PPC403, { RT } }, -{ "mfdmacc2", XSPR(31,323,212), XSPR_MASK, PPC403, { RT } }, -{ "mfdmacr3", XSPR(31,323,216), XSPR_MASK, PPC403, { RT } }, -{ "mfdmact3", XSPR(31,323,217), XSPR_MASK, PPC403, { RT } }, -{ "mfdmada3", XSPR(31,323,218), XSPR_MASK, PPC403, { RT } }, -{ "mfdmasa3", XSPR(31,323,219), XSPR_MASK, PPC403, { RT } }, -{ "mfdmacc3", XSPR(31,323,220), XSPR_MASK, PPC403, { RT } }, -{ "mfdmasr", XSPR(31,323,224), XSPR_MASK, PPC403, { RT } }, -{ "mfdcr", X(31,323), X_MASK, PPC403 | BOOKE, { RT, SPR } }, - -{ "div", XO(31,331,0,0), XO_MASK, M601, { RT, RA, RB } }, -{ "div.", XO(31,331,0,1), XO_MASK, M601, { RT, RA, RB } }, -{ "divo", XO(31,331,1,0), XO_MASK, M601, { RT, RA, RB } }, -{ "divo.", XO(31,331,1,1), XO_MASK, M601, { RT, RA, RB } }, - -{ "mfpmr", X(31,334), X_MASK, PPCPMR, { RT, PMR }}, - -{ "mfmq", XSPR(31,339,0), XSPR_MASK, M601, { RT } }, -{ "mfxer", XSPR(31,339,1), XSPR_MASK, COM, { RT } }, -{ "mfrtcu", XSPR(31,339,4), XSPR_MASK, COM, { RT } }, -{ "mfrtcl", XSPR(31,339,5), XSPR_MASK, COM, { RT } }, -{ "mfdec", XSPR(31,339,6), XSPR_MASK, MFDEC1, { RT } }, -{ "mfdec", XSPR(31,339,22), XSPR_MASK, MFDEC2, { RT } }, -{ "mflr", XSPR(31,339,8), XSPR_MASK, COM, { RT } }, -{ "mfctr", XSPR(31,339,9), XSPR_MASK, COM, { RT } }, -{ "mftid", XSPR(31,339,17), XSPR_MASK, POWER, { RT } }, -{ "mfdsisr", XSPR(31,339,18), XSPR_MASK, COM, { RT } }, -{ "mfdar", XSPR(31,339,19), XSPR_MASK, COM, { RT } }, -{ "mfsdr0", XSPR(31,339,24), XSPR_MASK, POWER, { RT } }, -{ "mfsdr1", XSPR(31,339,25), XSPR_MASK, COM, { RT } }, -{ "mfsrr0", XSPR(31,339,26), XSPR_MASK, COM, { RT } }, -{ "mfsrr1", XSPR(31,339,27), XSPR_MASK, COM, { RT } }, -{ "mfcfar", XSPR(31,339,28), XSPR_MASK, POWER6, { RT } }, -{ "mfpid", XSPR(31,339,48), XSPR_MASK, BOOKE, { RT } }, -{ "mfpid", XSPR(31,339,945), XSPR_MASK, PPC403, { RT } }, -{ "mfcsrr0", XSPR(31,339,58), XSPR_MASK, BOOKE, { RT } }, -{ "mfcsrr1", XSPR(31,339,59), XSPR_MASK, BOOKE, { RT } }, -{ "mfdear", XSPR(31,339,61), XSPR_MASK, BOOKE, { RT } }, -{ "mfdear", XSPR(31,339,981), XSPR_MASK, PPC403, { RT } }, -{ "mfesr", XSPR(31,339,62), XSPR_MASK, BOOKE, { RT } }, -{ "mfesr", XSPR(31,339,980), XSPR_MASK, PPC403, { RT } }, -{ "mfivpr", XSPR(31,339,63), XSPR_MASK, BOOKE, { RT } }, -{ "mfcmpa", XSPR(31,339,144), XSPR_MASK, PPC860, { RT } }, -{ "mfcmpb", XSPR(31,339,145), XSPR_MASK, PPC860, { RT } }, -{ "mfcmpc", XSPR(31,339,146), XSPR_MASK, PPC860, { RT } }, -{ "mfcmpd", XSPR(31,339,147), XSPR_MASK, PPC860, { RT } }, -{ "mficr", XSPR(31,339,148), 
XSPR_MASK, PPC860, { RT } }, -{ "mfder", XSPR(31,339,149), XSPR_MASK, PPC860, { RT } }, -{ "mfcounta", XSPR(31,339,150), XSPR_MASK, PPC860, { RT } }, -{ "mfcountb", XSPR(31,339,151), XSPR_MASK, PPC860, { RT } }, -{ "mfcmpe", XSPR(31,339,152), XSPR_MASK, PPC860, { RT } }, -{ "mfcmpf", XSPR(31,339,153), XSPR_MASK, PPC860, { RT } }, -{ "mfcmpg", XSPR(31,339,154), XSPR_MASK, PPC860, { RT } }, -{ "mfcmph", XSPR(31,339,155), XSPR_MASK, PPC860, { RT } }, -{ "mflctrl1", XSPR(31,339,156), XSPR_MASK, PPC860, { RT } }, -{ "mflctrl2", XSPR(31,339,157), XSPR_MASK, PPC860, { RT } }, -{ "mfictrl", XSPR(31,339,158), XSPR_MASK, PPC860, { RT } }, -{ "mfbar", XSPR(31,339,159), XSPR_MASK, PPC860, { RT } }, -{ "mfvrsave", XSPR(31,339,256), XSPR_MASK, PPCVEC, { RT } }, -{ "mfusprg0", XSPR(31,339,256), XSPR_MASK, BOOKE, { RT } }, -{ "mftb", X(31,371), X_MASK, CLASSIC, { RT, TBR } }, -{ "mftb", XSPR(31,339,268), XSPR_MASK, BOOKE, { RT } }, -{ "mftbl", XSPR(31,371,268), XSPR_MASK, CLASSIC, { RT } }, -{ "mftbl", XSPR(31,339,268), XSPR_MASK, BOOKE, { RT } }, -{ "mftbu", XSPR(31,371,269), XSPR_MASK, CLASSIC, { RT } }, -{ "mftbu", XSPR(31,339,269), XSPR_MASK, BOOKE, { RT } }, -{ "mfsprg", XSPR(31,339,256), XSPRG_MASK, PPC, { RT, SPRG } }, -{ "mfsprg0", XSPR(31,339,272), XSPR_MASK, PPC, { RT } }, -{ "mfsprg1", XSPR(31,339,273), XSPR_MASK, PPC, { RT } }, -{ "mfsprg2", XSPR(31,339,274), XSPR_MASK, PPC, { RT } }, -{ "mfsprg3", XSPR(31,339,275), XSPR_MASK, PPC, { RT } }, -{ "mfsprg4", XSPR(31,339,260), XSPR_MASK, PPC405 | BOOKE, { RT } }, -{ "mfsprg5", XSPR(31,339,261), XSPR_MASK, PPC405 | BOOKE, { RT } }, -{ "mfsprg6", XSPR(31,339,262), XSPR_MASK, PPC405 | BOOKE, { RT } }, -{ "mfsprg7", XSPR(31,339,263), XSPR_MASK, PPC405 | BOOKE, { RT } }, -{ "mfasr", XSPR(31,339,280), XSPR_MASK, PPC64, { RT } }, -{ "mfear", XSPR(31,339,282), XSPR_MASK, PPC, { RT } }, -{ "mfpir", XSPR(31,339,286), XSPR_MASK, BOOKE, { RT } }, -{ "mfpvr", XSPR(31,339,287), XSPR_MASK, PPC, { RT } }, -{ "mfdbsr", XSPR(31,339,304), XSPR_MASK, BOOKE, { RT } }, -{ "mfdbsr", XSPR(31,339,1008), XSPR_MASK, PPC403, { RT } }, -{ "mfdbcr0", XSPR(31,339,308), XSPR_MASK, BOOKE, { RT } }, -{ "mfdbcr0", XSPR(31,339,1010), XSPR_MASK, PPC405, { RT } }, -{ "mfdbcr1", XSPR(31,339,309), XSPR_MASK, BOOKE, { RT } }, -{ "mfdbcr1", XSPR(31,339,957), XSPR_MASK, PPC405, { RT } }, -{ "mfdbcr2", XSPR(31,339,310), XSPR_MASK, BOOKE, { RT } }, -{ "mfiac1", XSPR(31,339,312), XSPR_MASK, BOOKE, { RT } }, -{ "mfiac1", XSPR(31,339,1012), XSPR_MASK, PPC403, { RT } }, -{ "mfiac2", XSPR(31,339,313), XSPR_MASK, BOOKE, { RT } }, -{ "mfiac2", XSPR(31,339,1013), XSPR_MASK, PPC403, { RT } }, -{ "mfiac3", XSPR(31,339,314), XSPR_MASK, BOOKE, { RT } }, -{ "mfiac3", XSPR(31,339,948), XSPR_MASK, PPC405, { RT } }, -{ "mfiac4", XSPR(31,339,315), XSPR_MASK, BOOKE, { RT } }, -{ "mfiac4", XSPR(31,339,949), XSPR_MASK, PPC405, { RT } }, -{ "mfdac1", XSPR(31,339,316), XSPR_MASK, BOOKE, { RT } }, -{ "mfdac1", XSPR(31,339,1014), XSPR_MASK, PPC403, { RT } }, -{ "mfdac2", XSPR(31,339,317), XSPR_MASK, BOOKE, { RT } }, -{ "mfdac2", XSPR(31,339,1015), XSPR_MASK, PPC403, { RT } }, -{ "mfdvc1", XSPR(31,339,318), XSPR_MASK, BOOKE, { RT } }, -{ "mfdvc1", XSPR(31,339,950), XSPR_MASK, PPC405, { RT } }, -{ "mfdvc2", XSPR(31,339,319), XSPR_MASK, BOOKE, { RT } }, -{ "mfdvc2", XSPR(31,339,951), XSPR_MASK, PPC405, { RT } }, -{ "mftsr", XSPR(31,339,336), XSPR_MASK, BOOKE, { RT } }, -{ "mftsr", XSPR(31,339,984), XSPR_MASK, PPC403, { RT } }, -{ "mftcr", XSPR(31,339,340), XSPR_MASK, BOOKE, { RT } }, -{ "mftcr", XSPR(31,339,986), 
XSPR_MASK, PPC403, { RT } }, -{ "mfivor0", XSPR(31,339,400), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor1", XSPR(31,339,401), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor2", XSPR(31,339,402), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor3", XSPR(31,339,403), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor4", XSPR(31,339,404), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor5", XSPR(31,339,405), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor6", XSPR(31,339,406), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor7", XSPR(31,339,407), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor8", XSPR(31,339,408), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor9", XSPR(31,339,409), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor10", XSPR(31,339,410), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor11", XSPR(31,339,411), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor12", XSPR(31,339,412), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor13", XSPR(31,339,413), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor14", XSPR(31,339,414), XSPR_MASK, BOOKE, { RT } }, -{ "mfivor15", XSPR(31,339,415), XSPR_MASK, BOOKE, { RT } }, -{ "mfspefscr", XSPR(31,339,512), XSPR_MASK, PPCSPE, { RT } }, -{ "mfbbear", XSPR(31,339,513), XSPR_MASK, PPCBRLK, { RT } }, -{ "mfbbtar", XSPR(31,339,514), XSPR_MASK, PPCBRLK, { RT } }, -{ "mfivor32", XSPR(31,339,528), XSPR_MASK, PPCSPE, { RT } }, -{ "mfivor33", XSPR(31,339,529), XSPR_MASK, PPCSPE, { RT } }, -{ "mfivor34", XSPR(31,339,530), XSPR_MASK, PPCSPE, { RT } }, -{ "mfivor35", XSPR(31,339,531), XSPR_MASK, PPCPMR, { RT } }, -{ "mfibatu", XSPR(31,339,528), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, -{ "mfibatl", XSPR(31,339,529), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, -{ "mfdbatu", XSPR(31,339,536), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, -{ "mfdbatl", XSPR(31,339,537), XSPRBAT_MASK, PPC, { RT, SPRBAT } }, -{ "mfic_cst", XSPR(31,339,560), XSPR_MASK, PPC860, { RT } }, -{ "mfic_adr", XSPR(31,339,561), XSPR_MASK, PPC860, { RT } }, -{ "mfic_dat", XSPR(31,339,562), XSPR_MASK, PPC860, { RT } }, -{ "mfdc_cst", XSPR(31,339,568), XSPR_MASK, PPC860, { RT } }, -{ "mfdc_adr", XSPR(31,339,569), XSPR_MASK, PPC860, { RT } }, -{ "mfmcsrr0", XSPR(31,339,570), XSPR_MASK, PPCRFMCI, { RT } }, -{ "mfdc_dat", XSPR(31,339,570), XSPR_MASK, PPC860, { RT } }, -{ "mfmcsrr1", XSPR(31,339,571), XSPR_MASK, PPCRFMCI, { RT } }, -{ "mfmcsr", XSPR(31,339,572), XSPR_MASK, PPCRFMCI, { RT } }, -{ "mfmcar", XSPR(31,339,573), XSPR_MASK, PPCRFMCI, { RT } }, -{ "mfdpdr", XSPR(31,339,630), XSPR_MASK, PPC860, { RT } }, -{ "mfdpir", XSPR(31,339,631), XSPR_MASK, PPC860, { RT } }, -{ "mfimmr", XSPR(31,339,638), XSPR_MASK, PPC860, { RT } }, -{ "mfmi_ctr", XSPR(31,339,784), XSPR_MASK, PPC860, { RT } }, -{ "mfmi_ap", XSPR(31,339,786), XSPR_MASK, PPC860, { RT } }, -{ "mfmi_epn", XSPR(31,339,787), XSPR_MASK, PPC860, { RT } }, -{ "mfmi_twc", XSPR(31,339,789), XSPR_MASK, PPC860, { RT } }, -{ "mfmi_rpn", XSPR(31,339,790), XSPR_MASK, PPC860, { RT } }, -{ "mfmd_ctr", XSPR(31,339,792), XSPR_MASK, PPC860, { RT } }, -{ "mfm_casid", XSPR(31,339,793), XSPR_MASK, PPC860, { RT } }, -{ "mfmd_ap", XSPR(31,339,794), XSPR_MASK, PPC860, { RT } }, -{ "mfmd_epn", XSPR(31,339,795), XSPR_MASK, PPC860, { RT } }, -{ "mfmd_twb", XSPR(31,339,796), XSPR_MASK, PPC860, { RT } }, -{ "mfmd_twc", XSPR(31,339,797), XSPR_MASK, PPC860, { RT } }, -{ "mfmd_rpn", XSPR(31,339,798), XSPR_MASK, PPC860, { RT } }, -{ "mfm_tw", XSPR(31,339,799), XSPR_MASK, PPC860, { RT } }, -{ "mfmi_dbcam", XSPR(31,339,816), XSPR_MASK, PPC860, { RT } }, -{ "mfmi_dbram0",XSPR(31,339,817), XSPR_MASK, PPC860, { RT } }, -{ "mfmi_dbram1",XSPR(31,339,818), XSPR_MASK, PPC860, { RT } }, -{ "mfmd_dbcam", XSPR(31,339,824), XSPR_MASK, 
PPC860, { RT } }, -{ "mfmd_dbram0",XSPR(31,339,825), XSPR_MASK, PPC860, { RT } }, -{ "mfmd_dbram1",XSPR(31,339,826), XSPR_MASK, PPC860, { RT } }, -{ "mfummcr0", XSPR(31,339,936), XSPR_MASK, PPC750, { RT } }, -{ "mfupmc1", XSPR(31,339,937), XSPR_MASK, PPC750, { RT } }, -{ "mfupmc2", XSPR(31,339,938), XSPR_MASK, PPC750, { RT } }, -{ "mfusia", XSPR(31,339,939), XSPR_MASK, PPC750, { RT } }, -{ "mfummcr1", XSPR(31,339,940), XSPR_MASK, PPC750, { RT } }, -{ "mfupmc3", XSPR(31,339,941), XSPR_MASK, PPC750, { RT } }, -{ "mfupmc4", XSPR(31,339,942), XSPR_MASK, PPC750, { RT } }, -{ "mfzpr", XSPR(31,339,944), XSPR_MASK, PPC403, { RT } }, -{ "mfccr0", XSPR(31,339,947), XSPR_MASK, PPC405, { RT } }, -{ "mfmmcr0", XSPR(31,339,952), XSPR_MASK, PPC750, { RT } }, -{ "mfpmc1", XSPR(31,339,953), XSPR_MASK, PPC750, { RT } }, -{ "mfsgr", XSPR(31,339,953), XSPR_MASK, PPC403, { RT } }, -{ "mfpmc2", XSPR(31,339,954), XSPR_MASK, PPC750, { RT } }, -{ "mfdcwr", XSPR(31,339,954), XSPR_MASK, PPC403, { RT } }, -{ "mfsia", XSPR(31,339,955), XSPR_MASK, PPC750, { RT } }, -{ "mfsler", XSPR(31,339,955), XSPR_MASK, PPC405, { RT } }, -{ "mfmmcr1", XSPR(31,339,956), XSPR_MASK, PPC750, { RT } }, -{ "mfsu0r", XSPR(31,339,956), XSPR_MASK, PPC405, { RT } }, -{ "mfpmc3", XSPR(31,339,957), XSPR_MASK, PPC750, { RT } }, -{ "mfpmc4", XSPR(31,339,958), XSPR_MASK, PPC750, { RT } }, -{ "mficdbdr", XSPR(31,339,979), XSPR_MASK, PPC403, { RT } }, -{ "mfevpr", XSPR(31,339,982), XSPR_MASK, PPC403, { RT } }, -{ "mfcdbcr", XSPR(31,339,983), XSPR_MASK, PPC403, { RT } }, -{ "mfpit", XSPR(31,339,987), XSPR_MASK, PPC403, { RT } }, -{ "mftbhi", XSPR(31,339,988), XSPR_MASK, PPC403, { RT } }, -{ "mftblo", XSPR(31,339,989), XSPR_MASK, PPC403, { RT } }, -{ "mfsrr2", XSPR(31,339,990), XSPR_MASK, PPC403, { RT } }, -{ "mfsrr3", XSPR(31,339,991), XSPR_MASK, PPC403, { RT } }, -{ "mfl2cr", XSPR(31,339,1017), XSPR_MASK, PPC750, { RT } }, -{ "mfdccr", XSPR(31,339,1018), XSPR_MASK, PPC403, { RT } }, -{ "mficcr", XSPR(31,339,1019), XSPR_MASK, PPC403, { RT } }, -{ "mfictc", XSPR(31,339,1019), XSPR_MASK, PPC750, { RT } }, -{ "mfpbl1", XSPR(31,339,1020), XSPR_MASK, PPC403, { RT } }, -{ "mfthrm1", XSPR(31,339,1020), XSPR_MASK, PPC750, { RT } }, -{ "mfpbu1", XSPR(31,339,1021), XSPR_MASK, PPC403, { RT } }, -{ "mfthrm2", XSPR(31,339,1021), XSPR_MASK, PPC750, { RT } }, -{ "mfpbl2", XSPR(31,339,1022), XSPR_MASK, PPC403, { RT } }, -{ "mfthrm3", XSPR(31,339,1022), XSPR_MASK, PPC750, { RT } }, -{ "mfpbu2", XSPR(31,339,1023), XSPR_MASK, PPC403, { RT } }, -{ "mfspr", X(31,339), X_MASK, COM, { RT, SPR } }, - -{ "lwax", X(31,341), X_MASK, PPC64, { RT, RA0, RB } }, - -{ "dst", XDSS(31,342,0), XDSS_MASK, PPCVEC, { RA, RB, STRM } }, -{ "dstt", XDSS(31,342,1), XDSS_MASK, PPCVEC, { RA, RB, STRM } }, - -{ "lhax", X(31,343), X_MASK, COM, { RT, RA0, RB } }, - -{ "lhaxe", X(31,351), X_MASK, BOOKE64, { RT, RA0, RB } }, - -{ "dstst", XDSS(31,374,0), XDSS_MASK, PPCVEC, { RA, RB, STRM } }, -{ "dststt", XDSS(31,374,1), XDSS_MASK, PPCVEC, { RA, RB, STRM } }, - -{ "dccci", X(31,454), XRT_MASK, PPC403|PPC440, { RA, RB } }, - -{ "abs", XO(31,360,0,0), XORB_MASK, M601, { RT, RA } }, -{ "abs.", XO(31,360,0,1), XORB_MASK, M601, { RT, RA } }, -{ "abso", XO(31,360,1,0), XORB_MASK, M601, { RT, RA } }, -{ "abso.", XO(31,360,1,1), XORB_MASK, M601, { RT, RA } }, - -{ "divs", XO(31,363,0,0), XO_MASK, M601, { RT, RA, RB } }, -{ "divs.", XO(31,363,0,1), XO_MASK, M601, { RT, RA, RB } }, -{ "divso", XO(31,363,1,0), XO_MASK, M601, { RT, RA, RB } }, -{ "divso.", XO(31,363,1,1), XO_MASK, M601, { RT, RA, RB } }, - -{ 
"tlbia", X(31,370), 0xffffffff, PPC, { 0 } }, - -{ "lwaux", X(31,373), X_MASK, PPC64, { RT, RAL, RB } }, - -{ "lhaux", X(31,375), X_MASK, COM, { RT, RAL, RB } }, - -{ "lhauxe", X(31,383), X_MASK, BOOKE64, { RT, RAL, RB } }, - -{ "mtdcrx", X(31,387), X_MASK, BOOKE, { RA, RS } }, - -{ "dcblc", X(31,390), X_MASK, PPCCHLK, { CT, RA, RB }}, - -{ "subfe64", XO(31,392,0,0), XO_MASK, BOOKE64, { RT, RA, RB } }, -{ "subfe64o",XO(31,392,1,0), XO_MASK, BOOKE64, { RT, RA, RB } }, - -{ "adde64", XO(31,394,0,0), XO_MASK, BOOKE64, { RT, RA, RB } }, -{ "adde64o", XO(31,394,1,0), XO_MASK, BOOKE64, { RT, RA, RB } }, - -{ "dcblce", X(31,398), X_MASK, PPCCHLK64, { CT, RA, RB }}, - -{ "slbmte", X(31,402), XRA_MASK, PPC64, { RS, RB } }, - -{ "sthx", X(31,407), X_MASK, COM, { RS, RA0, RB } }, - -{ "cmpb", X(31,508), X_MASK, POWER6, { RA, RS, RB } }, - -{ "lfqx", X(31,791), X_MASK, POWER2, { FRT, RA, RB } }, - -{ "lfdpx", X(31,791), X_MASK, POWER6, { FRT, RA, RB } }, - -{ "lfqux", X(31,823), X_MASK, POWER2, { FRT, RA, RB } }, - -{ "stfqx", X(31,919), X_MASK, POWER2, { FRS, RA, RB } }, - -{ "stfdpx", X(31,919), X_MASK, POWER6, { FRS, RA, RB } }, - -{ "stfqux", X(31,951), X_MASK, POWER2, { FRS, RA, RB } }, - -{ "orc", XRC(31,412,0), X_MASK, COM, { RA, RS, RB } }, -{ "orc.", XRC(31,412,1), X_MASK, COM, { RA, RS, RB } }, - -{ "sradi", XS(31,413,0), XS_MASK, PPC64, { RA, RS, SH6 } }, -{ "sradi.", XS(31,413,1), XS_MASK, PPC64, { RA, RS, SH6 } }, - -{ "sthxe", X(31,415), X_MASK, BOOKE64, { RS, RA0, RB } }, - -{ "slbie", X(31,434), XRTRA_MASK, PPC64, { RB } }, - -{ "ecowx", X(31,438), X_MASK, PPC, { RT, RA, RB } }, - -{ "sthux", X(31,439), X_MASK, COM, { RS, RAS, RB } }, - -{ "sthuxe", X(31,447), X_MASK, BOOKE64, { RS, RAS, RB } }, - -{ "cctpl", 0x7c210b78, 0xffffffff, CELL, { 0 }}, -{ "cctpm", 0x7c421378, 0xffffffff, CELL, { 0 }}, -{ "cctph", 0x7c631b78, 0xffffffff, CELL, { 0 }}, -{ "db8cyc", 0x7f9ce378, 0xffffffff, CELL, { 0 }}, -{ "db10cyc", 0x7fbdeb78, 0xffffffff, CELL, { 0 }}, -{ "db12cyc", 0x7fdef378, 0xffffffff, CELL, { 0 }}, -{ "db16cyc", 0x7ffffb78, 0xffffffff, CELL, { 0 }}, -{ "mr", XRC(31,444,0), X_MASK, COM, { RA, RS, RBS } }, -{ "or", XRC(31,444,0), X_MASK, COM, { RA, RS, RB } }, -{ "mr.", XRC(31,444,1), X_MASK, COM, { RA, RS, RBS } }, -{ "or.", XRC(31,444,1), X_MASK, COM, { RA, RS, RB } }, - -{ "mtexisr", XSPR(31,451,64), XSPR_MASK, PPC403, { RS } }, -{ "mtexier", XSPR(31,451,66), XSPR_MASK, PPC403, { RS } }, -{ "mtbr0", XSPR(31,451,128), XSPR_MASK, PPC403, { RS } }, -{ "mtbr1", XSPR(31,451,129), XSPR_MASK, PPC403, { RS } }, -{ "mtbr2", XSPR(31,451,130), XSPR_MASK, PPC403, { RS } }, -{ "mtbr3", XSPR(31,451,131), XSPR_MASK, PPC403, { RS } }, -{ "mtbr4", XSPR(31,451,132), XSPR_MASK, PPC403, { RS } }, -{ "mtbr5", XSPR(31,451,133), XSPR_MASK, PPC403, { RS } }, -{ "mtbr6", XSPR(31,451,134), XSPR_MASK, PPC403, { RS } }, -{ "mtbr7", XSPR(31,451,135), XSPR_MASK, PPC403, { RS } }, -{ "mtbear", XSPR(31,451,144), XSPR_MASK, PPC403, { RS } }, -{ "mtbesr", XSPR(31,451,145), XSPR_MASK, PPC403, { RS } }, -{ "mtiocr", XSPR(31,451,160), XSPR_MASK, PPC403, { RS } }, -{ "mtdmacr0", XSPR(31,451,192), XSPR_MASK, PPC403, { RS } }, -{ "mtdmact0", XSPR(31,451,193), XSPR_MASK, PPC403, { RS } }, -{ "mtdmada0", XSPR(31,451,194), XSPR_MASK, PPC403, { RS } }, -{ "mtdmasa0", XSPR(31,451,195), XSPR_MASK, PPC403, { RS } }, -{ "mtdmacc0", XSPR(31,451,196), XSPR_MASK, PPC403, { RS } }, -{ "mtdmacr1", XSPR(31,451,200), XSPR_MASK, PPC403, { RS } }, -{ "mtdmact1", XSPR(31,451,201), XSPR_MASK, PPC403, { RS } }, -{ "mtdmada1", XSPR(31,451,202), 
XSPR_MASK, PPC403, { RS } }, -{ "mtdmasa1", XSPR(31,451,203), XSPR_MASK, PPC403, { RS } }, -{ "mtdmacc1", XSPR(31,451,204), XSPR_MASK, PPC403, { RS } }, -{ "mtdmacr2", XSPR(31,451,208), XSPR_MASK, PPC403, { RS } }, -{ "mtdmact2", XSPR(31,451,209), XSPR_MASK, PPC403, { RS } }, -{ "mtdmada2", XSPR(31,451,210), XSPR_MASK, PPC403, { RS } }, -{ "mtdmasa2", XSPR(31,451,211), XSPR_MASK, PPC403, { RS } }, -{ "mtdmacc2", XSPR(31,451,212), XSPR_MASK, PPC403, { RS } }, -{ "mtdmacr3", XSPR(31,451,216), XSPR_MASK, PPC403, { RS } }, -{ "mtdmact3", XSPR(31,451,217), XSPR_MASK, PPC403, { RS } }, -{ "mtdmada3", XSPR(31,451,218), XSPR_MASK, PPC403, { RS } }, -{ "mtdmasa3", XSPR(31,451,219), XSPR_MASK, PPC403, { RS } }, -{ "mtdmacc3", XSPR(31,451,220), XSPR_MASK, PPC403, { RS } }, -{ "mtdmasr", XSPR(31,451,224), XSPR_MASK, PPC403, { RS } }, -{ "mtdcr", X(31,451), X_MASK, PPC403 | BOOKE, { SPR, RS } }, - -{ "subfze64",XO(31,456,0,0), XORB_MASK, BOOKE64, { RT, RA } }, -{ "subfze64o",XO(31,456,1,0), XORB_MASK, BOOKE64, { RT, RA } }, - -{ "divdu", XO(31,457,0,0), XO_MASK, PPC64, { RT, RA, RB } }, -{ "divdu.", XO(31,457,0,1), XO_MASK, PPC64, { RT, RA, RB } }, -{ "divduo", XO(31,457,1,0), XO_MASK, PPC64, { RT, RA, RB } }, -{ "divduo.", XO(31,457,1,1), XO_MASK, PPC64, { RT, RA, RB } }, - -{ "addze64", XO(31,458,0,0), XORB_MASK, BOOKE64, { RT, RA } }, -{ "addze64o",XO(31,458,1,0), XORB_MASK, BOOKE64, { RT, RA } }, - -{ "divwu", XO(31,459,0,0), XO_MASK, PPC, { RT, RA, RB } }, -{ "divwu.", XO(31,459,0,1), XO_MASK, PPC, { RT, RA, RB } }, -{ "divwuo", XO(31,459,1,0), XO_MASK, PPC, { RT, RA, RB } }, -{ "divwuo.", XO(31,459,1,1), XO_MASK, PPC, { RT, RA, RB } }, - -{ "mtmq", XSPR(31,467,0), XSPR_MASK, M601, { RS } }, -{ "mtxer", XSPR(31,467,1), XSPR_MASK, COM, { RS } }, -{ "mtlr", XSPR(31,467,8), XSPR_MASK, COM, { RS } }, -{ "mtctr", XSPR(31,467,9), XSPR_MASK, COM, { RS } }, -{ "mttid", XSPR(31,467,17), XSPR_MASK, POWER, { RS } }, -{ "mtdsisr", XSPR(31,467,18), XSPR_MASK, COM, { RS } }, -{ "mtdar", XSPR(31,467,19), XSPR_MASK, COM, { RS } }, -{ "mtrtcu", XSPR(31,467,20), XSPR_MASK, COM, { RS } }, -{ "mtrtcl", XSPR(31,467,21), XSPR_MASK, COM, { RS } }, -{ "mtdec", XSPR(31,467,22), XSPR_MASK, COM, { RS } }, -{ "mtsdr0", XSPR(31,467,24), XSPR_MASK, POWER, { RS } }, -{ "mtsdr1", XSPR(31,467,25), XSPR_MASK, COM, { RS } }, -{ "mtsrr0", XSPR(31,467,26), XSPR_MASK, COM, { RS } }, -{ "mtsrr1", XSPR(31,467,27), XSPR_MASK, COM, { RS } }, -{ "mtcfar", XSPR(31,467,28), XSPR_MASK, POWER6, { RS } }, -{ "mtpid", XSPR(31,467,48), XSPR_MASK, BOOKE, { RS } }, -{ "mtpid", XSPR(31,467,945), XSPR_MASK, PPC403, { RS } }, -{ "mtdecar", XSPR(31,467,54), XSPR_MASK, BOOKE, { RS } }, -{ "mtcsrr0", XSPR(31,467,58), XSPR_MASK, BOOKE, { RS } }, -{ "mtcsrr1", XSPR(31,467,59), XSPR_MASK, BOOKE, { RS } }, -{ "mtdear", XSPR(31,467,61), XSPR_MASK, BOOKE, { RS } }, -{ "mtdear", XSPR(31,467,981), XSPR_MASK, PPC403, { RS } }, -{ "mtesr", XSPR(31,467,62), XSPR_MASK, BOOKE, { RS } }, -{ "mtesr", XSPR(31,467,980), XSPR_MASK, PPC403, { RS } }, -{ "mtivpr", XSPR(31,467,63), XSPR_MASK, BOOKE, { RS } }, -{ "mtcmpa", XSPR(31,467,144), XSPR_MASK, PPC860, { RS } }, -{ "mtcmpb", XSPR(31,467,145), XSPR_MASK, PPC860, { RS } }, -{ "mtcmpc", XSPR(31,467,146), XSPR_MASK, PPC860, { RS } }, -{ "mtcmpd", XSPR(31,467,147), XSPR_MASK, PPC860, { RS } }, -{ "mticr", XSPR(31,467,148), XSPR_MASK, PPC860, { RS } }, -{ "mtder", XSPR(31,467,149), XSPR_MASK, PPC860, { RS } }, -{ "mtcounta", XSPR(31,467,150), XSPR_MASK, PPC860, { RS } }, -{ "mtcountb", XSPR(31,467,151), XSPR_MASK, PPC860, { 
RS } }, -{ "mtcmpe", XSPR(31,467,152), XSPR_MASK, PPC860, { RS } }, -{ "mtcmpf", XSPR(31,467,153), XSPR_MASK, PPC860, { RS } }, -{ "mtcmpg", XSPR(31,467,154), XSPR_MASK, PPC860, { RS } }, -{ "mtcmph", XSPR(31,467,155), XSPR_MASK, PPC860, { RS } }, -{ "mtlctrl1", XSPR(31,467,156), XSPR_MASK, PPC860, { RS } }, -{ "mtlctrl2", XSPR(31,467,157), XSPR_MASK, PPC860, { RS } }, -{ "mtictrl", XSPR(31,467,158), XSPR_MASK, PPC860, { RS } }, -{ "mtbar", XSPR(31,467,159), XSPR_MASK, PPC860, { RS } }, -{ "mtvrsave", XSPR(31,467,256), XSPR_MASK, PPCVEC, { RS } }, -{ "mtusprg0", XSPR(31,467,256), XSPR_MASK, BOOKE, { RS } }, -{ "mtsprg", XSPR(31,467,256), XSPRG_MASK,PPC, { SPRG, RS } }, -{ "mtsprg0", XSPR(31,467,272), XSPR_MASK, PPC, { RS } }, -{ "mtsprg1", XSPR(31,467,273), XSPR_MASK, PPC, { RS } }, -{ "mtsprg2", XSPR(31,467,274), XSPR_MASK, PPC, { RS } }, -{ "mtsprg3", XSPR(31,467,275), XSPR_MASK, PPC, { RS } }, -{ "mtsprg4", XSPR(31,467,276), XSPR_MASK, PPC405 | BOOKE, { RS } }, -{ "mtsprg5", XSPR(31,467,277), XSPR_MASK, PPC405 | BOOKE, { RS } }, -{ "mtsprg6", XSPR(31,467,278), XSPR_MASK, PPC405 | BOOKE, { RS } }, -{ "mtsprg7", XSPR(31,467,279), XSPR_MASK, PPC405 | BOOKE, { RS } }, -{ "mtasr", XSPR(31,467,280), XSPR_MASK, PPC64, { RS } }, -{ "mtear", XSPR(31,467,282), XSPR_MASK, PPC, { RS } }, -{ "mttbl", XSPR(31,467,284), XSPR_MASK, PPC, { RS } }, -{ "mttbu", XSPR(31,467,285), XSPR_MASK, PPC, { RS } }, -{ "mtdbsr", XSPR(31,467,304), XSPR_MASK, BOOKE, { RS } }, -{ "mtdbsr", XSPR(31,467,1008), XSPR_MASK, PPC403, { RS } }, -{ "mtdbcr0", XSPR(31,467,308), XSPR_MASK, BOOKE, { RS } }, -{ "mtdbcr0", XSPR(31,467,1010), XSPR_MASK, PPC405, { RS } }, -{ "mtdbcr1", XSPR(31,467,309), XSPR_MASK, BOOKE, { RS } }, -{ "mtdbcr1", XSPR(31,467,957), XSPR_MASK, PPC405, { RS } }, -{ "mtdbcr2", XSPR(31,467,310), XSPR_MASK, BOOKE, { RS } }, -{ "mtiac1", XSPR(31,467,312), XSPR_MASK, BOOKE, { RS } }, -{ "mtiac1", XSPR(31,467,1012), XSPR_MASK, PPC403, { RS } }, -{ "mtiac2", XSPR(31,467,313), XSPR_MASK, BOOKE, { RS } }, -{ "mtiac2", XSPR(31,467,1013), XSPR_MASK, PPC403, { RS } }, -{ "mtiac3", XSPR(31,467,314), XSPR_MASK, BOOKE, { RS } }, -{ "mtiac3", XSPR(31,467,948), XSPR_MASK, PPC405, { RS } }, -{ "mtiac4", XSPR(31,467,315), XSPR_MASK, BOOKE, { RS } }, -{ "mtiac4", XSPR(31,467,949), XSPR_MASK, PPC405, { RS } }, -{ "mtdac1", XSPR(31,467,316), XSPR_MASK, BOOKE, { RS } }, -{ "mtdac1", XSPR(31,467,1014), XSPR_MASK, PPC403, { RS } }, -{ "mtdac2", XSPR(31,467,317), XSPR_MASK, BOOKE, { RS } }, -{ "mtdac2", XSPR(31,467,1015), XSPR_MASK, PPC403, { RS } }, -{ "mtdvc1", XSPR(31,467,318), XSPR_MASK, BOOKE, { RS } }, -{ "mtdvc1", XSPR(31,467,950), XSPR_MASK, PPC405, { RS } }, -{ "mtdvc2", XSPR(31,467,319), XSPR_MASK, BOOKE, { RS } }, -{ "mtdvc2", XSPR(31,467,951), XSPR_MASK, PPC405, { RS } }, -{ "mttsr", XSPR(31,467,336), XSPR_MASK, BOOKE, { RS } }, -{ "mttsr", XSPR(31,467,984), XSPR_MASK, PPC403, { RS } }, -{ "mttcr", XSPR(31,467,340), XSPR_MASK, BOOKE, { RS } }, -{ "mttcr", XSPR(31,467,986), XSPR_MASK, PPC403, { RS } }, -{ "mtivor0", XSPR(31,467,400), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor1", XSPR(31,467,401), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor2", XSPR(31,467,402), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor3", XSPR(31,467,403), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor4", XSPR(31,467,404), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor5", XSPR(31,467,405), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor6", XSPR(31,467,406), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor7", XSPR(31,467,407), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor8", XSPR(31,467,408), XSPR_MASK, BOOKE, 
{ RS } }, -{ "mtivor9", XSPR(31,467,409), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor10", XSPR(31,467,410), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor11", XSPR(31,467,411), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor12", XSPR(31,467,412), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor13", XSPR(31,467,413), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor14", XSPR(31,467,414), XSPR_MASK, BOOKE, { RS } }, -{ "mtivor15", XSPR(31,467,415), XSPR_MASK, BOOKE, { RS } }, -{ "mtspefscr", XSPR(31,467,512), XSPR_MASK, PPCSPE, { RS } }, -{ "mtbbear", XSPR(31,467,513), XSPR_MASK, PPCBRLK, { RS } }, -{ "mtbbtar", XSPR(31,467,514), XSPR_MASK, PPCBRLK, { RS } }, -{ "mtivor32", XSPR(31,467,528), XSPR_MASK, PPCSPE, { RS } }, -{ "mtivor33", XSPR(31,467,529), XSPR_MASK, PPCSPE, { RS } }, -{ "mtivor34", XSPR(31,467,530), XSPR_MASK, PPCSPE, { RS } }, -{ "mtivor35", XSPR(31,467,531), XSPR_MASK, PPCPMR, { RS } }, -{ "mtibatu", XSPR(31,467,528), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, -{ "mtibatl", XSPR(31,467,529), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, -{ "mtdbatu", XSPR(31,467,536), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, -{ "mtdbatl", XSPR(31,467,537), XSPRBAT_MASK, PPC, { SPRBAT, RS } }, -{ "mtmcsrr0", XSPR(31,467,570), XSPR_MASK, PPCRFMCI, { RS } }, -{ "mtmcsrr1", XSPR(31,467,571), XSPR_MASK, PPCRFMCI, { RS } }, -{ "mtmcsr", XSPR(31,467,572), XSPR_MASK, PPCRFMCI, { RS } }, -{ "mtummcr0", XSPR(31,467,936), XSPR_MASK, PPC750, { RS } }, -{ "mtupmc1", XSPR(31,467,937), XSPR_MASK, PPC750, { RS } }, -{ "mtupmc2", XSPR(31,467,938), XSPR_MASK, PPC750, { RS } }, -{ "mtusia", XSPR(31,467,939), XSPR_MASK, PPC750, { RS } }, -{ "mtummcr1", XSPR(31,467,940), XSPR_MASK, PPC750, { RS } }, -{ "mtupmc3", XSPR(31,467,941), XSPR_MASK, PPC750, { RS } }, -{ "mtupmc4", XSPR(31,467,942), XSPR_MASK, PPC750, { RS } }, -{ "mtzpr", XSPR(31,467,944), XSPR_MASK, PPC403, { RS } }, -{ "mtccr0", XSPR(31,467,947), XSPR_MASK, PPC405, { RS } }, -{ "mtmmcr0", XSPR(31,467,952), XSPR_MASK, PPC750, { RS } }, -{ "mtsgr", XSPR(31,467,953), XSPR_MASK, PPC403, { RS } }, -{ "mtpmc1", XSPR(31,467,953), XSPR_MASK, PPC750, { RS } }, -{ "mtdcwr", XSPR(31,467,954), XSPR_MASK, PPC403, { RS } }, -{ "mtpmc2", XSPR(31,467,954), XSPR_MASK, PPC750, { RS } }, -{ "mtsler", XSPR(31,467,955), XSPR_MASK, PPC405, { RS } }, -{ "mtsia", XSPR(31,467,955), XSPR_MASK, PPC750, { RS } }, -{ "mtsu0r", XSPR(31,467,956), XSPR_MASK, PPC405, { RS } }, -{ "mtmmcr1", XSPR(31,467,956), XSPR_MASK, PPC750, { RS } }, -{ "mtpmc3", XSPR(31,467,957), XSPR_MASK, PPC750, { RS } }, -{ "mtpmc4", XSPR(31,467,958), XSPR_MASK, PPC750, { RS } }, -{ "mticdbdr", XSPR(31,467,979), XSPR_MASK, PPC403, { RS } }, -{ "mtevpr", XSPR(31,467,982), XSPR_MASK, PPC403, { RS } }, -{ "mtcdbcr", XSPR(31,467,983), XSPR_MASK, PPC403, { RS } }, -{ "mtpit", XSPR(31,467,987), XSPR_MASK, PPC403, { RS } }, -{ "mttbhi", XSPR(31,467,988), XSPR_MASK, PPC403, { RS } }, -{ "mttblo", XSPR(31,467,989), XSPR_MASK, PPC403, { RS } }, -{ "mtsrr2", XSPR(31,467,990), XSPR_MASK, PPC403, { RS } }, -{ "mtsrr3", XSPR(31,467,991), XSPR_MASK, PPC403, { RS } }, -{ "mtl2cr", XSPR(31,467,1017), XSPR_MASK, PPC750, { RS } }, -{ "mtdccr", XSPR(31,467,1018), XSPR_MASK, PPC403, { RS } }, -{ "mticcr", XSPR(31,467,1019), XSPR_MASK, PPC403, { RS } }, -{ "mtictc", XSPR(31,467,1019), XSPR_MASK, PPC750, { RS } }, -{ "mtpbl1", XSPR(31,467,1020), XSPR_MASK, PPC403, { RS } }, -{ "mtthrm1", XSPR(31,467,1020), XSPR_MASK, PPC750, { RS } }, -{ "mtpbu1", XSPR(31,467,1021), XSPR_MASK, PPC403, { RS } }, -{ "mtthrm2", XSPR(31,467,1021), XSPR_MASK, PPC750, { RS } }, -{ "mtpbl2", XSPR(31,467,1022), 
XSPR_MASK, PPC403, { RS } }, -{ "mtthrm3", XSPR(31,467,1022), XSPR_MASK, PPC750, { RS } }, -{ "mtpbu2", XSPR(31,467,1023), XSPR_MASK, PPC403, { RS } }, -{ "mtspr", X(31,467), X_MASK, COM, { SPR, RS } }, - -{ "dcbi", X(31,470), XRT_MASK, PPC, { RA, RB } }, - -{ "nand", XRC(31,476,0), X_MASK, COM, { RA, RS, RB } }, -{ "nand.", XRC(31,476,1), X_MASK, COM, { RA, RS, RB } }, - -{ "dcbie", X(31,478), XRT_MASK, BOOKE64, { RA, RB } }, - -{ "dcread", X(31,486), X_MASK, PPC403|PPC440, { RT, RA, RB }}, - -{ "mtpmr", X(31,462), X_MASK, PPCPMR, { PMR, RS }}, - -{ "icbtls", X(31,486), X_MASK, PPCCHLK, { CT, RA, RB }}, - -{ "nabs", XO(31,488,0,0), XORB_MASK, M601, { RT, RA } }, -{ "subfme64",XO(31,488,0,0), XORB_MASK, BOOKE64, { RT, RA } }, -{ "nabs.", XO(31,488,0,1), XORB_MASK, M601, { RT, RA } }, -{ "nabso", XO(31,488,1,0), XORB_MASK, M601, { RT, RA } }, -{ "subfme64o",XO(31,488,1,0), XORB_MASK, BOOKE64, { RT, RA } }, -{ "nabso.", XO(31,488,1,1), XORB_MASK, M601, { RT, RA } }, - -{ "divd", XO(31,489,0,0), XO_MASK, PPC64, { RT, RA, RB } }, -{ "divd.", XO(31,489,0,1), XO_MASK, PPC64, { RT, RA, RB } }, -{ "divdo", XO(31,489,1,0), XO_MASK, PPC64, { RT, RA, RB } }, -{ "divdo.", XO(31,489,1,1), XO_MASK, PPC64, { RT, RA, RB } }, - -{ "addme64", XO(31,490,0,0), XORB_MASK, BOOKE64, { RT, RA } }, -{ "addme64o",XO(31,490,1,0), XORB_MASK, BOOKE64, { RT, RA } }, - -{ "divw", XO(31,491,0,0), XO_MASK, PPC, { RT, RA, RB } }, -{ "divw.", XO(31,491,0,1), XO_MASK, PPC, { RT, RA, RB } }, -{ "divwo", XO(31,491,1,0), XO_MASK, PPC, { RT, RA, RB } }, -{ "divwo.", XO(31,491,1,1), XO_MASK, PPC, { RT, RA, RB } }, +{"addme", XO(31,234,0,0), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"ame", XO(31,234,0,0), XORB_MASK, PWRCOM, 0, {RT, RA}}, +{"addme.", XO(31,234,0,1), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"ame.", XO(31,234,0,1), XORB_MASK, PWRCOM, 0, {RT, RA}}, -{ "icbtlse", X(31,494), X_MASK, PPCCHLK64, { CT, RA, RB }}, +{"mullw", XO(31,235,0,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"muls", XO(31,235,0,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"mullw.", XO(31,235,0,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"muls.", XO(31,235,0,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "slbia", X(31,498), 0xffffffff, PPC64, { 0 } }, +{"icblce", X(31,238), X_MASK, PPCCHLK, E500MC|PPCA2, {CT, RA, RB}}, +{"msgclr", XRTRA(31,238,0,0), XRTRA_MASK, E500MC|PPCA2|POWER8, 0, {RB}}, +{"mtsrin", X(31,242), XRA_MASK, PPC, NON32, {RS, RB}}, +{"mtsri", X(31,242), XRA_MASK, POWER, NON32, {RS, RB}}, -{ "cli", X(31,502), XRB_MASK, POWER, { RT, RA } }, +{"mtfprwz", X(31,243), XX1RB_MASK|1, PPCVSX2, 0, {FRT, RA}}, +{"mtvrwz", X(31,243)|1, XX1RB_MASK|1, PPCVSX2, 0, {VD, RA}}, +{"mtvsrwz", X(31,243), XX1RB_MASK, PPCVSX2, 0, {XT6, RA}}, -{ "stdcxe.", XRC(31,511,1), X_MASK, BOOKE64, { RS, RA, RB } }, +{"dcbtstt", XRT(31,246,0x10), XRT_MASK, POWER7, 0, {RA0, RB}}, +{"dcbtst", X(31,246), X_MASK, POWER4, DCBT_EO, {RA0, RB, CT}}, +{"dcbtst", X(31,246), X_MASK, DCBT_EO, 0, {CT, RA0, RB}}, +{"dcbtst", X(31,246), X_MASK, PPC, POWER4|DCBT_EO, {RA0, RB}}, -{ "mcrxr", X(31,512), XRARB_MASK|(3<<21), COM, { BF } }, +{"stbux", X(31,247), X_MASK, COM, 0, {RS, RAS, RB}}, -{ "bblels", X(31,518), X_MASK, PPCBRLK, { 0 }}, -{ "mcrxr64", X(31,544), XRARB_MASK|(3<<21), BOOKE64, { BF } }, +{"slliq", XRC(31,248,0), X_MASK, M601, 0, {RA, RS, SH}}, +{"slliq.", XRC(31,248,1), X_MASK, M601, 0, {RA, RS, SH}}, -{ "clcs", X(31,531), XRB_MASK, M601, { RT, RA } }, +{"bpermd", X(31,252), X_MASK, POWER7|PPCA2, 0, {RA, RS, RB}}, -{ "ldbrx", X(31,532), X_MASK, CELL, { RT, RA0, RB } }, +{"dcbtstep", XRT(31,255,0), 
X_MASK, E500MC|PPCA2, 0, {RT, RA0, RB}}, -{ "lswx", X(31,533), X_MASK, PPCCOM, { RT, RA0, RB } }, -{ "lsx", X(31,533), X_MASK, PWRCOM, { RT, RA, RB } }, +{"mfdcrx", X(31,259), X_MASK, BOOKE|PPCA2|PPC476, TITAN, {RS, RA}}, +{"mfdcrx.", XRC(31,259,1), X_MASK, PPCA2, 0, {RS, RA}}, -{ "lwbrx", X(31,534), X_MASK, PPCCOM, { RT, RA0, RB } }, -{ "lbrx", X(31,534), X_MASK, PWRCOM, { RT, RA, RB } }, +{"lvexbx", X(31,261), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, -{ "lfsx", X(31,535), X_MASK, COM, { FRT, RA0, RB } }, +{"icbt", X(31,262), XRT_MASK, PPC403, 0, {RA, RB}}, -{ "srw", XRC(31,536,0), X_MASK, PPCCOM, { RA, RS, RB } }, -{ "sr", XRC(31,536,0), X_MASK, PWRCOM, { RA, RS, RB } }, -{ "srw.", XRC(31,536,1), X_MASK, PPCCOM, { RA, RS, RB } }, -{ "sr.", XRC(31,536,1), X_MASK, PWRCOM, { RA, RS, RB } }, +{"lvepxl", X(31,263), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, -{ "rrib", XRC(31,537,0), X_MASK, M601, { RA, RS, RB } }, -{ "rrib.", XRC(31,537,1), X_MASK, M601, { RA, RS, RB } }, +{"ldfcmx", APU(31,263,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, +{"doz", XO(31,264,0,0), XO_MASK, M601, 0, {RT, RA, RB}}, +{"doz.", XO(31,264,0,1), XO_MASK, M601, 0, {RT, RA, RB}}, -{ "srd", XRC(31,539,0), X_MASK, PPC64, { RA, RS, RB } }, -{ "srd.", XRC(31,539,1), X_MASK, PPC64, { RA, RS, RB } }, +{"modud", X(31,265), X_MASK, POWER9, 0, {RT, RA, RB}}, -{ "maskir", XRC(31,541,0), X_MASK, M601, { RA, RS, RB } }, -{ "maskir.", XRC(31,541,1), X_MASK, M601, { RA, RS, RB } }, +{"add", XO(31,266,0,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"cax", XO(31,266,0,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"add.", XO(31,266,0,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"cax.", XO(31,266,0,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "lwbrxe", X(31,542), X_MASK, BOOKE64, { RT, RA0, RB } }, +{"moduw", X(31,267), X_MASK, POWER9, 0, {RT, RA, RB}}, -{ "lfsxe", X(31,543), X_MASK, BOOKE64, { FRT, RA0, RB } }, +{"lxvx", X(31,268), XX1_MASK|1<<6, PPCVSX3, 0, {XT6, RA0, RB}}, +{"lxvl", X(31,269), XX1_MASK, PPCVSX3, 0, {XT6, RA0, RB}}, -{ "bbelr", X(31,550), X_MASK, PPCBRLK, { 0 }}, +{"ehpriv", X(31,270), 0xffffffff, E500MC|PPCA2, 0, {0}}, -{ "tlbsync", X(31,566), 0xffffffff, PPC, { 0 } }, +{"tlbiel", X(31,274), X_MASK|1<<20,POWER9, PPC476, {RB, RSO, RIC, PRS, X_R}}, +{"tlbiel", X(31,274), XRTLRA_MASK, POWER4, POWER9|PPC476, {RB, LOPT}}, -{ "lfsux", X(31,567), X_MASK, COM, { FRT, RAS, RB } }, +{"mfapidi", X(31,275), X_MASK, BOOKE, E500|TITAN, {RT, RA}}, -{ "lfsuxe", X(31,575), X_MASK, BOOKE64, { FRT, RAS, RB } }, +{"lqarx", X(31,276), XEH_MASK, POWER8, 0, {RTQ, RAX, RBX, EH}}, -{ "mfsr", X(31,595), XRB_MASK|(1<<20), COM32, { RT, SR } }, +{"lscbx", XRC(31,277,0), X_MASK, M601, 0, {RT, RA, RB}}, +{"lscbx.", XRC(31,277,1), X_MASK, M601, 0, {RT, RA, RB}}, -{ "lswi", X(31,597), X_MASK, PPCCOM, { RT, RA0, NB } }, -{ "lsi", X(31,597), X_MASK, PWRCOM, { RT, RA0, NB } }, +{"dcbtt", XRT(31,278,0x10), XRT_MASK, POWER7, 0, {RA0, RB}}, +{"dcbt", X(31,278), X_MASK, POWER4, DCBT_EO, {RA0, RB, CT}}, +{"dcbt", X(31,278), X_MASK, DCBT_EO, 0, {CT, RA0, RB}}, +{"dcbt", X(31,278), X_MASK, PPC, POWER4|DCBT_EO, {RA0, RB}}, -{ "lwsync", XSYNC(31,598,1), 0xffffffff, PPC, { 0 } }, -{ "ptesync", XSYNC(31,598,2), 0xffffffff, PPC64, { 0 } }, -{ "msync", X(31,598), 0xffffffff, BOOKE, { 0 } }, -{ "sync", X(31,598), XSYNC_MASK, PPCCOM, { LS } }, -{ "dcs", X(31,598), 0xffffffff, PWRCOM, { 0 } }, +{"lhzx", X(31,279), X_MASK, COM, 0, {RT, RA0, RB}}, -{ "lfdx", X(31,599), X_MASK, COM, { FRT, RA0, RB } }, +{"cdtbcd", X(31,282), XRB_MASK, POWER6, 0, {RA, RS}}, -{ "lfdxe", X(31,607), X_MASK, BOOKE64, { FRT, 
RA0, RB } }, +{"eqv", XRC(31,284,0), X_MASK, COM, 0, {RA, RS, RB}}, +{"eqv.", XRC(31,284,1), X_MASK, COM, 0, {RA, RS, RB}}, -{ "mffgpr", XRC(31,607,0), XRA_MASK, POWER6, { FRT, RB } }, +{"lhepx", X(31,287), X_MASK, E500MC|PPCA2, 0, {RT, RA0, RB}}, -{ "mfsri", X(31,627), X_MASK, PWRCOM, { RT, RA, RB } }, +{"mfdcrux", X(31,291), X_MASK, PPC464, 0, {RS, RA}}, -{ "dclst", X(31,630), XRB_MASK, PWRCOM, { RS, RA } }, +{"lvexhx", X(31,293), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, +{"lvepx", X(31,295), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, -{ "lfdux", X(31,631), X_MASK, COM, { FRT, RAS, RB } }, +{"lxvll", X(31,301), XX1_MASK, PPCVSX3, 0, {XT6, RA0, RB}}, -{ "lfduxe", X(31,639), X_MASK, BOOKE64, { FRT, RAS, RB } }, +{"mfbhrbe", X(31,302), X_MASK, POWER8, 0, {RT, BHRBE}}, + +{"tlbie", X(31,306), X_MASK|1<<20,POWER9, TITAN, {RB, RS, RIC, PRS, X_R}}, +{"tlbie", X(31,306), XRA_MASK, POWER7, POWER9|TITAN, {RB, RS}}, +{"tlbie", X(31,306), XRTLRA_MASK, PPC, E500|POWER7|TITAN, {RB, LOPT}}, +{"tlbi", X(31,306), XRT_MASK, POWER, 0, {RA0, RB}}, + +{"mfvsrld", X(31,307), XX1RB_MASK, PPCVSX3, 0, {RA, XS6}}, + +{"ldmx", X(31,309), X_MASK, POWER9, 0, {RT, RA0, RB}}, + +{"eciwx", X(31,310), X_MASK, PPC, E500|TITAN, {RT, RA0, RB}}, + +{"lhzux", X(31,311), X_MASK, COM, 0, {RT, RAL, RB}}, + +{"cbcdtd", X(31,314), XRB_MASK, POWER6, 0, {RA, RS}}, + +{"xor", XRC(31,316,0), X_MASK, COM, 0, {RA, RS, RB}}, +{"xor.", XRC(31,316,1), X_MASK, COM, 0, {RA, RS, RB}}, + +{"dcbtep", XRT(31,319,0), X_MASK, E500MC|PPCA2, 0, {RT, RA0, RB}}, + +{"mfexisr", XSPR(31,323, 64), XSPR_MASK, PPC403, 0, {RT}}, +{"mfexier", XSPR(31,323, 66), XSPR_MASK, PPC403, 0, {RT}}, +{"mfbr0", XSPR(31,323,128), XSPR_MASK, PPC403, 0, {RT}}, +{"mfbr1", XSPR(31,323,129), XSPR_MASK, PPC403, 0, {RT}}, +{"mfbr2", XSPR(31,323,130), XSPR_MASK, PPC403, 0, {RT}}, +{"mfbr3", XSPR(31,323,131), XSPR_MASK, PPC403, 0, {RT}}, +{"mfbr4", XSPR(31,323,132), XSPR_MASK, PPC403, 0, {RT}}, +{"mfbr5", XSPR(31,323,133), XSPR_MASK, PPC403, 0, {RT}}, +{"mfbr6", XSPR(31,323,134), XSPR_MASK, PPC403, 0, {RT}}, +{"mfbr7", XSPR(31,323,135), XSPR_MASK, PPC403, 0, {RT}}, +{"mfbear", XSPR(31,323,144), XSPR_MASK, PPC403, 0, {RT}}, +{"mfbesr", XSPR(31,323,145), XSPR_MASK, PPC403, 0, {RT}}, +{"mfiocr", XSPR(31,323,160), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmacr0", XSPR(31,323,192), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmact0", XSPR(31,323,193), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmada0", XSPR(31,323,194), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmasa0", XSPR(31,323,195), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmacc0", XSPR(31,323,196), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmacr1", XSPR(31,323,200), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmact1", XSPR(31,323,201), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmada1", XSPR(31,323,202), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmasa1", XSPR(31,323,203), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmacc1", XSPR(31,323,204), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmacr2", XSPR(31,323,208), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmact2", XSPR(31,323,209), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmada2", XSPR(31,323,210), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmasa2", XSPR(31,323,211), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmacc2", XSPR(31,323,212), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmacr3", XSPR(31,323,216), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmact3", XSPR(31,323,217), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmada3", XSPR(31,323,218), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmasa3", XSPR(31,323,219), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmacc3", XSPR(31,323,220), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdmasr", XSPR(31,323,224), XSPR_MASK, PPC403, 0, 
{RT}}, +{"mfdcr", X(31,323), X_MASK, PPC403|BOOKE|PPCA2|PPC476, E500|TITAN, {RT, SPR}}, +{"mfdcr.", XRC(31,323,1), X_MASK, PPCA2, 0, {RT, SPR}}, + +{"lvexwx", X(31,325), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, + +{"dcread", X(31,326), X_MASK, PPC476|TITAN, 0, {RT, RA0, RB}}, + +{"div", XO(31,331,0,0), XO_MASK, M601, 0, {RT, RA, RB}}, +{"div.", XO(31,331,0,1), XO_MASK, M601, 0, {RT, RA, RB}}, + +{"lxvdsx", X(31,332), XX1_MASK, PPCVSX, 0, {XT6, RA0, RB}}, + +{"mfpmr", X(31,334), X_MASK, PPCPMR|PPCE300, 0, {RT, PMR}}, +{"mftmr", X(31,366), X_MASK, PPCTMR|E6500, 0, {RT, TMR}}, + +{"slbsync", X(31,338), 0xffffffff, POWER9, 0, {0}}, + +{"mfmq", XSPR(31,339, 0), XSPR_MASK, M601, 0, {RT}}, +{"mfxer", XSPR(31,339, 1), XSPR_MASK, COM, 0, {RT}}, +{"mfrtcu", XSPR(31,339, 4), XSPR_MASK, COM, TITAN, {RT}}, +{"mfrtcl", XSPR(31,339, 5), XSPR_MASK, COM, TITAN, {RT}}, +{"mfdec", XSPR(31,339, 6), XSPR_MASK, MFDEC1, 0, {RT}}, +{"mflr", XSPR(31,339, 8), XSPR_MASK, COM, 0, {RT}}, +{"mfctr", XSPR(31,339, 9), XSPR_MASK, COM, 0, {RT}}, +{"mfdscr", XSPR(31,339, 17), XSPR_MASK, POWER6, 0, {RT}}, +{"mftid", XSPR(31,339, 17), XSPR_MASK, POWER, 0, {RT}}, +{"mfdsisr", XSPR(31,339, 18), XSPR_MASK, COM, TITAN, {RT}}, +{"mfdar", XSPR(31,339, 19), XSPR_MASK, COM, TITAN, {RT}}, +{"mfdec", XSPR(31,339, 22), XSPR_MASK, MFDEC2, MFDEC1, {RT}}, +{"mfsdr0", XSPR(31,339, 24), XSPR_MASK, POWER, 0, {RT}}, +{"mfsdr1", XSPR(31,339, 25), XSPR_MASK, COM, TITAN, {RT}}, +{"mfsrr0", XSPR(31,339, 26), XSPR_MASK, COM, 0, {RT}}, +{"mfsrr1", XSPR(31,339, 27), XSPR_MASK, COM, 0, {RT}}, +{"mfcfar", XSPR(31,339, 28), XSPR_MASK, POWER6, 0, {RT}}, +{"mfpid", XSPR(31,339, 48), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfcsrr0", XSPR(31,339, 58), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfcsrr1", XSPR(31,339, 59), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfdear", XSPR(31,339, 61), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfesr", XSPR(31,339, 62), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivpr", XSPR(31,339, 63), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfctrl", XSPR(31,339,136), XSPR_MASK, POWER4, 0, {RT}}, +{"mfcmpa", XSPR(31,339,144), XSPR_MASK, PPC860, 0, {RT}}, +{"mfcmpb", XSPR(31,339,145), XSPR_MASK, PPC860, 0, {RT}}, +{"mfcmpc", XSPR(31,339,146), XSPR_MASK, PPC860, 0, {RT}}, +{"mfcmpd", XSPR(31,339,147), XSPR_MASK, PPC860, 0, {RT}}, +{"mficr", XSPR(31,339,148), XSPR_MASK, PPC860, 0, {RT}}, +{"mfder", XSPR(31,339,149), XSPR_MASK, PPC860, 0, {RT}}, +{"mfcounta", XSPR(31,339,150), XSPR_MASK, PPC860, 0, {RT}}, +{"mfcountb", XSPR(31,339,151), XSPR_MASK, PPC860, 0, {RT}}, +{"mfcmpe", XSPR(31,339,152), XSPR_MASK, PPC860, 0, {RT}}, +{"mfcmpf", XSPR(31,339,153), XSPR_MASK, PPC860, 0, {RT}}, +{"mfcmpg", XSPR(31,339,154), XSPR_MASK, PPC860, 0, {RT}}, +{"mfcmph", XSPR(31,339,155), XSPR_MASK, PPC860, 0, {RT}}, +{"mflctrl1", XSPR(31,339,156), XSPR_MASK, PPC860, 0, {RT}}, +{"mflctrl2", XSPR(31,339,157), XSPR_MASK, PPC860, 0, {RT}}, +{"mfictrl", XSPR(31,339,158), XSPR_MASK, PPC860, 0, {RT}}, +{"mfbar", XSPR(31,339,159), XSPR_MASK, PPC860, 0, {RT}}, +{"mfvrsave", XSPR(31,339,256), XSPR_MASK, PPCVEC, 0, {RT}}, +{"mfusprg0", XSPR(31,339,256), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfsprg", XSPR(31,339,256), XSPRG_MASK, PPC, 0, {RT, SPRG}}, +{"mfsprg4", XSPR(31,339,260), XSPR_MASK, PPC405|BOOKE, 0, {RT}}, +{"mfsprg5", XSPR(31,339,261), XSPR_MASK, PPC405|BOOKE, 0, {RT}}, +{"mfsprg6", XSPR(31,339,262), XSPR_MASK, PPC405|BOOKE, 0, {RT}}, +{"mfsprg7", XSPR(31,339,263), XSPR_MASK, PPC405|BOOKE, 0, {RT}}, +{"mftbu", XSPR(31,339,269), XSPR_MASK, POWER4|BOOKE, 0, {RT}}, +{"mftb", X(31,339), X_MASK, POWER4|BOOKE, 0, {RT, TBR}}, +{"mftbl", 
XSPR(31,339,268), XSPR_MASK, POWER4|BOOKE, 0, {RT}}, +{"mfsprg0", XSPR(31,339,272), XSPR_MASK, PPC, 0, {RT}}, +{"mfsprg1", XSPR(31,339,273), XSPR_MASK, PPC, 0, {RT}}, +{"mfsprg2", XSPR(31,339,274), XSPR_MASK, PPC, 0, {RT}}, +{"mfsprg3", XSPR(31,339,275), XSPR_MASK, PPC, 0, {RT}}, +{"mfasr", XSPR(31,339,280), XSPR_MASK, PPC64, 0, {RT}}, +{"mfear", XSPR(31,339,282), XSPR_MASK, PPC, TITAN, {RT}}, +{"mfpir", XSPR(31,339,286), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfpvr", XSPR(31,339,287), XSPR_MASK, PPC, 0, {RT}}, +{"mfdbsr", XSPR(31,339,304), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfdbcr0", XSPR(31,339,308), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfdbcr1", XSPR(31,339,309), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfdbcr2", XSPR(31,339,310), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfiac1", XSPR(31,339,312), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfiac2", XSPR(31,339,313), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfiac3", XSPR(31,339,314), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfiac4", XSPR(31,339,315), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfdac1", XSPR(31,339,316), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfdac2", XSPR(31,339,317), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfdvc1", XSPR(31,339,318), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfdvc2", XSPR(31,339,319), XSPR_MASK, BOOKE, 0, {RT}}, +{"mftsr", XSPR(31,339,336), XSPR_MASK, BOOKE, 0, {RT}}, +{"mftcr", XSPR(31,339,340), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor0", XSPR(31,339,400), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor1", XSPR(31,339,401), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor2", XSPR(31,339,402), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor3", XSPR(31,339,403), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor4", XSPR(31,339,404), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor5", XSPR(31,339,405), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor6", XSPR(31,339,406), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor7", XSPR(31,339,407), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor8", XSPR(31,339,408), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor9", XSPR(31,339,409), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor10", XSPR(31,339,410), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor11", XSPR(31,339,411), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor12", XSPR(31,339,412), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor13", XSPR(31,339,413), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor14", XSPR(31,339,414), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfivor15", XSPR(31,339,415), XSPR_MASK, BOOKE, 0, {RT}}, +{"mfspefscr", XSPR(31,339,512), XSPR_MASK, PPCSPE, 0, {RT}}, +{"mfbbear", XSPR(31,339,513), XSPR_MASK, PPCBRLK, 0, {RT}}, +{"mfbbtar", XSPR(31,339,514), XSPR_MASK, PPCBRLK, 0, {RT}}, +{"mfivor32", XSPR(31,339,528), XSPR_MASK, PPCSPE, 0, {RT}}, +{"mfibatu", XSPR(31,339,528), XSPRBAT_MASK, PPC, TITAN, {RT, SPRBAT}}, +{"mfivor33", XSPR(31,339,529), XSPR_MASK, PPCSPE, 0, {RT}}, +{"mfibatl", XSPR(31,339,529), XSPRBAT_MASK, PPC, TITAN, {RT, SPRBAT}}, +{"mfivor34", XSPR(31,339,530), XSPR_MASK, PPCSPE, 0, {RT}}, +{"mfivor35", XSPR(31,339,531), XSPR_MASK, PPCPMR, 0, {RT}}, +{"mfdbatu", XSPR(31,339,536), XSPRBAT_MASK, PPC, TITAN, {RT, SPRBAT}}, +{"mfdbatl", XSPR(31,339,537), XSPRBAT_MASK, PPC, TITAN, {RT, SPRBAT}}, +{"mfic_cst", XSPR(31,339,560), XSPR_MASK, PPC860, 0, {RT}}, +{"mfic_adr", XSPR(31,339,561), XSPR_MASK, PPC860, 0, {RT}}, +{"mfic_dat", XSPR(31,339,562), XSPR_MASK, PPC860, 0, {RT}}, +{"mfdc_cst", XSPR(31,339,568), XSPR_MASK, PPC860, 0, {RT}}, +{"mfdc_adr", XSPR(31,339,569), XSPR_MASK, PPC860, 0, {RT}}, +{"mfdc_dat", XSPR(31,339,570), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmcsrr0", XSPR(31,339,570), XSPR_MASK, PPCRFMCI, 0, {RT}}, +{"mfmcsrr1", XSPR(31,339,571), XSPR_MASK, PPCRFMCI, 0, {RT}}, +{"mfmcsr", XSPR(31,339,572), XSPR_MASK, PPCRFMCI, 0, {RT}}, +{"mfmcar", 
XSPR(31,339,573), XSPR_MASK, PPCRFMCI, TITAN, {RT}}, +{"mfdpdr", XSPR(31,339,630), XSPR_MASK, PPC860, 0, {RT}}, +{"mfdpir", XSPR(31,339,631), XSPR_MASK, PPC860, 0, {RT}}, +{"mfimmr", XSPR(31,339,638), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmi_ctr", XSPR(31,339,784), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmi_ap", XSPR(31,339,786), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmi_epn", XSPR(31,339,787), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmi_twc", XSPR(31,339,789), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmi_rpn", XSPR(31,339,790), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmd_ctr", XSPR(31,339,792), XSPR_MASK, PPC860, 0, {RT}}, +{"mfm_casid", XSPR(31,339,793), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmd_ap", XSPR(31,339,794), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmd_epn", XSPR(31,339,795), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmd_twb", XSPR(31,339,796), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmd_twc", XSPR(31,339,797), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmd_rpn", XSPR(31,339,798), XSPR_MASK, PPC860, 0, {RT}}, +{"mfm_tw", XSPR(31,339,799), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmi_dbcam", XSPR(31,339,816), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmi_dbram0", XSPR(31,339,817), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmi_dbram1", XSPR(31,339,818), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmd_dbcam", XSPR(31,339,824), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmd_dbram0", XSPR(31,339,825), XSPR_MASK, PPC860, 0, {RT}}, +{"mfmd_dbram1", XSPR(31,339,826), XSPR_MASK, PPC860, 0, {RT}}, +{"mfivndx", XSPR(31,339,880), XSPR_MASK, TITAN, 0, {RT}}, +{"mfdvndx", XSPR(31,339,881), XSPR_MASK, TITAN, 0, {RT}}, +{"mfivlim", XSPR(31,339,882), XSPR_MASK, TITAN, 0, {RT}}, +{"mfdvlim", XSPR(31,339,883), XSPR_MASK, TITAN, 0, {RT}}, +{"mfclcsr", XSPR(31,339,884), XSPR_MASK, TITAN, 0, {RT}}, +{"mfccr1", XSPR(31,339,888), XSPR_MASK, TITAN, 0, {RT}}, +{"mfppr", XSPR(31,339,896), XSPR_MASK, POWER7, 0, {RT}}, +{"mfppr32", XSPR(31,339,898), XSPR_MASK, POWER7, 0, {RT}}, +{"mfrstcfg", XSPR(31,339,923), XSPR_MASK, TITAN, 0, {RT}}, +{"mfdcdbtrl", XSPR(31,339,924), XSPR_MASK, TITAN, 0, {RT}}, +{"mfdcdbtrh", XSPR(31,339,925), XSPR_MASK, TITAN, 0, {RT}}, +{"mficdbtr", XSPR(31,339,927), XSPR_MASK, TITAN, 0, {RT}}, +{"mfummcr0", XSPR(31,339,936), XSPR_MASK, PPC750, 0, {RT}}, +{"mfupmc1", XSPR(31,339,937), XSPR_MASK, PPC750, 0, {RT}}, +{"mfupmc2", XSPR(31,339,938), XSPR_MASK, PPC750, 0, {RT}}, +{"mfusia", XSPR(31,339,939), XSPR_MASK, PPC750, 0, {RT}}, +{"mfummcr1", XSPR(31,339,940), XSPR_MASK, PPC750, 0, {RT}}, +{"mfupmc3", XSPR(31,339,941), XSPR_MASK, PPC750, 0, {RT}}, +{"mfupmc4", XSPR(31,339,942), XSPR_MASK, PPC750, 0, {RT}}, +{"mfzpr", XSPR(31,339,944), XSPR_MASK, PPC403, 0, {RT}}, +{"mfpid", XSPR(31,339,945), XSPR_MASK, PPC403, 0, {RT}}, +{"mfmmucr", XSPR(31,339,946), XSPR_MASK, TITAN, 0, {RT}}, +{"mfccr0", XSPR(31,339,947), XSPR_MASK, PPC405|TITAN, 0, {RT}}, +{"mfiac3", XSPR(31,339,948), XSPR_MASK, PPC405, 0, {RT}}, +{"mfiac4", XSPR(31,339,949), XSPR_MASK, PPC405, 0, {RT}}, +{"mfdvc1", XSPR(31,339,950), XSPR_MASK, PPC405, 0, {RT}}, +{"mfdvc2", XSPR(31,339,951), XSPR_MASK, PPC405, 0, {RT}}, +{"mfmmcr0", XSPR(31,339,952), XSPR_MASK, PPC750, 0, {RT}}, +{"mfpmc1", XSPR(31,339,953), XSPR_MASK, PPC750, 0, {RT}}, +{"mfsgr", XSPR(31,339,953), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdcwr", XSPR(31,339,954), XSPR_MASK, PPC403, 0, {RT}}, +{"mfpmc2", XSPR(31,339,954), XSPR_MASK, PPC750, 0, {RT}}, +{"mfsia", XSPR(31,339,955), XSPR_MASK, PPC750, 0, {RT}}, +{"mfsler", XSPR(31,339,955), XSPR_MASK, PPC405, 0, {RT}}, +{"mfmmcr1", XSPR(31,339,956), XSPR_MASK, PPC750, 0, {RT}}, +{"mfsu0r", XSPR(31,339,956), XSPR_MASK, PPC405, 0, {RT}}, 
+{"mfdbcr1", XSPR(31,339,957), XSPR_MASK, PPC405, 0, {RT}}, +{"mfpmc3", XSPR(31,339,957), XSPR_MASK, PPC750, 0, {RT}}, +{"mfpmc4", XSPR(31,339,958), XSPR_MASK, PPC750, 0, {RT}}, +{"mficdbdr", XSPR(31,339,979), XSPR_MASK, PPC403|TITAN, 0, {RT}}, +{"mfesr", XSPR(31,339,980), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdear", XSPR(31,339,981), XSPR_MASK, PPC403, 0, {RT}}, +{"mfevpr", XSPR(31,339,982), XSPR_MASK, PPC403, 0, {RT}}, +{"mfcdbcr", XSPR(31,339,983), XSPR_MASK, PPC403, 0, {RT}}, +{"mftsr", XSPR(31,339,984), XSPR_MASK, PPC403, 0, {RT}}, +{"mftcr", XSPR(31,339,986), XSPR_MASK, PPC403, 0, {RT}}, +{"mfpit", XSPR(31,339,987), XSPR_MASK, PPC403, 0, {RT}}, +{"mftbhi", XSPR(31,339,988), XSPR_MASK, PPC403, 0, {RT}}, +{"mftblo", XSPR(31,339,989), XSPR_MASK, PPC403, 0, {RT}}, +{"mfsrr2", XSPR(31,339,990), XSPR_MASK, PPC403, 0, {RT}}, +{"mfsrr3", XSPR(31,339,991), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdbsr", XSPR(31,339,1008), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdbcr0", XSPR(31,339,1010), XSPR_MASK, PPC405, 0, {RT}}, +{"mfdbdr", XSPR(31,339,1011), XSPR_MASK, TITAN, 0, {RS}}, +{"mfiac1", XSPR(31,339,1012), XSPR_MASK, PPC403, 0, {RT}}, +{"mfiac2", XSPR(31,339,1013), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdac1", XSPR(31,339,1014), XSPR_MASK, PPC403, 0, {RT}}, +{"mfdac2", XSPR(31,339,1015), XSPR_MASK, PPC403, 0, {RT}}, +{"mfl2cr", XSPR(31,339,1017), XSPR_MASK, PPC750, 0, {RT}}, +{"mfdccr", XSPR(31,339,1018), XSPR_MASK, PPC403, 0, {RT}}, +{"mficcr", XSPR(31,339,1019), XSPR_MASK, PPC403, 0, {RT}}, +{"mfictc", XSPR(31,339,1019), XSPR_MASK, PPC750, 0, {RT}}, +{"mfpbl1", XSPR(31,339,1020), XSPR_MASK, PPC403, 0, {RT}}, +{"mfthrm1", XSPR(31,339,1020), XSPR_MASK, PPC750, 0, {RT}}, +{"mfpbu1", XSPR(31,339,1021), XSPR_MASK, PPC403, 0, {RT}}, +{"mfthrm2", XSPR(31,339,1021), XSPR_MASK, PPC750, 0, {RT}}, +{"mfpbl2", XSPR(31,339,1022), XSPR_MASK, PPC403, 0, {RT}}, +{"mfthrm3", XSPR(31,339,1022), XSPR_MASK, PPC750, 0, {RT}}, +{"mfpbu2", XSPR(31,339,1023), XSPR_MASK, PPC403, 0, {RT}}, +{"mfspr", X(31,339), X_MASK, COM, 0, {RT, SPR}}, + +{"lwax", X(31,341), X_MASK, PPC64, 0, {RT, RA0, RB}}, + +{"dst", XDSS(31,342,0), XDSS_MASK, PPCVEC, 0, {RA, RB, STRM}}, + +{"lhax", X(31,343), X_MASK, COM, 0, {RT, RA0, RB}}, + +{"lvxl", X(31,359), X_MASK, PPCVEC, 0, {VD, RA0, RB}}, + +{"abs", XO(31,360,0,0), XORB_MASK, M601, 0, {RT, RA}}, +{"abs.", XO(31,360,0,1), XORB_MASK, M601, 0, {RT, RA}}, + +{"divs", XO(31,363,0,0), XO_MASK, M601, 0, {RT, RA, RB}}, +{"divs.", XO(31,363,0,1), XO_MASK, M601, 0, {RT, RA, RB}}, + +{"lxvwsx", X(31,364), XX1_MASK, PPCVSX3, 0, {XT6, RA0, RB}}, + +{"tlbia", X(31,370), 0xffffffff, PPC, E500|TITAN, {0}}, + +{"mftbu", XSPR(31,371,269), XSPR_MASK, PPC, NO371|POWER4, {RT}}, +{"mftb", X(31,371), X_MASK, PPC, NO371|POWER4, {RT, TBR}}, +{"mftbl", XSPR(31,371,268), XSPR_MASK, PPC, NO371|POWER4, {RT}}, + +{"lwaux", X(31,373), X_MASK, PPC64, 0, {RT, RAL, RB}}, + +{"dstst", XDSS(31,374,0), XDSS_MASK, PPCVEC, 0, {RA, RB, STRM}}, + +{"lhaux", X(31,375), X_MASK, COM, 0, {RT, RAL, RB}}, + +{"popcntw", X(31,378), XRB_MASK, POWER7|PPCA2, 0, {RA, RS}}, + +{"mtdcrx", X(31,387), X_MASK, BOOKE|PPCA2|PPC476, TITAN, {RA, RS}}, +{"mtdcrx.", XRC(31,387,1), X_MASK, PPCA2, 0, {RA, RS}}, + +{"stvexbx", X(31,389), X_MASK, PPCVEC2, 0, {VS, RA0, RB}}, + +{"dcblc", X(31,390), X_MASK, PPCCHLK|PPC476|TITAN, 0, {CT, RA0, RB}}, +{"stdfcmx", APU(31,391,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, + +{"divdeu", XO(31,393,0,0), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}}, +{"divdeu.", XO(31,393,0,1), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}}, +{"divweu", 
XO(31,395,0,0), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}},
+{"divweu.", XO(31,395,0,1), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}},
+
+{"stxvx", X(31,396), XX1_MASK, PPCVSX3, 0, {XS6, RA0, RB}},
+{"stxvl", X(31,397), XX1_MASK, PPCVSX3, 0, {XS6, RA0, RB}},
+
+{"dcblce", X(31,398), X_MASK, PPCCHLK, E500MC, {CT, RA, RB}},
+
+{"slbmte", X(31,402), XRA_MASK, PPC64, 0, {RS, RB}},
+
+{"mtvsrws", X(31,403), XX1RB_MASK, PPCVSX3, 0, {XT6, RA}},
+
+{"pbt.", XRC(31,404,1), X_MASK, POWER8, 0, {RS, RA0, RB}},
+
+{"icswx", XRC(31,406,0), X_MASK, POWER7|PPCA2, 0, {RS, RA, RB}},
+{"icswx.", XRC(31,406,1), X_MASK, POWER7|PPCA2, 0, {RS, RA, RB}},
+
+{"sthx", X(31,407), X_MASK, COM, 0, {RS, RA0, RB}},
+
+{"orc", XRC(31,412,0), X_MASK, COM, 0, {RA, RS, RB}},
+{"orc.", XRC(31,412,1), X_MASK, COM, 0, {RA, RS, RB}},
+
+{"sthepx", X(31,415), X_MASK, E500MC|PPCA2, 0, {RS, RA0, RB}},
+
+{"mtdcrux", X(31,419), X_MASK, PPC464, 0, {RA, RS}},
+
+{"stvexhx", X(31,421), X_MASK, PPCVEC2, 0, {VS, RA0, RB}},
+
+{"dcblq.", XRC(31,422,1), X_MASK, E6500, 0, {CT, RA0, RB}},
+
+{"divde", XO(31,425,0,0), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}},
+{"divde.", XO(31,425,0,1), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}},
+{"divwe", XO(31,427,0,0), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}},
+{"divwe.", XO(31,427,0,1), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}},
+
+{"stxvll", X(31,429), XX1_MASK, PPCVSX3, 0, {XS6, RA0, RB}},
+
+{"clrbhrb", X(31,430), 0xffffffff, POWER8, 0, {0}},
+
+{"slbie", X(31,434), XRTRA_MASK, PPC64, 0, {RB}},
+
+{"mtvsrdd", X(31,435), XX1_MASK, PPCVSX3, 0, {XT6, RA0, RB}},
+
+{"ecowx", X(31,438), X_MASK, PPC, E500|TITAN, {RT, RA0, RB}},
+
+{"sthux", X(31,439), X_MASK, COM, 0, {RS, RAS, RB}},
+
+{"mdors", 0x7f9ce378, 0xffffffff, E500MC, 0, {0}},
+
+{"miso", 0x7f5ad378, 0xffffffff, E6500, 0, {0}},
+
+/* The "yield", "mdoio" and "mdoom" instructions are extended mnemonics for
+   "or rX,rX,rX", with rX being r27, r29 and r30 respectively.
*/ +{"yield", 0x7f7bdb78, 0xffffffff, POWER7, 0, {0}}, +{"mdoio", 0x7fbdeb78, 0xffffffff, POWER7, 0, {0}}, +{"mdoom", 0x7fdef378, 0xffffffff, POWER7, 0, {0}}, +{"mr", XRC(31,444,0), X_MASK, COM, 0, {RA, RS, RBS}}, +{"or", XRC(31,444,0), X_MASK, COM, 0, {RA, RS, RB}}, +{"mr.", XRC(31,444,1), X_MASK, COM, 0, {RA, RS, RBS}}, +{"or.", XRC(31,444,1), X_MASK, COM, 0, {RA, RS, RB}}, + +{"mtexisr", XSPR(31,451, 64), XSPR_MASK, PPC403, 0, {RS}}, +{"mtexier", XSPR(31,451, 66), XSPR_MASK, PPC403, 0, {RS}}, +{"mtbr0", XSPR(31,451,128), XSPR_MASK, PPC403, 0, {RS}}, +{"mtbr1", XSPR(31,451,129), XSPR_MASK, PPC403, 0, {RS}}, +{"mtbr2", XSPR(31,451,130), XSPR_MASK, PPC403, 0, {RS}}, +{"mtbr3", XSPR(31,451,131), XSPR_MASK, PPC403, 0, {RS}}, +{"mtbr4", XSPR(31,451,132), XSPR_MASK, PPC403, 0, {RS}}, +{"mtbr5", XSPR(31,451,133), XSPR_MASK, PPC403, 0, {RS}}, +{"mtbr6", XSPR(31,451,134), XSPR_MASK, PPC403, 0, {RS}}, +{"mtbr7", XSPR(31,451,135), XSPR_MASK, PPC403, 0, {RS}}, +{"mtbear", XSPR(31,451,144), XSPR_MASK, PPC403, 0, {RS}}, +{"mtbesr", XSPR(31,451,145), XSPR_MASK, PPC403, 0, {RS}}, +{"mtiocr", XSPR(31,451,160), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmacr0", XSPR(31,451,192), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmact0", XSPR(31,451,193), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmada0", XSPR(31,451,194), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmasa0", XSPR(31,451,195), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmacc0", XSPR(31,451,196), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmacr1", XSPR(31,451,200), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmact1", XSPR(31,451,201), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmada1", XSPR(31,451,202), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmasa1", XSPR(31,451,203), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmacc1", XSPR(31,451,204), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmacr2", XSPR(31,451,208), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmact2", XSPR(31,451,209), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmada2", XSPR(31,451,210), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmasa2", XSPR(31,451,211), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmacc2", XSPR(31,451,212), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmacr3", XSPR(31,451,216), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmact3", XSPR(31,451,217), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmada3", XSPR(31,451,218), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmasa3", XSPR(31,451,219), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmacc3", XSPR(31,451,220), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdmasr", XSPR(31,451,224), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdcr", X(31,451), X_MASK, PPC403|BOOKE|PPCA2|PPC476, E500|TITAN, {SPR, RS}}, +{"mtdcr.", XRC(31,451,1), X_MASK, PPCA2, 0, {SPR, RS}}, + +{"stvexwx", X(31,453), X_MASK, PPCVEC2, 0, {VS, RA0, RB}}, + +{"dccci", X(31,454), XRT_MASK, PPC403|PPC440|TITAN|PPCA2, 0, {RAOPT, RBOPT}}, +{"dci", X(31,454), XRARB_MASK, PPCA2|PPC476, 0, {CT}}, + +{"divdu", XO(31,457,0,0), XO_MASK, PPC64, 0, {RT, RA, RB}}, +{"divdu.", XO(31,457,0,1), XO_MASK, PPC64, 0, {RT, RA, RB}}, + +{"divwu", XO(31,459,0,0), XO_MASK, PPC, 0, {RT, RA, RB}}, +{"divwu.", XO(31,459,0,1), XO_MASK, PPC, 0, {RT, RA, RB}}, + +{"mtpmr", X(31,462), X_MASK, PPCPMR|PPCE300, 0, {PMR, RS}}, +{"mttmr", X(31,494), X_MASK, PPCTMR|E6500, 0, {TMR, RS}}, + +{"slbieg", X(31,466), XRA_MASK, POWER9, 0, {RS, RB}}, + +{"mtmq", XSPR(31,467, 0), XSPR_MASK, M601, 0, {RS}}, +{"mtxer", XSPR(31,467, 1), XSPR_MASK, COM, 0, {RS}}, +{"mtlr", XSPR(31,467, 8), XSPR_MASK, COM, 0, {RS}}, +{"mtctr", XSPR(31,467, 9), XSPR_MASK, COM, 0, {RS}}, +{"mtdscr", XSPR(31,467, 17), XSPR_MASK, POWER6, 0, {RS}}, +{"mttid", XSPR(31,467, 17), XSPR_MASK, POWER, 0, {RS}}, +{"mtdsisr", XSPR(31,467, 18), 
XSPR_MASK, COM, TITAN, {RS}}, +{"mtdar", XSPR(31,467, 19), XSPR_MASK, COM, TITAN, {RS}}, +{"mtrtcu", XSPR(31,467, 20), XSPR_MASK, COM, TITAN, {RS}}, +{"mtrtcl", XSPR(31,467, 21), XSPR_MASK, COM, TITAN, {RS}}, +{"mtdec", XSPR(31,467, 22), XSPR_MASK, COM, 0, {RS}}, +{"mtsdr0", XSPR(31,467, 24), XSPR_MASK, POWER, 0, {RS}}, +{"mtsdr1", XSPR(31,467, 25), XSPR_MASK, COM, TITAN, {RS}}, +{"mtsrr0", XSPR(31,467, 26), XSPR_MASK, COM, 0, {RS}}, +{"mtsrr1", XSPR(31,467, 27), XSPR_MASK, COM, 0, {RS}}, +{"mtcfar", XSPR(31,467, 28), XSPR_MASK, POWER6, 0, {RS}}, +{"mtpid", XSPR(31,467, 48), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtdecar", XSPR(31,467, 54), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtcsrr0", XSPR(31,467, 58), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtcsrr1", XSPR(31,467, 59), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtdear", XSPR(31,467, 61), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtesr", XSPR(31,467, 62), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivpr", XSPR(31,467, 63), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtcmpa", XSPR(31,467,144), XSPR_MASK, PPC860, 0, {RS}}, +{"mtcmpb", XSPR(31,467,145), XSPR_MASK, PPC860, 0, {RS}}, +{"mtcmpc", XSPR(31,467,146), XSPR_MASK, PPC860, 0, {RS}}, +{"mtcmpd", XSPR(31,467,147), XSPR_MASK, PPC860, 0, {RS}}, +{"mticr", XSPR(31,467,148), XSPR_MASK, PPC860, 0, {RS}}, +{"mtder", XSPR(31,467,149), XSPR_MASK, PPC860, 0, {RS}}, +{"mtcounta", XSPR(31,467,150), XSPR_MASK, PPC860, 0, {RS}}, +{"mtcountb", XSPR(31,467,151), XSPR_MASK, PPC860, 0, {RS}}, +{"mtctrl", XSPR(31,467,152), XSPR_MASK, POWER4, 0, {RS}}, +{"mtcmpe", XSPR(31,467,152), XSPR_MASK, PPC860, 0, {RS}}, +{"mtcmpf", XSPR(31,467,153), XSPR_MASK, PPC860, 0, {RS}}, +{"mtcmpg", XSPR(31,467,154), XSPR_MASK, PPC860, 0, {RS}}, +{"mtcmph", XSPR(31,467,155), XSPR_MASK, PPC860, 0, {RS}}, +{"mtlctrl1", XSPR(31,467,156), XSPR_MASK, PPC860, 0, {RS}}, +{"mtlctrl2", XSPR(31,467,157), XSPR_MASK, PPC860, 0, {RS}}, +{"mtictrl", XSPR(31,467,158), XSPR_MASK, PPC860, 0, {RS}}, +{"mtbar", XSPR(31,467,159), XSPR_MASK, PPC860, 0, {RS}}, +{"mtvrsave", XSPR(31,467,256), XSPR_MASK, PPCVEC, 0, {RS}}, +{"mtusprg0", XSPR(31,467,256), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtsprg", XSPR(31,467,256), XSPRG_MASK, PPC, 0, {SPRG, RS}}, +{"mtsprg0", XSPR(31,467,272), XSPR_MASK, PPC, 0, {RS}}, +{"mtsprg1", XSPR(31,467,273), XSPR_MASK, PPC, 0, {RS}}, +{"mtsprg2", XSPR(31,467,274), XSPR_MASK, PPC, 0, {RS}}, +{"mtsprg3", XSPR(31,467,275), XSPR_MASK, PPC, 0, {RS}}, +{"mtsprg4", XSPR(31,467,276), XSPR_MASK, PPC405|BOOKE, 0, {RS}}, +{"mtsprg5", XSPR(31,467,277), XSPR_MASK, PPC405|BOOKE, 0, {RS}}, +{"mtsprg6", XSPR(31,467,278), XSPR_MASK, PPC405|BOOKE, 0, {RS}}, +{"mtsprg7", XSPR(31,467,279), XSPR_MASK, PPC405|BOOKE, 0, {RS}}, +{"mtasr", XSPR(31,467,280), XSPR_MASK, PPC64, 0, {RS}}, +{"mtear", XSPR(31,467,282), XSPR_MASK, PPC, TITAN, {RS}}, +{"mttbl", XSPR(31,467,284), XSPR_MASK, PPC, 0, {RS}}, +{"mttbu", XSPR(31,467,285), XSPR_MASK, PPC, 0, {RS}}, +{"mtdbsr", XSPR(31,467,304), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtdbcr0", XSPR(31,467,308), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtdbcr1", XSPR(31,467,309), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtdbcr2", XSPR(31,467,310), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtiac1", XSPR(31,467,312), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtiac2", XSPR(31,467,313), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtiac3", XSPR(31,467,314), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtiac4", XSPR(31,467,315), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtdac1", XSPR(31,467,316), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtdac2", XSPR(31,467,317), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtdvc1", XSPR(31,467,318), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtdvc2", XSPR(31,467,319), XSPR_MASK, 
BOOKE, 0, {RS}}, +{"mttsr", XSPR(31,467,336), XSPR_MASK, BOOKE, 0, {RS}}, +{"mttcr", XSPR(31,467,340), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor0", XSPR(31,467,400), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor1", XSPR(31,467,401), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor2", XSPR(31,467,402), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor3", XSPR(31,467,403), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor4", XSPR(31,467,404), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor5", XSPR(31,467,405), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor6", XSPR(31,467,406), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor7", XSPR(31,467,407), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor8", XSPR(31,467,408), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor9", XSPR(31,467,409), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor10", XSPR(31,467,410), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor11", XSPR(31,467,411), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor12", XSPR(31,467,412), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor13", XSPR(31,467,413), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor14", XSPR(31,467,414), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtivor15", XSPR(31,467,415), XSPR_MASK, BOOKE, 0, {RS}}, +{"mtspefscr", XSPR(31,467,512), XSPR_MASK, PPCSPE, 0, {RS}}, +{"mtbbear", XSPR(31,467,513), XSPR_MASK, PPCBRLK, 0, {RS}}, +{"mtbbtar", XSPR(31,467,514), XSPR_MASK, PPCBRLK, 0, {RS}}, +{"mtivor32", XSPR(31,467,528), XSPR_MASK, PPCSPE, 0, {RS}}, +{"mtibatu", XSPR(31,467,528), XSPRBAT_MASK, PPC, TITAN, {SPRBAT, RS}}, +{"mtivor33", XSPR(31,467,529), XSPR_MASK, PPCSPE, 0, {RS}}, +{"mtibatl", XSPR(31,467,529), XSPRBAT_MASK, PPC, TITAN, {SPRBAT, RS}}, +{"mtivor34", XSPR(31,467,530), XSPR_MASK, PPCSPE, 0, {RS}}, +{"mtivor35", XSPR(31,467,531), XSPR_MASK, PPCPMR, 0, {RS}}, +{"mtdbatu", XSPR(31,467,536), XSPRBAT_MASK, PPC, TITAN, {SPRBAT, RS}}, +{"mtdbatl", XSPR(31,467,537), XSPRBAT_MASK, PPC, TITAN, {SPRBAT, RS}}, +{"mtmcsrr0", XSPR(31,467,570), XSPR_MASK, PPCRFMCI, 0, {RS}}, +{"mtmcsrr1", XSPR(31,467,571), XSPR_MASK, PPCRFMCI, 0, {RS}}, +{"mtmcsr", XSPR(31,467,572), XSPR_MASK, PPCRFMCI, 0, {RS}}, +{"mtivndx", XSPR(31,467,880), XSPR_MASK, TITAN, 0, {RS}}, +{"mtdvndx", XSPR(31,467,881), XSPR_MASK, TITAN, 0, {RS}}, +{"mtivlim", XSPR(31,467,882), XSPR_MASK, TITAN, 0, {RS}}, +{"mtdvlim", XSPR(31,467,883), XSPR_MASK, TITAN, 0, {RS}}, +{"mtclcsr", XSPR(31,467,884), XSPR_MASK, TITAN, 0, {RS}}, +{"mtccr1", XSPR(31,467,888), XSPR_MASK, TITAN, 0, {RS}}, +{"mtppr", XSPR(31,467,896), XSPR_MASK, POWER7, 0, {RS}}, +{"mtppr32", XSPR(31,467,898), XSPR_MASK, POWER7, 0, {RS}}, +{"mtummcr0", XSPR(31,467,936), XSPR_MASK, PPC750, 0, {RS}}, +{"mtupmc1", XSPR(31,467,937), XSPR_MASK, PPC750, 0, {RS}}, +{"mtupmc2", XSPR(31,467,938), XSPR_MASK, PPC750, 0, {RS}}, +{"mtusia", XSPR(31,467,939), XSPR_MASK, PPC750, 0, {RS}}, +{"mtummcr1", XSPR(31,467,940), XSPR_MASK, PPC750, 0, {RS}}, +{"mtupmc3", XSPR(31,467,941), XSPR_MASK, PPC750, 0, {RS}}, +{"mtupmc4", XSPR(31,467,942), XSPR_MASK, PPC750, 0, {RS}}, +{"mtzpr", XSPR(31,467,944), XSPR_MASK, PPC403, 0, {RS}}, +{"mtpid", XSPR(31,467,945), XSPR_MASK, PPC403, 0, {RS}}, +{"mtrmmucr", XSPR(31,467,946), XSPR_MASK, TITAN, 0, {RS}}, +{"mtccr0", XSPR(31,467,947), XSPR_MASK, PPC405|TITAN, 0, {RS}}, +{"mtiac3", XSPR(31,467,948), XSPR_MASK, PPC405, 0, {RS}}, +{"mtiac4", XSPR(31,467,949), XSPR_MASK, PPC405, 0, {RS}}, +{"mtdvc1", XSPR(31,467,950), XSPR_MASK, PPC405, 0, {RS}}, +{"mtdvc2", XSPR(31,467,951), XSPR_MASK, PPC405, 0, {RS}}, +{"mtmmcr0", XSPR(31,467,952), XSPR_MASK, PPC750, 0, {RS}}, +{"mtpmc1", XSPR(31,467,953), XSPR_MASK, PPC750, 0, {RS}}, +{"mtsgr", XSPR(31,467,953), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdcwr", 
XSPR(31,467,954), XSPR_MASK, PPC403, 0, {RS}}, +{"mtpmc2", XSPR(31,467,954), XSPR_MASK, PPC750, 0, {RS}}, +{"mtsia", XSPR(31,467,955), XSPR_MASK, PPC750, 0, {RS}}, +{"mtsler", XSPR(31,467,955), XSPR_MASK, PPC405, 0, {RS}}, +{"mtmmcr1", XSPR(31,467,956), XSPR_MASK, PPC750, 0, {RS}}, +{"mtsu0r", XSPR(31,467,956), XSPR_MASK, PPC405, 0, {RS}}, +{"mtdbcr1", XSPR(31,467,957), XSPR_MASK, PPC405, 0, {RS}}, +{"mtpmc3", XSPR(31,467,957), XSPR_MASK, PPC750, 0, {RS}}, +{"mtpmc4", XSPR(31,467,958), XSPR_MASK, PPC750, 0, {RS}}, +{"mticdbdr", XSPR(31,467,979), XSPR_MASK, PPC403, 0, {RS}}, +{"mtesr", XSPR(31,467,980), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdear", XSPR(31,467,981), XSPR_MASK, PPC403, 0, {RS}}, +{"mtevpr", XSPR(31,467,982), XSPR_MASK, PPC403, 0, {RS}}, +{"mtcdbcr", XSPR(31,467,983), XSPR_MASK, PPC403, 0, {RS}}, +{"mttsr", XSPR(31,467,984), XSPR_MASK, PPC403, 0, {RS}}, +{"mttcr", XSPR(31,467,986), XSPR_MASK, PPC403, 0, {RS}}, +{"mtpit", XSPR(31,467,987), XSPR_MASK, PPC403, 0, {RS}}, +{"mttbhi", XSPR(31,467,988), XSPR_MASK, PPC403, 0, {RS}}, +{"mttblo", XSPR(31,467,989), XSPR_MASK, PPC403, 0, {RS}}, +{"mtsrr2", XSPR(31,467,990), XSPR_MASK, PPC403, 0, {RS}}, +{"mtsrr3", XSPR(31,467,991), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdbsr", XSPR(31,467,1008), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdbdr", XSPR(31,467,1011), XSPR_MASK, TITAN, 0, {RS}}, +{"mtdbcr0", XSPR(31,467,1010), XSPR_MASK, PPC405, 0, {RS}}, +{"mtiac1", XSPR(31,467,1012), XSPR_MASK, PPC403, 0, {RS}}, +{"mtiac2", XSPR(31,467,1013), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdac1", XSPR(31,467,1014), XSPR_MASK, PPC403, 0, {RS}}, +{"mtdac2", XSPR(31,467,1015), XSPR_MASK, PPC403, 0, {RS}}, +{"mtl2cr", XSPR(31,467,1017), XSPR_MASK, PPC750, 0, {RS}}, +{"mtdccr", XSPR(31,467,1018), XSPR_MASK, PPC403, 0, {RS}}, +{"mticcr", XSPR(31,467,1019), XSPR_MASK, PPC403, 0, {RS}}, +{"mtictc", XSPR(31,467,1019), XSPR_MASK, PPC750, 0, {RS}}, +{"mtpbl1", XSPR(31,467,1020), XSPR_MASK, PPC403, 0, {RS}}, +{"mtthrm1", XSPR(31,467,1020), XSPR_MASK, PPC750, 0, {RS}}, +{"mtpbu1", XSPR(31,467,1021), XSPR_MASK, PPC403, 0, {RS}}, +{"mtthrm2", XSPR(31,467,1021), XSPR_MASK, PPC750, 0, {RS}}, +{"mtpbl2", XSPR(31,467,1022), XSPR_MASK, PPC403, 0, {RS}}, +{"mtthrm3", XSPR(31,467,1022), XSPR_MASK, PPC750, 0, {RS}}, +{"mtpbu2", XSPR(31,467,1023), XSPR_MASK, PPC403, 0, {RS}}, +{"mtspr", X(31,467), X_MASK, COM, 0, {SPR, RS}}, + +{"dcbi", X(31,470), XRT_MASK, PPC, 0, {RA0, RB}}, + +{"nand", XRC(31,476,0), X_MASK, COM, 0, {RA, RS, RB}}, +{"nand.", XRC(31,476,1), X_MASK, COM, 0, {RA, RS, RB}}, + +{"dsn", X(31,483), XRT_MASK, E500MC, 0, {RA, RB}}, + +{"dcread", X(31,486), X_MASK, PPC403|PPC440, PPCA2|PPC476, {RT, RA0, RB}}, + +{"icbtls", X(31,486), X_MASK, PPCCHLK|PPC476|TITAN, 0, {CT, RA0, RB}}, + +{"stvxl", X(31,487), X_MASK, PPCVEC, 0, {VS, RA0, RB}}, + +{"nabs", XO(31,488,0,0), XORB_MASK, M601, 0, {RT, RA}}, +{"nabs.", XO(31,488,0,1), XORB_MASK, M601, 0, {RT, RA}}, + +{"divd", XO(31,489,0,0), XO_MASK, PPC64, 0, {RT, RA, RB}}, +{"divd.", XO(31,489,0,1), XO_MASK, PPC64, 0, {RT, RA, RB}}, + +{"divw", XO(31,491,0,0), XO_MASK, PPC, 0, {RT, RA, RB}}, +{"divw.", XO(31,491,0,1), XO_MASK, PPC, 0, {RT, RA, RB}}, + +{"icbtlse", X(31,494), X_MASK, PPCCHLK, E500MC, {CT, RA, RB}}, + +{"slbia", X(31,498), 0xff1fffff, POWER6, 0, {IH}}, +{"slbia", X(31,498), 0xffffffff, PPC64, POWER6, {0}}, + +{"cli", X(31,502), XRB_MASK, POWER, 0, {RT, RA}}, + +{"popcntd", X(31,506), XRB_MASK, POWER7|PPCA2, 0, {RA, RS}}, + +{"cmpb", X(31,508), X_MASK, POWER6|PPCA2|PPC476, 0, {RA, RS, RB}}, + +{"mcrxr", X(31,512), XBFRARB_MASK, 
COM, POWER7, {BF}}, + +{"lbdcbx", X(31,514), X_MASK, E200Z4, 0, {RT, RA, RB}}, +{"lbdx", X(31,515), X_MASK, E500MC, 0, {RT, RA, RB}}, -{ "mfsrin", X(31,659), XRA_MASK, PPC32, { RT, RB } }, +{"bblels", X(31,518), X_MASK, PPCBRLK, 0, {0}}, -{ "stdbrx", X(31,660), X_MASK, CELL, { RS, RA0, RB } }, +{"lvlx", X(31,519), X_MASK, CELL, 0, {VD, RA0, RB}}, +{"lbfcmux", APU(31,519,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "stswx", X(31,661), X_MASK, PPCCOM, { RS, RA0, RB } }, -{ "stsx", X(31,661), X_MASK, PWRCOM, { RS, RA0, RB } }, +{"subfco", XO(31,8,1,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"sfo", XO(31,8,1,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"subco", XO(31,8,1,0), XO_MASK, PPCCOM, 0, {RT, RB, RA}}, +{"subfco.", XO(31,8,1,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"sfo.", XO(31,8,1,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"subco.", XO(31,8,1,1), XO_MASK, PPCCOM, 0, {RT, RB, RA}}, -{ "stwbrx", X(31,662), X_MASK, PPCCOM, { RS, RA0, RB } }, -{ "stbrx", X(31,662), X_MASK, PWRCOM, { RS, RA0, RB } }, +{"addco", XO(31,10,1,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"ao", XO(31,10,1,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"addco.", XO(31,10,1,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"ao.", XO(31,10,1,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "stfsx", X(31,663), X_MASK, COM, { FRS, RA0, RB } }, +{"lxsspx", X(31,524), XX1_MASK, PPCVSX2, 0, {XT6, RA0, RB}}, -{ "srq", XRC(31,664,0), X_MASK, M601, { RA, RS, RB } }, -{ "srq.", XRC(31,664,1), X_MASK, M601, { RA, RS, RB } }, +{"clcs", X(31,531), XRB_MASK, M601, 0, {RT, RA}}, -{ "sre", XRC(31,665,0), X_MASK, M601, { RA, RS, RB } }, -{ "sre.", XRC(31,665,1), X_MASK, M601, { RA, RS, RB } }, +{"ldbrx", X(31,532), X_MASK, CELL|POWER7|PPCA2, 0, {RT, RA0, RB}}, -{ "stwbrxe", X(31,670), X_MASK, BOOKE64, { RS, RA0, RB } }, +{"lswx", X(31,533), X_MASK, PPCCOM, E500|E500MC, {RT, RAX, RBX}}, +{"lsx", X(31,533), X_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "stfsxe", X(31,671), X_MASK, BOOKE64, { FRS, RA0, RB } }, +{"lwbrx", X(31,534), X_MASK, PPCCOM, 0, {RT, RA0, RB}}, +{"lbrx", X(31,534), X_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "stfsux", X(31,695), X_MASK, COM, { FRS, RAS, RB } }, +{"lfsx", X(31,535), X_MASK, COM, PPCEFS, {FRT, RA0, RB}}, -{ "sriq", XRC(31,696,0), X_MASK, M601, { RA, RS, SH } }, -{ "sriq.", XRC(31,696,1), X_MASK, M601, { RA, RS, SH } }, +{"srw", XRC(31,536,0), X_MASK, PPCCOM, 0, {RA, RS, RB}}, +{"sr", XRC(31,536,0), X_MASK, PWRCOM, 0, {RA, RS, RB}}, +{"srw.", XRC(31,536,1), X_MASK, PPCCOM, 0, {RA, RS, RB}}, +{"sr.", XRC(31,536,1), X_MASK, PWRCOM, 0, {RA, RS, RB}}, -{ "stfsuxe", X(31,703), X_MASK, BOOKE64, { FRS, RAS, RB } }, +{"rrib", XRC(31,537,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"rrib.", XRC(31,537,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "stswi", X(31,725), X_MASK, PPCCOM, { RS, RA0, NB } }, -{ "stsi", X(31,725), X_MASK, PWRCOM, { RS, RA0, NB } }, +{"cnttzw", XRC(31,538,0), XRB_MASK, POWER9, 0, {RA, RS}}, +{"cnttzw.", XRC(31,538,1), XRB_MASK, POWER9, 0, {RA, RS}}, -{ "stfdx", X(31,727), X_MASK, COM, { FRS, RA0, RB } }, +{"srd", XRC(31,539,0), X_MASK, PPC64, 0, {RA, RS, RB}}, +{"srd.", XRC(31,539,1), X_MASK, PPC64, 0, {RA, RS, RB}}, -{ "srlq", XRC(31,728,0), X_MASK, M601, { RA, RS, RB } }, -{ "srlq.", XRC(31,728,1), X_MASK, M601, { RA, RS, RB } }, +{"maskir", XRC(31,541,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"maskir.", XRC(31,541,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "sreq", XRC(31,729,0), X_MASK, M601, { RA, RS, RB } }, -{ "sreq.", XRC(31,729,1), X_MASK, M601, { RA, RS, RB } }, +{"lhdcbx", X(31,546), X_MASK, E200Z4, 0, {RT, RA, RB}}, +{"lhdx", X(31,547), 
X_MASK, E500MC, 0, {RT, RA, RB}}, -{ "stfdxe", X(31,735), X_MASK, BOOKE64, { FRS, RA0, RB } }, +{"lvtrx", X(31,549), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, -{ "mftgpr", XRC(31,735,0), XRA_MASK, POWER6, { RT, FRB } }, +{"bbelr", X(31,550), X_MASK, PPCBRLK, 0, {0}}, -{ "dcba", X(31,758), XRT_MASK, PPC405 | BOOKE, { RA, RB } }, +{"lvrx", X(31,551), X_MASK, CELL, 0, {VD, RA0, RB}}, +{"lhfcmux", APU(31,551,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "stfdux", X(31,759), X_MASK, COM, { FRS, RAS, RB } }, +{"subfo", XO(31,40,1,0), XO_MASK, PPC, 0, {RT, RA, RB}}, +{"subo", XO(31,40,1,0), XO_MASK, PPC, 0, {RT, RB, RA}}, +{"subfo.", XO(31,40,1,1), XO_MASK, PPC, 0, {RT, RA, RB}}, +{"subo.", XO(31,40,1,1), XO_MASK, PPC, 0, {RT, RB, RA}}, -{ "srliq", XRC(31,760,0), X_MASK, M601, { RA, RS, SH } }, -{ "srliq.", XRC(31,760,1), X_MASK, M601, { RA, RS, SH } }, +{"tlbsync", X(31,566), 0xffffffff, PPC, 0, {0}}, -{ "dcbae", X(31,766), XRT_MASK, BOOKE64, { RA, RB } }, +{"lfsux", X(31,567), X_MASK, COM, PPCEFS, {FRT, RAS, RB}}, -{ "stfduxe", X(31,767), X_MASK, BOOKE64, { FRS, RAS, RB } }, +{"cnttzd", XRC(31,570,0), XRB_MASK, POWER9, 0, {RA, RS}}, +{"cnttzd.", XRC(31,570,1), XRB_MASK, POWER9, 0, {RA, RS}}, -{ "tlbivax", X(31,786), XRT_MASK, BOOKE, { RA, RB } }, -{ "tlbivaxe",X(31,787), XRT_MASK, BOOKE64, { RA, RB } }, +{"mcrxrx", X(31,576), XBFRARB_MASK, POWER9, 0, {BF}}, -{ "lwzcix", X(31,789), X_MASK, POWER6, { RT, RA0, RB } }, +{"lwdcbx", X(31,578), X_MASK, E200Z4, 0, {RT, RA, RB}}, +{"lwdx", X(31,579), X_MASK, E500MC, 0, {RT, RA, RB}}, -{ "lhbrx", X(31,790), X_MASK, COM, { RT, RA0, RB } }, +{"lvtlx", X(31,581), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, -{ "sraw", XRC(31,792,0), X_MASK, PPCCOM, { RA, RS, RB } }, -{ "sra", XRC(31,792,0), X_MASK, PWRCOM, { RA, RS, RB } }, -{ "sraw.", XRC(31,792,1), X_MASK, PPCCOM, { RA, RS, RB } }, -{ "sra.", XRC(31,792,1), X_MASK, PWRCOM, { RA, RS, RB } }, +{"lwat", X(31,582), X_MASK, POWER9, 0, {RT, RA0, FC}}, -{ "srad", XRC(31,794,0), X_MASK, PPC64, { RA, RS, RB } }, -{ "srad.", XRC(31,794,1), X_MASK, PPC64, { RA, RS, RB } }, +{"lwfcmux", APU(31,583,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "lhbrxe", X(31,798), X_MASK, BOOKE64, { RT, RA0, RB } }, +{"lxsdx", X(31,588), XX1_MASK, PPCVSX, 0, {XT6, RA0, RB}}, -{ "ldxe", X(31,799), X_MASK, BOOKE64, { RT, RA0, RB } }, -{ "lduxe", X(31,831), X_MASK, BOOKE64, { RT, RA0, RB } }, +{"mfsr", X(31,595), XRB_MASK|(1<<20), COM, NON32, {RT, SR}}, -{ "rac", X(31,818), X_MASK, PWRCOM, { RT, RA, RB } }, +{"lswi", X(31,597), X_MASK, PPCCOM, E500|E500MC, {RT, RAX, NBI}}, +{"lsi", X(31,597), X_MASK, PWRCOM, 0, {RT, RA0, NB}}, -{ "lhzcix", X(31,821), X_MASK, POWER6, { RT, RA0, RB } }, +{"hwsync", XSYNC(31,598,0), 0xffffffff, POWER4, BOOKE|PPC476, {0}}, +{"lwsync", XSYNC(31,598,1), 0xffffffff, PPC, E500, {0}}, +{"ptesync", XSYNC(31,598,2), 0xffffffff, PPC64, 0, {0}}, +{"sync", X(31,598), XSYNCLE_MASK, E6500, 0, {LS, ESYNC}}, +{"sync", X(31,598), XSYNC_MASK, PPCCOM, BOOKE|PPC476, {LS}}, +{"msync", X(31,598), 0xffffffff, BOOKE|PPCA2|PPC476, 0, {0}}, +{"sync", X(31,598), 0xffffffff, BOOKE|PPC476, E6500, {0}}, +{"lwsync", X(31,598), 0xffffffff, E500, 0, {0}}, +{"dcs", X(31,598), 0xffffffff, PWRCOM, 0, {0}}, -{ "dss", XDSS(31,822,0), XDSS_MASK, PPCVEC, { STRM } }, -{ "dssall", XDSS(31,822,1), XDSS_MASK, PPCVEC, { 0 } }, +{"lfdx", X(31,599), X_MASK, COM, PPCEFS, {FRT, RA0, RB}}, -{ "srawi", XRC(31,824,0), X_MASK, PPCCOM, { RA, RS, SH } }, -{ "srai", XRC(31,824,0), X_MASK, PWRCOM, { RA, RS, SH } }, -{ "srawi.", XRC(31,824,1), X_MASK, PPCCOM, { RA, RS, SH } }, -{ 
"srai.", XRC(31,824,1), X_MASK, PWRCOM, { RA, RS, SH } }, +{"mffgpr", XRC(31,607,0), XRA_MASK, POWER6, POWER7, {FRT, RB}}, +{"lfdepx", X(31,607), X_MASK, E500MC|PPCA2, 0, {FRT, RA0, RB}}, -{ "slbmfev", X(31,851), XRA_MASK, PPC64, { RT, RB } }, +{"lddx", X(31,611), X_MASK, E500MC, 0, {RT, RA, RB}}, -{ "lbzcix", X(31,853), X_MASK, POWER6, { RT, RA0, RB } }, +{"lvswx", X(31,613), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, -{ "mbar", X(31,854), X_MASK, BOOKE, { MO } }, -{ "eieio", X(31,854), 0xffffffff, PPC, { 0 } }, +{"ldat", X(31,614), X_MASK, POWER9, 0, {RT, RA0, FC}}, -{ "lfiwax", X(31,855), X_MASK, POWER6, { FRT, RA0, RB } }, +{"lqfcmux", APU(31,615,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "ldcix", X(31,885), X_MASK, POWER6, { RT, RA0, RB } }, +{"nego", XO(31,104,1,0), XORB_MASK, COM, 0, {RT, RA}}, +{"nego.", XO(31,104,1,1), XORB_MASK, COM, 0, {RT, RA}}, -{ "tlbsx", XRC(31,914,0), X_MASK, PPC403|BOOKE, { RTO, RA, RB } }, -{ "tlbsx.", XRC(31,914,1), X_MASK, PPC403|BOOKE, { RTO, RA, RB } }, -{ "tlbsxe", XRC(31,915,0), X_MASK, BOOKE64, { RTO, RA, RB } }, -{ "tlbsxe.", XRC(31,915,1), X_MASK, BOOKE64, { RTO, RA, RB } }, +{"mulo", XO(31,107,1,0), XO_MASK, M601, 0, {RT, RA, RB}}, +{"mulo.", XO(31,107,1,1), XO_MASK, M601, 0, {RT, RA, RB}}, -{ "slbmfee", X(31,915), XRA_MASK, PPC64, { RT, RB } }, +{"mfsri", X(31,627), X_MASK, M601, 0, {RT, RA, RB}}, -{ "stwcix", X(31,917), X_MASK, POWER6, { RS, RA0, RB } }, +{"dclst", X(31,630), XRB_MASK, M601, 0, {RS, RA}}, -{ "sthbrx", X(31,918), X_MASK, COM, { RS, RA0, RB } }, +{"lfdux", X(31,631), X_MASK, COM, PPCEFS, {FRT, RAS, RB}}, -{ "sraq", XRC(31,920,0), X_MASK, M601, { RA, RS, RB } }, -{ "sraq.", XRC(31,920,1), X_MASK, M601, { RA, RS, RB } }, +{"stbdcbx", X(31,642), X_MASK, E200Z4, 0, {RS, RA, RB}}, +{"stbdx", X(31,643), X_MASK, E500MC, 0, {RS, RA, RB}}, -{ "srea", XRC(31,921,0), X_MASK, M601, { RA, RS, RB } }, -{ "srea.", XRC(31,921,1), X_MASK, M601, { RA, RS, RB } }, +{"stvlx", X(31,647), X_MASK, CELL, 0, {VS, RA0, RB}}, +{"stbfcmux", APU(31,647,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "extsh", XRC(31,922,0), XRB_MASK, PPCCOM, { RA, RS } }, -{ "exts", XRC(31,922,0), XRB_MASK, PWRCOM, { RA, RS } }, -{ "extsh.", XRC(31,922,1), XRB_MASK, PPCCOM, { RA, RS } }, -{ "exts.", XRC(31,922,1), XRB_MASK, PWRCOM, { RA, RS } }, +{"stxsspx", X(31,652), XX1_MASK, PPCVSX2, 0, {XS6, RA0, RB}}, -{ "sthbrxe", X(31,926), X_MASK, BOOKE64, { RS, RA0, RB } }, +{"tbegin.", XRC(31,654,1), XRTLRARB_MASK, PPCHTM, 0, {HTM_R}}, -{ "stdxe", X(31,927), X_MASK, BOOKE64, { RS, RA0, RB } }, +{"subfeo", XO(31,136,1,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"sfeo", XO(31,136,1,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"subfeo.", XO(31,136,1,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"sfeo.", XO(31,136,1,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "tlbrehi", XTLB(31,946,0), XTLB_MASK, PPC403, { RT, RA } }, -{ "tlbrelo", XTLB(31,946,1), XTLB_MASK, PPC403, { RT, RA } }, -{ "tlbre", X(31,946), X_MASK, PPC403|BOOKE, { RSO, RAOPT, SHO } }, +{"addeo", XO(31,138,1,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"aeo", XO(31,138,1,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"addeo.", XO(31,138,1,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"aeo.", XO(31,138,1,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "sthcix", X(31,949), X_MASK, POWER6, { RS, RA0, RB } }, +{"mfsrin", X(31,659), XRA_MASK, PPC, NON32, {RT, RB}}, -{ "sraiq", XRC(31,952,0), X_MASK, M601, { RA, RS, SH } }, -{ "sraiq.", XRC(31,952,1), X_MASK, M601, { RA, RS, SH } }, +{"stdbrx", X(31,660), X_MASK, CELL|POWER7|PPCA2, 0, {RS, RA0, RB}}, -{ "extsb", 
XRC(31,954,0), XRB_MASK, PPC, { RA, RS} }, -{ "extsb.", XRC(31,954,1), XRB_MASK, PPC, { RA, RS} }, +{"stswx", X(31,661), X_MASK, PPCCOM, E500|E500MC, {RS, RA0, RB}}, +{"stsx", X(31,661), X_MASK, PWRCOM, 0, {RS, RA0, RB}}, -{ "stduxe", X(31,959), X_MASK, BOOKE64, { RS, RAS, RB } }, +{"stwbrx", X(31,662), X_MASK, PPCCOM, 0, {RS, RA0, RB}}, +{"stbrx", X(31,662), X_MASK, PWRCOM, 0, {RS, RA0, RB}}, -{ "iccci", X(31,966), XRT_MASK, PPC403|PPC440, { RA, RB } }, +{"stfsx", X(31,663), X_MASK, COM, PPCEFS, {FRS, RA0, RB}}, -{ "tlbwehi", XTLB(31,978,0), XTLB_MASK, PPC403, { RT, RA } }, -{ "tlbwelo", XTLB(31,978,1), XTLB_MASK, PPC403, { RT, RA } }, -{ "tlbwe", X(31,978), X_MASK, PPC403|BOOKE, { RSO, RAOPT, SHO } }, -{ "tlbld", X(31,978), XRTRA_MASK, PPC, { RB } }, +{"srq", XRC(31,664,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"srq.", XRC(31,664,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "stbcix", X(31,981), X_MASK, POWER6, { RS, RA0, RB } }, +{"sre", XRC(31,665,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"sre.", XRC(31,665,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "icbi", X(31,982), XRT_MASK, PPC, { RA, RB } }, +{"sthdcbx", X(31,674), X_MASK, E200Z4, 0, {RS, RA, RB}}, +{"sthdx", X(31,675), X_MASK, E500MC, 0, {RS, RA, RB}}, -{ "stfiwx", X(31,983), X_MASK, PPC, { FRS, RA0, RB } }, +{"stvfrx", X(31,677), X_MASK, PPCVEC2, 0, {VS, RA0, RB}}, -{ "extsw", XRC(31,986,0), XRB_MASK, PPC64 | BOOKE64,{ RA, RS } }, -{ "extsw.", XRC(31,986,1), XRB_MASK, PPC64, { RA, RS } }, +{"stvrx", X(31,679), X_MASK, CELL, 0, {VS, RA0, RB}}, +{"sthfcmux", APU(31,679,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "icread", X(31,998), XRT_MASK, PPC403|PPC440, { RA, RB } }, +{"tendall.", XRC(31,686,1)|(1<<25), XRTRARB_MASK, PPCHTM, 0, {0}}, +{"tend.", XRC(31,686,1), XRTARARB_MASK, PPCHTM, 0, {HTM_A}}, -{ "icbie", X(31,990), XRT_MASK, BOOKE64, { RA, RB } }, -{ "stfiwxe", X(31,991), X_MASK, BOOKE64, { FRS, RA0, RB } }, +{"stbcx.", XRC(31,694,1), X_MASK, POWER8|E6500, 0, {RS, RA0, RB}}, -{ "tlbli", X(31,1010), XRTRA_MASK, PPC, { RB } }, +{"stfsux", X(31,695), X_MASK, COM, PPCEFS, {FRS, RAS, RB}}, -{ "stdcix", X(31,1013), X_MASK, POWER6, { RS, RA0, RB } }, +{"sriq", XRC(31,696,0), X_MASK, M601, 0, {RA, RS, SH}}, +{"sriq.", XRC(31,696,1), X_MASK, M601, 0, {RA, RS, SH}}, -{ "dcbzl", XOPL(31,1014,1), XRT_MASK,POWER4, { RA, RB } }, -{ "dcbz", X(31,1014), XRT_MASK, PPC, { RA, RB } }, -{ "dclz", X(31,1014), XRT_MASK, PPC, { RA, RB } }, +{"stwdcbx", X(31,706), X_MASK, E200Z4, 0, {RS, RA, RB}}, +{"stwdx", X(31,707), X_MASK, E500MC, 0, {RS, RA, RB}}, -{ "dcbze", X(31,1022), XRT_MASK, BOOKE64, { RA, RB } }, +{"stvflx", X(31,709), X_MASK, PPCVEC2, 0, {VS, RA0, RB}}, -{ "lvebx", X(31, 7), X_MASK, PPCVEC, { VD, RA, RB } }, -{ "lvehx", X(31, 39), X_MASK, PPCVEC, { VD, RA, RB } }, -{ "lvewx", X(31, 71), X_MASK, PPCVEC, { VD, RA, RB } }, -{ "lvsl", X(31, 6), X_MASK, PPCVEC, { VD, RA, RB } }, -{ "lvsr", X(31, 38), X_MASK, PPCVEC, { VD, RA, RB } }, -{ "lvx", X(31, 103), X_MASK, PPCVEC, { VD, RA, RB } }, -{ "lvxl", X(31, 359), X_MASK, PPCVEC, { VD, RA, RB } }, -{ "stvebx", X(31, 135), X_MASK, PPCVEC, { VS, RA, RB } }, -{ "stvehx", X(31, 167), X_MASK, PPCVEC, { VS, RA, RB } }, -{ "stvewx", X(31, 199), X_MASK, PPCVEC, { VS, RA, RB } }, -{ "stvx", X(31, 231), X_MASK, PPCVEC, { VS, RA, RB } }, -{ "stvxl", X(31, 487), X_MASK, PPCVEC, { VS, RA, RB } }, +{"stwat", X(31,710), X_MASK, POWER9, 0, {RS, RA0, FC}}, -/* New load/store left/right index vector instructions that are in the Cell only. 
*/ -{ "lvlx", X(31, 519), X_MASK, CELL, { VD, RA0, RB } }, -{ "lvlxl", X(31, 775), X_MASK, CELL, { VD, RA0, RB } }, -{ "lvrx", X(31, 551), X_MASK, CELL, { VD, RA0, RB } }, -{ "lvrxl", X(31, 807), X_MASK, CELL, { VD, RA0, RB } }, -{ "stvlx", X(31, 647), X_MASK, CELL, { VS, RA0, RB } }, -{ "stvlxl", X(31, 903), X_MASK, CELL, { VS, RA0, RB } }, -{ "stvrx", X(31, 679), X_MASK, CELL, { VS, RA0, RB } }, -{ "stvrxl", X(31, 935), X_MASK, CELL, { VS, RA0, RB } }, +{"stwfcmux", APU(31,711,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "lwz", OP(32), OP_MASK, PPCCOM, { RT, D, RA0 } }, -{ "l", OP(32), OP_MASK, PWRCOM, { RT, D, RA0 } }, +{"stxsdx", X(31,716), XX1_MASK, PPCVSX, 0, {XS6, RA0, RB}}, -{ "lwzu", OP(33), OP_MASK, PPCCOM, { RT, D, RAL } }, -{ "lu", OP(33), OP_MASK, PWRCOM, { RT, D, RA0 } }, +{"tcheck", X(31,718), XRTBFRARB_MASK, PPCHTM, 0, {BF}}, -{ "lbz", OP(34), OP_MASK, COM, { RT, D, RA0 } }, +{"subfzeo", XO(31,200,1,0), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"sfzeo", XO(31,200,1,0), XORB_MASK, PWRCOM, 0, {RT, RA}}, +{"subfzeo.", XO(31,200,1,1), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"sfzeo.", XO(31,200,1,1), XORB_MASK, PWRCOM, 0, {RT, RA}}, -{ "lbzu", OP(35), OP_MASK, COM, { RT, D, RAL } }, +{"addzeo", XO(31,202,1,0), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"azeo", XO(31,202,1,0), XORB_MASK, PWRCOM, 0, {RT, RA}}, +{"addzeo.", XO(31,202,1,1), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"azeo.", XO(31,202,1,1), XORB_MASK, PWRCOM, 0, {RT, RA}}, -{ "stw", OP(36), OP_MASK, PPCCOM, { RS, D, RA0 } }, -{ "st", OP(36), OP_MASK, PWRCOM, { RS, D, RA0 } }, +{"stswi", X(31,725), X_MASK, PPCCOM, E500|E500MC, {RS, RA0, NB}}, +{"stsi", X(31,725), X_MASK, PWRCOM, 0, {RS, RA0, NB}}, -{ "stwu", OP(37), OP_MASK, PPCCOM, { RS, D, RAS } }, -{ "stu", OP(37), OP_MASK, PWRCOM, { RS, D, RA0 } }, +{"sthcx.", XRC(31,726,1), X_MASK, POWER8|E6500, 0, {RS, RA0, RB}}, -{ "stb", OP(38), OP_MASK, COM, { RS, D, RA0 } }, +{"stfdx", X(31,727), X_MASK, COM, PPCEFS, {FRS, RA0, RB}}, -{ "stbu", OP(39), OP_MASK, COM, { RS, D, RAS } }, +{"srlq", XRC(31,728,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"srlq.", XRC(31,728,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "lhz", OP(40), OP_MASK, COM, { RT, D, RA0 } }, +{"sreq", XRC(31,729,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"sreq.", XRC(31,729,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "lhzu", OP(41), OP_MASK, COM, { RT, D, RAL } }, +{"mftgpr", XRC(31,735,0), XRA_MASK, POWER6, POWER7, {RT, FRB}}, +{"stfdepx", X(31,735), X_MASK, E500MC|PPCA2, 0, {FRS, RA0, RB}}, -{ "lha", OP(42), OP_MASK, COM, { RT, D, RA0 } }, +{"stddx", X(31,739), X_MASK, E500MC, 0, {RS, RA, RB}}, -{ "lhau", OP(43), OP_MASK, COM, { RT, D, RAL } }, +{"stvswx", X(31,741), X_MASK, PPCVEC2, 0, {VS, RA0, RB}}, -{ "sth", OP(44), OP_MASK, COM, { RS, D, RA0 } }, +{"stdat", X(31,742), X_MASK, POWER9, 0, {RS, RA0, FC}}, -{ "sthu", OP(45), OP_MASK, COM, { RS, D, RAS } }, +{"stqfcmux", APU(31,743,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "lmw", OP(46), OP_MASK, PPCCOM, { RT, D, RAM } }, -{ "lm", OP(46), OP_MASK, PWRCOM, { RT, D, RA0 } }, +{"subfmeo", XO(31,232,1,0), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"sfmeo", XO(31,232,1,0), XORB_MASK, PWRCOM, 0, {RT, RA}}, +{"subfmeo.", XO(31,232,1,1), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"sfmeo.", XO(31,232,1,1), XORB_MASK, PWRCOM, 0, {RT, RA}}, -{ "stmw", OP(47), OP_MASK, PPCCOM, { RS, D, RA0 } }, -{ "stm", OP(47), OP_MASK, PWRCOM, { RS, D, RA0 } }, +{"mulldo", XO(31,233,1,0), XO_MASK, PPC64, 0, {RT, RA, RB}}, +{"mulldo.", XO(31,233,1,1), XO_MASK, PPC64, 0, {RT, RA, RB}}, -{ "lfs", OP(48), OP_MASK, COM, { FRT, D, RA0 } }, 
+{"addmeo", XO(31,234,1,0), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"ameo", XO(31,234,1,0), XORB_MASK, PWRCOM, 0, {RT, RA}}, +{"addmeo.", XO(31,234,1,1), XORB_MASK, PPCCOM, 0, {RT, RA}}, +{"ameo.", XO(31,234,1,1), XORB_MASK, PWRCOM, 0, {RT, RA}}, -{ "lfsu", OP(49), OP_MASK, COM, { FRT, D, RAS } }, +{"mullwo", XO(31,235,1,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"mulso", XO(31,235,1,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"mullwo.", XO(31,235,1,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"mulso.", XO(31,235,1,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "lfd", OP(50), OP_MASK, COM, { FRT, D, RA0 } }, +{"tsuspend.", XRCL(31,750,0,1), XRTRARB_MASK,PPCHTM, 0, {0}}, +{"tresume.", XRCL(31,750,1,1), XRTRARB_MASK,PPCHTM, 0, {0}}, +{"tsr.", XRC(31,750,1), XRTLRARB_MASK,PPCHTM, 0, {L}}, -{ "lfdu", OP(51), OP_MASK, COM, { FRT, D, RAS } }, +{"darn", X(31,755), XLRAND_MASK, POWER9, 0, {RT, LRAND}}, -{ "stfs", OP(52), OP_MASK, COM, { FRS, D, RA0 } }, +{"dcba", X(31,758), XRT_MASK, PPC405|PPC7450|BOOKE|PPCA2|PPC476, 0, {RA0, RB}}, +{"dcbal", XOPL(31,758,1), XRT_MASK, E500MC, 0, {RA0, RB}}, -{ "stfsu", OP(53), OP_MASK, COM, { FRS, D, RAS } }, +{"stfdux", X(31,759), X_MASK, COM, PPCEFS, {FRS, RAS, RB}}, -{ "stfd", OP(54), OP_MASK, COM, { FRS, D, RA0 } }, +{"srliq", XRC(31,760,0), X_MASK, M601, 0, {RA, RS, SH}}, +{"srliq.", XRC(31,760,1), X_MASK, M601, 0, {RA, RS, SH}}, -{ "stfdu", OP(55), OP_MASK, COM, { FRS, D, RAS } }, +{"lvsm", X(31,773), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, -{ "lq", OP(56), OP_MASK, POWER4, { RTQ, DQ, RAQ } }, +{"copy", XOPL(31,774,1), XRT_MASK, POWER9, 0, {RA0, RB}}, -{ "lfq", OP(56), OP_MASK, POWER2, { FRT, D, RA0 } }, +{"stvepxl", X(31,775), X_MASK, PPCVEC2, 0, {VS, RA0, RB}}, +{"lvlxl", X(31,775), X_MASK, CELL, 0, {VD, RA0, RB}}, +{"ldfcmux", APU(31,775,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "lfqu", OP(57), OP_MASK, POWER2, { FRT, D, RA0 } }, +{"dozo", XO(31,264,1,0), XO_MASK, M601, 0, {RT, RA, RB}}, +{"dozo.", XO(31,264,1,1), XO_MASK, M601, 0, {RT, RA, RB}}, -{ "lfdp", OP(57), OP_MASK, POWER6, { FRT, D, RA0 } }, +{"addo", XO(31,266,1,0), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"caxo", XO(31,266,1,0), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, +{"addo.", XO(31,266,1,1), XO_MASK, PPCCOM, 0, {RT, RA, RB}}, +{"caxo.", XO(31,266,1,1), XO_MASK, PWRCOM, 0, {RT, RA, RB}}, -{ "lbze", DEO(58,0), DE_MASK, BOOKE64, { RT, DE, RA0 } }, -{ "lbzue", DEO(58,1), DE_MASK, BOOKE64, { RT, DE, RAL } }, -{ "lhze", DEO(58,2), DE_MASK, BOOKE64, { RT, DE, RA0 } }, -{ "lhzue", DEO(58,3), DE_MASK, BOOKE64, { RT, DE, RAL } }, -{ "lhae", DEO(58,4), DE_MASK, BOOKE64, { RT, DE, RA0 } }, -{ "lhaue", DEO(58,5), DE_MASK, BOOKE64, { RT, DE, RAL } }, -{ "lwze", DEO(58,6), DE_MASK, BOOKE64, { RT, DE, RA0 } }, -{ "lwzue", DEO(58,7), DE_MASK, BOOKE64, { RT, DE, RAL } }, -{ "stbe", DEO(58,8), DE_MASK, BOOKE64, { RS, DE, RA0 } }, -{ "stbue", DEO(58,9), DE_MASK, BOOKE64, { RS, DE, RAS } }, -{ "sthe", DEO(58,10), DE_MASK, BOOKE64, { RS, DE, RA0 } }, -{ "sthue", DEO(58,11), DE_MASK, BOOKE64, { RS, DE, RAS } }, -{ "stwe", DEO(58,14), DE_MASK, BOOKE64, { RS, DE, RA0 } }, -{ "stwue", DEO(58,15), DE_MASK, BOOKE64, { RS, DE, RAS } }, +{"modsd", X(31,777), X_MASK, POWER9, 0, {RT, RA, RB}}, +{"modsw", X(31,779), X_MASK, POWER9, 0, {RT, RA, RB}}, -{ "ld", DSO(58,0), DS_MASK, PPC64, { RT, DS, RA0 } }, +{"lxvw4x", X(31,780), XX1_MASK, PPCVSX, 0, {XT6, RA0, RB}}, +{"lxsibzx", X(31,781), XX1_MASK, PPCVSX3, 0, {XT6, RA0, RB}}, -{ "ldu", DSO(58,1), DS_MASK, PPC64, { RT, DS, RAL } }, +{"tabortwc.", XRC(31,782,1), X_MASK, PPCHTM, 0, {TO, RA, RB}}, -{ 
"lwa", DSO(58,2), DS_MASK, PPC64, { RT, DS, RA0 } }, +{"tlbivax", X(31,786), XRT_MASK, BOOKE|PPCA2|PPC476, 0, {RA0, RB}}, -{ "dadd", XRC(59,2,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "dadd.", XRC(59,2,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"lwzcix", X(31,789), X_MASK, POWER6, 0, {RT, RA0, RB}}, -{ "dqua", ZRC(59,3,0), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, -{ "dqua.", ZRC(59,3,1), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{"lhbrx", X(31,790), X_MASK, COM, 0, {RT, RA0, RB}}, -{ "fdivs", A(59,18,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, -{ "fdivs.", A(59,18,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{"lfdpx", X(31,791), X_MASK, POWER6, POWER7, {FRTp, RA0, RB}}, +{"lfqx", X(31,791), X_MASK, POWER2, 0, {FRT, RA, RB}}, -{ "fsubs", A(59,20,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, -{ "fsubs.", A(59,20,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{"sraw", XRC(31,792,0), X_MASK, PPCCOM, 0, {RA, RS, RB}}, +{"sra", XRC(31,792,0), X_MASK, PWRCOM, 0, {RA, RS, RB}}, +{"sraw.", XRC(31,792,1), X_MASK, PPCCOM, 0, {RA, RS, RB}}, +{"sra.", XRC(31,792,1), X_MASK, PWRCOM, 0, {RA, RS, RB}}, -{ "fadds", A(59,21,0), AFRC_MASK, PPC, { FRT, FRA, FRB } }, -{ "fadds.", A(59,21,1), AFRC_MASK, PPC, { FRT, FRA, FRB } }, +{"srad", XRC(31,794,0), X_MASK, PPC64, 0, {RA, RS, RB}}, +{"srad.", XRC(31,794,1), X_MASK, PPC64, 0, {RA, RS, RB}}, -{ "fsqrts", A(59,22,0), AFRAFRC_MASK, PPC, { FRT, FRB } }, -{ "fsqrts.", A(59,22,1), AFRAFRC_MASK, PPC, { FRT, FRB } }, +{"lfddx", X(31,803), X_MASK, E500MC, 0, {FRT, RA, RB}}, -{ "fres", A(59,24,0), AFRALFRC_MASK, PPC, { FRT, FRB, A_L } }, -{ "fres.", A(59,24,1), AFRALFRC_MASK, PPC, { FRT, FRB, A_L } }, +{"lvtrxl", X(31,805), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, +{"stvepx", X(31,807), X_MASK, PPCVEC2, 0, {VS, RA0, RB}}, +{"lvrxl", X(31,807), X_MASK, CELL, 0, {VD, RA0, RB}}, -{ "fmuls", A(59,25,0), AFRB_MASK, PPC, { FRT, FRA, FRC } }, -{ "fmuls.", A(59,25,1), AFRB_MASK, PPC, { FRT, FRA, FRC } }, +{"lxvh8x", X(31,812), XX1_MASK, PPCVSX3, 0, {XT6, RA0, RB}}, +{"lxsihzx", X(31,813), XX1_MASK, PPCVSX3, 0, {XT6, RA0, RB}}, -{ "frsqrtes", A(59,26,0), AFRALFRC_MASK,POWER5, { FRT, FRB, A_L } }, -{ "frsqrtes.",A(59,26,1), AFRALFRC_MASK,POWER5, { FRT, FRB, A_L } }, +{"tabortdc.", XRC(31,814,1), X_MASK, PPCHTM, 0, {TO, RA, RB}}, -{ "fmsubs", A(59,28,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, -{ "fmsubs.", A(59,28,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{"rac", X(31,818), X_MASK, M601, 0, {RT, RA, RB}}, -{ "fmadds", A(59,29,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, -{ "fmadds.", A(59,29,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{"erativax", X(31,819), X_MASK, PPCA2, 0, {RS, RA0, RB}}, -{ "fnmsubs", A(59,30,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, -{ "fnmsubs.",A(59,30,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{"lhzcix", X(31,821), X_MASK, POWER6, 0, {RT, RA0, RB}}, -{ "fnmadds", A(59,31,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, -{ "fnmadds.",A(59,31,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{"dss", XDSS(31,822,0), XDSS_MASK, PPCVEC, 0, {STRM}}, -{ "dmul", XRC(59,34,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "dmul.", XRC(59,34,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"lfqux", X(31,823), X_MASK, POWER2, 0, {FRT, RA, RB}}, -{ "drrnd", ZRC(59,35,0), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, -{ "drrnd.", ZRC(59,35,1), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{"srawi", XRC(31,824,0), X_MASK, PPCCOM, 0, {RA, RS, SH}}, +{"srai", XRC(31,824,0), X_MASK, PWRCOM, 0, {RA, RS, SH}}, +{"srawi.", XRC(31,824,1), X_MASK, PPCCOM, 0, {RA, RS, SH}}, +{"srai.", XRC(31,824,1), X_MASK, PWRCOM, 0, {RA, RS, SH}}, -{ 
"dscli", ZRC(59,66,0), Z_MASK, POWER6, { FRT, FRA, SH16 } }, -{ "dscli.", ZRC(59,66,1), Z_MASK, POWER6, { FRT, FRA, SH16 } }, +{"sradi", XS(31,413,0), XS_MASK, PPC64, 0, {RA, RS, SH6}}, +{"sradi.", XS(31,413,1), XS_MASK, PPC64, 0, {RA, RS, SH6}}, -{ "dquai", ZRC(59,67,0), Z2_MASK, POWER6, { TE, FRT, FRB, RMC } }, -{ "dquai.", ZRC(59,67,1), Z2_MASK, POWER6, { TE, FRT, FRB, RMC } }, +{"lvtlxl", X(31,837), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, -{ "dscri", ZRC(59,98,0), Z_MASK, POWER6, { FRT, FRA, SH16 } }, -{ "dscri.", ZRC(59,98,1), Z_MASK, POWER6, { FRT, FRA, SH16 } }, +{"cpabort", X(31,838), XRTRARB_MASK,POWER9, 0, {0}}, -{ "drintx", ZRC(59,99,0), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, -{ "drintx.", ZRC(59,99,1), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, +{"divo", XO(31,331,1,0), XO_MASK, M601, 0, {RT, RA, RB}}, +{"divo.", XO(31,331,1,1), XO_MASK, M601, 0, {RT, RA, RB}}, -{ "dcmpo", X(59,130), X_MASK, POWER6, { BF, FRA, FRB } }, +{"lxvd2x", X(31,844), XX1_MASK, PPCVSX, 0, {XT6, RA0, RB}}, +{"lxvx", X(31,844), XX1_MASK, POWER8, POWER9|PPCVSX3, {XT6, RA0, RB}}, -{ "dtstex", X(59,162), X_MASK, POWER6, { BF, FRA, FRB } }, -{ "dtstdc", Z(59,194), Z_MASK, POWER6, { BF, FRA, DCM } }, -{ "dtstdg", Z(59,226), Z_MASK, POWER6, { BF, FRA, DGM } }, +{"tabortwci.", XRC(31,846,1), X_MASK, PPCHTM, 0, {TO, RA, HTM_SI}}, -{ "drintn", ZRC(59,227,0), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, -{ "drintn.", ZRC(59,227,1), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, +{"tlbsrx.", XRC(31,850,1), XRT_MASK, PPCA2, 0, {RA0, RB}}, -{ "dctdp", XRC(59,258,0), X_MASK, POWER6, { FRT, FRB } }, -{ "dctdp.", XRC(59,258,1), X_MASK, POWER6, { FRT, FRB } }, +{"slbiag", X(31,850), XRARB_MASK, POWER9, 0, {RS}}, +{"slbmfev", X(31,851), XRLA_MASK, POWER9, 0, {RT, RB, A_L}}, +{"slbmfev", X(31,851), XRA_MASK, PPC64, POWER9, {RT, RB}}, -{ "dctfix", XRC(59,290,0), X_MASK, POWER6, { FRT, FRB } }, -{ "dctfix.", XRC(59,290,1), X_MASK, POWER6, { FRT, FRB } }, +{"lbzcix", X(31,853), X_MASK, POWER6, 0, {RT, RA0, RB}}, -{ "ddedpd", XRC(59,322,0), X_MASK, POWER6, { SP, FRT, FRB } }, -{ "ddedpd.", XRC(59,322,1), X_MASK, POWER6, { SP, FRT, FRB } }, +{"eieio", X(31,854), 0xffffffff, PPC, BOOKE|PPCA2|PPC476, {0}}, +{"mbar", X(31,854), X_MASK, BOOKE|PPCA2|PPC476, 0, {MO}}, +{"eieio", XMBAR(31,854,1),0xffffffff, E500, 0, {0}}, +{"eieio", X(31,854), 0xffffffff, PPCA2|PPC476, 0, {0}}, -{ "dxex", XRC(59,354,0), X_MASK, POWER6, { FRT, FRB } }, -{ "dxex.", XRC(59,354,1), X_MASK, POWER6, { FRT, FRB } }, +{"lfiwax", X(31,855), X_MASK, POWER6|PPCA2|PPC476, 0, {FRT, RA0, RB}}, -{ "dsub", XRC(59,514,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "dsub.", XRC(59,514,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"lvswxl", X(31,869), X_MASK, PPCVEC2, 0, {VD, RA0, RB}}, -{ "ddiv", XRC(59,546,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "ddiv.", XRC(59,546,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"abso", XO(31,360,1,0), XORB_MASK, M601, 0, {RT, RA}}, +{"abso.", XO(31,360,1,1), XORB_MASK, M601, 0, {RT, RA}}, -{ "dcmpu", X(59,642), X_MASK, POWER6, { BF, FRA, FRB } }, +{"divso", XO(31,363,1,0), XO_MASK, M601, 0, {RT, RA, RB}}, +{"divso.", XO(31,363,1,1), XO_MASK, M601, 0, {RT, RA, RB}}, -{ "dtstsf", X(59,674), X_MASK, POWER6, { BF, FRA, FRB } }, +{"lxvb16x", X(31,876), XX1_MASK, PPCVSX3, 0, {XT6, RA0, RB}}, -{ "drsp", XRC(59,770,0), X_MASK, POWER6, { FRT, FRB } }, -{ "drsp.", XRC(59,770,1), X_MASK, POWER6, { FRT, FRB } }, +{"tabortdci.", XRC(31,878,1), X_MASK, PPCHTM, 0, {TO, RA, HTM_SI}}, -{ "dcffix", XRC(59,802,0), X_MASK, POWER6, { FRT, FRB } }, -{ "dcffix.", XRC(59,802,1), 
X_MASK, POWER6, { FRT, FRB } }, +{"rmieg", X(31,882), XRTRA_MASK, POWER9, 0, {RB}}, -{ "denbcd", XRC(59,834,0), X_MASK, POWER6, { S, FRT, FRB } }, -{ "denbcd.", XRC(59,834,1), X_MASK, POWER6, { S, FRT, FRB } }, +{"ldcix", X(31,885), X_MASK, POWER6, 0, {RT, RA0, RB}}, -{ "diex", XRC(59,866,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "diex.", XRC(59,866,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"msgsync", X(31,886), 0xffffffff, POWER9, 0, {0}}, -{ "stfq", OP(60), OP_MASK, POWER2, { FRS, D, RA } }, +{"lfiwzx", X(31,887), X_MASK, POWER7|PPCA2, 0, {FRT, RA0, RB}}, -{ "stfqu", OP(61), OP_MASK, POWER2, { FRS, D, RA } }, +{"extswsli", XS(31,445,0), XS_MASK, POWER9, 0, {RA, RS, SH6}}, +{"extswsli.", XS(31,445,1), XS_MASK, POWER9, 0, {RA, RS, SH6}}, -{ "stfdp", OP(61), OP_MASK, POWER6, { FRT, D, RA0 } }, +{"paste.", XRCL(31,902,1,1),XRT_MASK, POWER9, 0, {RA0, RB}}, -{ "lde", DEO(62,0), DE_MASK, BOOKE64, { RT, DES, RA0 } }, -{ "ldue", DEO(62,1), DE_MASK, BOOKE64, { RT, DES, RA0 } }, -{ "lfse", DEO(62,4), DE_MASK, BOOKE64, { FRT, DES, RA0 } }, -{ "lfsue", DEO(62,5), DE_MASK, BOOKE64, { FRT, DES, RAS } }, -{ "lfde", DEO(62,6), DE_MASK, BOOKE64, { FRT, DES, RA0 } }, -{ "lfdue", DEO(62,7), DE_MASK, BOOKE64, { FRT, DES, RAS } }, -{ "stde", DEO(62,8), DE_MASK, BOOKE64, { RS, DES, RA0 } }, -{ "stdue", DEO(62,9), DE_MASK, BOOKE64, { RS, DES, RAS } }, -{ "stfse", DEO(62,12), DE_MASK, BOOKE64, { FRS, DES, RA0 } }, -{ "stfsue", DEO(62,13), DE_MASK, BOOKE64, { FRS, DES, RAS } }, -{ "stfde", DEO(62,14), DE_MASK, BOOKE64, { FRS, DES, RA0 } }, -{ "stfdue", DEO(62,15), DE_MASK, BOOKE64, { FRS, DES, RAS } }, +{"stvlxl", X(31,903), X_MASK, CELL, 0, {VS, RA0, RB}}, +{"stdfcmux", APU(31,903,0), APU_MASK, PPC405, 0, {FCRT, RA, RB}}, -{ "std", DSO(62,0), DS_MASK, PPC64, { RS, DS, RA0 } }, +{"divdeuo", XO(31,393,1,0), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}}, +{"divdeuo.", XO(31,393,1,1), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}}, +{"divweuo", XO(31,395,1,0), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}}, +{"divweuo.", XO(31,395,1,1), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}}, -{ "stdu", DSO(62,1), DS_MASK, PPC64, { RS, DS, RAS } }, +{"stxvw4x", X(31,908), XX1_MASK, PPCVSX, 0, {XS6, RA0, RB}}, +{"stxsibx", X(31,909), XX1_MASK, PPCVSX3, 0, {XS6, RA0, RB}}, -{ "stq", DSO(62,2), DS_MASK, POWER4, { RSQ, DS, RA0 } }, +{"tabort.", XRC(31,910,1), XRTRB_MASK, PPCHTM, 0, {RA}}, -{ "fcmpu", X(63,0), X_MASK|(3<<21), COM, { BF, FRA, FRB } }, +{"tlbsx", XRC(31,914,0), X_MASK, PPC403|BOOKE|PPCA2|PPC476, 0, {RTO, RA0, RB}}, +{"tlbsx.", XRC(31,914,1), X_MASK, PPC403|BOOKE|PPCA2|PPC476, 0, {RTO, RA0, RB}}, -{ "daddq", XRC(63,2,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "daddq.", XRC(63,2,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"slbmfee", X(31,915), XRLA_MASK, POWER9, 0, {RT, RB, A_L}}, +{"slbmfee", X(31,915), XRA_MASK, PPC64, POWER9, {RT, RB}}, -{ "dquaq", ZRC(63,3,0), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, -{ "dquaq.", ZRC(63,3,1), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{"stwcix", X(31,917), X_MASK, POWER6, 0, {RS, RA0, RB}}, -{ "fcpsgn", XRC(63,8,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "fcpsgn.", XRC(63,8,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"sthbrx", X(31,918), X_MASK, COM, 0, {RS, RA0, RB}}, -{ "frsp", XRC(63,12,0), XRA_MASK, COM, { FRT, FRB } }, -{ "frsp.", XRC(63,12,1), XRA_MASK, COM, { FRT, FRB } }, +{"stfdpx", X(31,919), X_MASK, POWER6, POWER7, {FRSp, RA0, RB}}, +{"stfqx", X(31,919), X_MASK, POWER2, 0, {FRS, RA0, RB}}, -{ "fctiw", XRC(63,14,0), XRA_MASK, PPCCOM, { FRT, FRB } }, -{ "fcir", XRC(63,14,0), XRA_MASK, 
POWER2, { FRT, FRB } }, -{ "fctiw.", XRC(63,14,1), XRA_MASK, PPCCOM, { FRT, FRB } }, -{ "fcir.", XRC(63,14,1), XRA_MASK, POWER2, { FRT, FRB } }, +{"sraq", XRC(31,920,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"sraq.", XRC(31,920,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "fctiwz", XRC(63,15,0), XRA_MASK, PPCCOM, { FRT, FRB } }, -{ "fcirz", XRC(63,15,0), XRA_MASK, POWER2, { FRT, FRB } }, -{ "fctiwz.", XRC(63,15,1), XRA_MASK, PPCCOM, { FRT, FRB } }, -{ "fcirz.", XRC(63,15,1), XRA_MASK, POWER2, { FRT, FRB } }, +{"srea", XRC(31,921,0), X_MASK, M601, 0, {RA, RS, RB}}, +{"srea.", XRC(31,921,1), X_MASK, M601, 0, {RA, RS, RB}}, -{ "fdiv", A(63,18,0), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, -{ "fd", A(63,18,0), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, -{ "fdiv.", A(63,18,1), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, -{ "fd.", A(63,18,1), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, +{"extsh", XRC(31,922,0), XRB_MASK, PPCCOM, 0, {RA, RS}}, +{"exts", XRC(31,922,0), XRB_MASK, PWRCOM, 0, {RA, RS}}, +{"extsh.", XRC(31,922,1), XRB_MASK, PPCCOM, 0, {RA, RS}}, +{"exts.", XRC(31,922,1), XRB_MASK, PWRCOM, 0, {RA, RS}}, -{ "fsub", A(63,20,0), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, -{ "fs", A(63,20,0), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, -{ "fsub.", A(63,20,1), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, -{ "fs.", A(63,20,1), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, +{"stfddx", X(31,931), X_MASK, E500MC, 0, {FRS, RA, RB}}, -{ "fadd", A(63,21,0), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, -{ "fa", A(63,21,0), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, -{ "fadd.", A(63,21,1), AFRC_MASK, PPCCOM, { FRT, FRA, FRB } }, -{ "fa.", A(63,21,1), AFRC_MASK, PWRCOM, { FRT, FRA, FRB } }, +{"stvfrxl", X(31,933), X_MASK, PPCVEC2, 0, {VS, RA0, RB}}, -{ "fsqrt", A(63,22,0), AFRAFRC_MASK, PPCPWR2, { FRT, FRB } }, -{ "fsqrt.", A(63,22,1), AFRAFRC_MASK, PPCPWR2, { FRT, FRB } }, +{"wclrone", XOPL2(31,934,2),XRT_MASK, PPCA2, 0, {RA0, RB}}, +{"wclrall", X(31,934), XRARB_MASK, PPCA2, 0, {L2}}, +{"wclr", X(31,934), X_MASK, PPCA2, 0, {L2, RA0, RB}}, -{ "fsel", A(63,23,0), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, -{ "fsel.", A(63,23,1), A_MASK, PPC, { FRT,FRA,FRC,FRB } }, +{"stvrxl", X(31,935), X_MASK, CELL, 0, {VS, RA0, RB}}, -{ "fre", A(63,24,0), AFRALFRC_MASK, POWER5, { FRT, FRB, A_L } }, -{ "fre.", A(63,24,1), AFRALFRC_MASK, POWER5, { FRT, FRB, A_L } }, +{"divdeo", XO(31,425,1,0), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}}, +{"divdeo.", XO(31,425,1,1), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}}, +{"divweo", XO(31,427,1,0), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}}, +{"divweo.", XO(31,427,1,1), XO_MASK, POWER7|PPCA2, 0, {RT, RA, RB}}, -{ "fmul", A(63,25,0), AFRB_MASK, PPCCOM, { FRT, FRA, FRC } }, -{ "fm", A(63,25,0), AFRB_MASK, PWRCOM, { FRT, FRA, FRC } }, -{ "fmul.", A(63,25,1), AFRB_MASK, PPCCOM, { FRT, FRA, FRC } }, -{ "fm.", A(63,25,1), AFRB_MASK, PWRCOM, { FRT, FRA, FRC } }, +{"stxvh8x", X(31,940), XX1_MASK, PPCVSX3, 0, {XS6, RA0, RB}}, +{"stxsihx", X(31,941), XX1_MASK, PPCVSX3, 0, {XS6, RA0, RB}}, -{ "frsqrte", A(63,26,0), AFRALFRC_MASK, PPC, { FRT, FRB, A_L } }, -{ "frsqrte.",A(63,26,1), AFRALFRC_MASK, PPC, { FRT, FRB, A_L } }, +{"treclaim.", XRC(31,942,1), XRTRB_MASK, PPCHTM, 0, {RA}}, -{ "fmsub", A(63,28,0), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, -{ "fms", A(63,28,0), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, -{ "fmsub.", A(63,28,1), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, -{ "fms.", A(63,28,1), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, +{"tlbrehi", XTLB(31,946,0), XTLB_MASK, PPC403, PPCA2, {RT, RA}}, +{"tlbrelo", XTLB(31,946,1), XTLB_MASK, PPC403, PPCA2, {RT, RA}}, 
+{"tlbre", X(31,946), X_MASK, PPC403|BOOKE|PPCA2|PPC476, 0, {RSO, RAOPT, SHO}}, -{ "fmadd", A(63,29,0), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, -{ "fma", A(63,29,0), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, -{ "fmadd.", A(63,29,1), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, -{ "fma.", A(63,29,1), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, +{"sthcix", X(31,949), X_MASK, POWER6, 0, {RS, RA0, RB}}, -{ "fnmsub", A(63,30,0), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, -{ "fnms", A(63,30,0), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, -{ "fnmsub.", A(63,30,1), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, -{ "fnms.", A(63,30,1), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, +{"icswepx", XRC(31,950,0), X_MASK, PPCA2, 0, {RS, RA, RB}}, +{"icswepx.", XRC(31,950,1), X_MASK, PPCA2, 0, {RS, RA, RB}}, -{ "fnmadd", A(63,31,0), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, -{ "fnma", A(63,31,0), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, -{ "fnmadd.", A(63,31,1), A_MASK, PPCCOM, { FRT,FRA,FRC,FRB } }, -{ "fnma.", A(63,31,1), A_MASK, PWRCOM, { FRT,FRA,FRC,FRB } }, +{"stfqux", X(31,951), X_MASK, POWER2, 0, {FRS, RA, RB}}, -{ "fcmpo", X(63,32), X_MASK|(3<<21), COM, { BF, FRA, FRB } }, +{"sraiq", XRC(31,952,0), X_MASK, M601, 0, {RA, RS, SH}}, +{"sraiq.", XRC(31,952,1), X_MASK, M601, 0, {RA, RS, SH}}, -{ "dmulq", XRC(63,34,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "dmulq.", XRC(63,34,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"extsb", XRC(31,954,0), XRB_MASK, PPC, 0, {RA, RS}}, +{"extsb.", XRC(31,954,1), XRB_MASK, PPC, 0, {RA, RS}}, -{ "drrndq", ZRC(63,35,0), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, -{ "drrndq.", ZRC(63,35,1), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{"stvflxl", X(31,965), X_MASK, PPCVEC2, 0, {VS, RA0, RB}}, -{ "mtfsb1", XRC(63,38,0), XRARB_MASK, COM, { BT } }, -{ "mtfsb1.", XRC(63,38,1), XRARB_MASK, COM, { BT } }, +{"iccci", X(31,966), XRT_MASK, PPC403|PPC440|TITAN|PPCA2, 0, {RAOPT, RBOPT}}, +{"ici", X(31,966), XRARB_MASK, PPCA2|PPC476, 0, {CT}}, -{ "fneg", XRC(63,40,0), XRA_MASK, COM, { FRT, FRB } }, -{ "fneg.", XRC(63,40,1), XRA_MASK, COM, { FRT, FRB } }, +{"divduo", XO(31,457,1,0), XO_MASK, PPC64, 0, {RT, RA, RB}}, +{"divduo.", XO(31,457,1,1), XO_MASK, PPC64, 0, {RT, RA, RB}}, -{ "mcrfs", X(63,64), XRB_MASK|(3<<21)|(3<<16), COM, { BF, BFA } }, +{"divwuo", XO(31,459,1,0), XO_MASK, PPC, 0, {RT, RA, RB}}, +{"divwuo.", XO(31,459,1,1), XO_MASK, PPC, 0, {RT, RA, RB}}, -{ "dscliq", ZRC(63,66,0), Z_MASK, POWER6, { FRT, FRA, SH16 } }, -{ "dscliq.", ZRC(63,66,1), Z_MASK, POWER6, { FRT, FRA, SH16 } }, +{"stxvd2x", X(31,972), XX1_MASK, PPCVSX, 0, {XS6, RA0, RB}}, +{"stxvx", X(31,972), XX1_MASK, POWER8, POWER9|PPCVSX3, {XS6, RA0, RB}}, -{ "dquaiq", ZRC(63,67,0), Z2_MASK, POWER6, { TE, FRT, FRB, RMC } }, -{ "dquaiq.", ZRC(63,67,1), Z2_MASK, POWER6, { FRT, FRA, FRB, RMC } }, +{"tlbld", X(31,978), XRTRA_MASK, PPC, PPC403|BOOKE|PPCA2|PPC476, {RB}}, +{"tlbwehi", XTLB(31,978,0), XTLB_MASK, PPC403, 0, {RT, RA}}, +{"tlbwelo", XTLB(31,978,1), XTLB_MASK, PPC403, 0, {RT, RA}}, +{"tlbwe", X(31,978), X_MASK, PPC403|BOOKE|PPCA2|PPC476, 0, {RSO, RAOPT, SHO}}, -{ "mtfsb0", XRC(63,70,0), XRARB_MASK, COM, { BT } }, -{ "mtfsb0.", XRC(63,70,1), XRARB_MASK, COM, { BT } }, +{"slbfee.", XRC(31,979,1), XRA_MASK, POWER6, 0, {RT, RB}}, -{ "fmr", XRC(63,72,0), XRA_MASK, COM, { FRT, FRB } }, -{ "fmr.", XRC(63,72,1), XRA_MASK, COM, { FRT, FRB } }, +{"stbcix", X(31,981), X_MASK, POWER6, 0, {RS, RA0, RB}}, -{ "dscriq", ZRC(63,98,0), Z_MASK, POWER6, { FRT, FRA, SH16 } }, -{ "dscriq.", ZRC(63,98,1), Z_MASK, POWER6, { FRT, FRA, SH16 } }, +{"icbi", X(31,982), XRT_MASK, PPC, 
0, {RA0, RB}}, -{ "drintxq", ZRC(63,99,0), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, -{ "drintxq.",ZRC(63,99,1), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, +{"stfiwx", X(31,983), X_MASK, PPC, PPCEFS, {FRS, RA0, RB}}, -{ "dcmpoq", X(63,130), X_MASK, POWER6, { BF, FRA, FRB } }, +{"extsw", XRC(31,986,0), XRB_MASK, PPC64, 0, {RA, RS}}, +{"extsw.", XRC(31,986,1), XRB_MASK, PPC64, 0, {RA, RS}}, -{ "mtfsfi", XRC(63,134,0), XWRA_MASK|(3<<21)|(1<<11), COM, { BFF, U, W } }, -{ "mtfsfi.", XRC(63,134,1), XWRA_MASK|(3<<21)|(1<<11), COM, { BFF, U, W } }, +{"icbiep", XRT(31,991,0), XRT_MASK, E500MC|PPCA2, 0, {RA0, RB}}, -{ "fnabs", XRC(63,136,0), XRA_MASK, COM, { FRT, FRB } }, -{ "fnabs.", XRC(63,136,1), XRA_MASK, COM, { FRT, FRB } }, +{"stvswxl", X(31,997), X_MASK, PPCVEC2, 0, {VS, RA0, RB}}, -{ "dtstexq", X(63,162), X_MASK, POWER6, { BF, FRA, FRB } }, -{ "dtstdcq", Z(63,194), Z_MASK, POWER6, { BF, FRA, DCM } }, -{ "dtstdgq", Z(63,226), Z_MASK, POWER6, { BF, FRA, DGM } }, +{"icread", X(31,998), XRT_MASK, PPC403|PPC440|PPC476|TITAN, 0, {RA0, RB}}, -{ "drintnq", ZRC(63,227,0), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, -{ "drintnq.",ZRC(63,227,1), Z2_MASK, POWER6, { R, FRT, FRB, RMC } }, +{"nabso", XO(31,488,1,0), XORB_MASK, M601, 0, {RT, RA}}, +{"nabso.", XO(31,488,1,1), XORB_MASK, M601, 0, {RT, RA}}, -{ "dctqpq", XRC(63,258,0), X_MASK, POWER6, { FRT, FRB } }, -{ "dctqpq.", XRC(63,258,1), X_MASK, POWER6, { FRT, FRB } }, +{"divdo", XO(31,489,1,0), XO_MASK, PPC64, 0, {RT, RA, RB}}, +{"divdo.", XO(31,489,1,1), XO_MASK, PPC64, 0, {RT, RA, RB}}, -{ "fabs", XRC(63,264,0), XRA_MASK, COM, { FRT, FRB } }, -{ "fabs.", XRC(63,264,1), XRA_MASK, COM, { FRT, FRB } }, +{"divwo", XO(31,491,1,0), XO_MASK, PPC, 0, {RT, RA, RB}}, +{"divwo.", XO(31,491,1,1), XO_MASK, PPC, 0, {RT, RA, RB}}, -{ "dctfixq", XRC(63,290,0), X_MASK, POWER6, { FRT, FRB } }, -{ "dctfixq.",XRC(63,290,1), X_MASK, POWER6, { FRT, FRB } }, +{"stxvb16x", X(31,1004), XX1_MASK, PPCVSX3, 0, {XS6, RA0, RB}}, -{ "ddedpdq", XRC(63,322,0), X_MASK, POWER6, { SP, FRT, FRB } }, -{ "ddedpdq.",XRC(63,322,1), X_MASK, POWER6, { SP, FRT, FRB } }, +{"trechkpt.", XRC(31,1006,1), XRTRARB_MASK,PPCHTM, 0, {0}}, -{ "dxexq", XRC(63,354,0), X_MASK, POWER6, { FRT, FRB } }, -{ "dxexq.", XRC(63,354,1), X_MASK, POWER6, { FRT, FRB } }, +{"tlbli", X(31,1010), XRTRA_MASK, PPC, TITAN, {RB}}, -{ "frin", XRC(63,392,0), XRA_MASK, POWER5, { FRT, FRB } }, -{ "frin.", XRC(63,392,1), XRA_MASK, POWER5, { FRT, FRB } }, -{ "friz", XRC(63,424,0), XRA_MASK, POWER5, { FRT, FRB } }, -{ "friz.", XRC(63,424,1), XRA_MASK, POWER5, { FRT, FRB } }, -{ "frip", XRC(63,456,0), XRA_MASK, POWER5, { FRT, FRB } }, -{ "frip.", XRC(63,456,1), XRA_MASK, POWER5, { FRT, FRB } }, -{ "frim", XRC(63,488,0), XRA_MASK, POWER5, { FRT, FRB } }, -{ "frim.", XRC(63,488,1), XRA_MASK, POWER5, { FRT, FRB } }, +{"stdcix", X(31,1013), X_MASK, POWER6, 0, {RS, RA0, RB}}, -{ "dsubq", XRC(63,514,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "dsubq.", XRC(63,514,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"dcbz", X(31,1014), XRT_MASK, PPC, 0, {RA0, RB}}, +{"dclz", X(31,1014), XRT_MASK, PPC, 0, {RA0, RB}}, -{ "ddivq", XRC(63,546,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "ddivq.", XRC(63,546,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"dcbzep", XRT(31,1023,0), XRT_MASK, E500MC|PPCA2, 0, {RA0, RB}}, -{ "mffs", XRC(63,583,0), XRARB_MASK, COM, { FRT } }, -{ "mffs.", XRC(63,583,1), XRARB_MASK, COM, { FRT } }, +{"dcbzl", XOPL(31,1014,1), XRT_MASK, POWER4|E500MC, PPC476, {RA0, RB}}, -{ "dcmpuq", X(63,642), X_MASK, POWER6, { BF, FRA, FRB } }, 
+{"cctpl", 0x7c210b78, 0xffffffff, CELL, 0, {0}}, +{"cctpm", 0x7c421378, 0xffffffff, CELL, 0, {0}}, +{"cctph", 0x7c631b78, 0xffffffff, CELL, 0, {0}}, -{ "dtstsfq", X(63,674), X_MASK, POWER6, { BF, FRA, FRB } }, +{"dstt", XDSS(31,342,1), XDSS_MASK, PPCVEC, 0, {RA, RB, STRM}}, +{"dststt", XDSS(31,374,1), XDSS_MASK, PPCVEC, 0, {RA, RB, STRM}}, +{"dssall", XDSS(31,822,1), XDSS_MASK, PPCVEC, 0, {0}}, -{ "mtfsf", XFL(63,711,0), XFL_MASK, COM, { FLM, FRB, XFL_L, W } }, -{ "mtfsf.", XFL(63,711,1), XFL_MASK, COM, { FLM, FRB, XFL_L, W } }, +{"db8cyc", 0x7f9ce378, 0xffffffff, CELL, 0, {0}}, +{"db10cyc", 0x7fbdeb78, 0xffffffff, CELL, 0, {0}}, +{"db12cyc", 0x7fdef378, 0xffffffff, CELL, 0, {0}}, +{"db16cyc", 0x7ffffb78, 0xffffffff, CELL, 0, {0}}, -{ "drdpq", XRC(63,770,0), X_MASK, POWER6, { FRT, FRB } }, -{ "drdpq.", XRC(63,770,1), X_MASK, POWER6, { FRT, FRB } }, +{"lwz", OP(32), OP_MASK, PPCCOM, PPCVLE, {RT, D, RA0}}, +{"l", OP(32), OP_MASK, PWRCOM, PPCVLE, {RT, D, RA0}}, -{ "dcffixq", XRC(63,802,0), X_MASK, POWER6, { FRT, FRB } }, -{ "dcffixq.",XRC(63,802,1), X_MASK, POWER6, { FRT, FRB } }, +{"lwzu", OP(33), OP_MASK, PPCCOM, PPCVLE, {RT, D, RAL}}, +{"lu", OP(33), OP_MASK, PWRCOM, PPCVLE, {RT, D, RA0}}, -{ "fctid", XRC(63,814,0), XRA_MASK, PPC64, { FRT, FRB } }, -{ "fctid.", XRC(63,814,1), XRA_MASK, PPC64, { FRT, FRB } }, +{"lbz", OP(34), OP_MASK, COM, PPCVLE, {RT, D, RA0}}, -{ "fctidz", XRC(63,815,0), XRA_MASK, PPC64, { FRT, FRB } }, -{ "fctidz.", XRC(63,815,1), XRA_MASK, PPC64, { FRT, FRB } }, +{"lbzu", OP(35), OP_MASK, COM, PPCVLE, {RT, D, RAL}}, -{ "denbcdq", XRC(63,834,0), X_MASK, POWER6, { S, FRT, FRB } }, -{ "denbcdq.",XRC(63,834,1), X_MASK, POWER6, { S, FRT, FRB } }, +{"stw", OP(36), OP_MASK, PPCCOM, PPCVLE, {RS, D, RA0}}, +{"st", OP(36), OP_MASK, PWRCOM, PPCVLE, {RS, D, RA0}}, -{ "fcfid", XRC(63,846,0), XRA_MASK, PPC64, { FRT, FRB } }, -{ "fcfid.", XRC(63,846,1), XRA_MASK, PPC64, { FRT, FRB } }, +{"stwu", OP(37), OP_MASK, PPCCOM, PPCVLE, {RS, D, RAS}}, +{"stu", OP(37), OP_MASK, PWRCOM, PPCVLE, {RS, D, RA0}}, -{ "diexq", XRC(63,866,0), X_MASK, POWER6, { FRT, FRA, FRB } }, -{ "diexq.", XRC(63,866,1), X_MASK, POWER6, { FRT, FRA, FRB } }, +{"stb", OP(38), OP_MASK, COM, PPCVLE, {RS, D, RA0}}, +{"stbu", OP(39), OP_MASK, COM, PPCVLE, {RS, D, RAS}}, + +{"lhz", OP(40), OP_MASK, COM, PPCVLE, {RT, D, RA0}}, + +{"lhzu", OP(41), OP_MASK, COM, PPCVLE, {RT, D, RAL}}, + +{"lha", OP(42), OP_MASK, COM, PPCVLE, {RT, D, RA0}}, + +{"lhau", OP(43), OP_MASK, COM, PPCVLE, {RT, D, RAL}}, + +{"sth", OP(44), OP_MASK, COM, PPCVLE, {RS, D, RA0}}, + +{"sthu", OP(45), OP_MASK, COM, PPCVLE, {RS, D, RAS}}, + +{"lmw", OP(46), OP_MASK, PPCCOM, PPCVLE, {RT, D, RAM}}, +{"lm", OP(46), OP_MASK, PWRCOM, PPCVLE, {RT, D, RA0}}, + +{"stmw", OP(47), OP_MASK, PPCCOM, PPCVLE, {RS, D, RA0}}, +{"stm", OP(47), OP_MASK, PWRCOM, PPCVLE, {RS, D, RA0}}, + +{"lfs", OP(48), OP_MASK, COM, PPCEFS|PPCVLE, {FRT, D, RA0}}, + +{"lfsu", OP(49), OP_MASK, COM, PPCEFS|PPCVLE, {FRT, D, RAS}}, + +{"lfd", OP(50), OP_MASK, COM, PPCEFS|PPCVLE, {FRT, D, RA0}}, + +{"lfdu", OP(51), OP_MASK, COM, PPCEFS|PPCVLE, {FRT, D, RAS}}, + +{"stfs", OP(52), OP_MASK, COM, PPCEFS|PPCVLE, {FRS, D, RA0}}, + +{"stfsu", OP(53), OP_MASK, COM, PPCEFS|PPCVLE, {FRS, D, RAS}}, + +{"stfd", OP(54), OP_MASK, COM, PPCEFS|PPCVLE, {FRS, D, RA0}}, + +{"stfdu", OP(55), OP_MASK, COM, PPCEFS|PPCVLE, {FRS, D, RAS}}, + +{"lq", OP(56), OP_MASK, POWER4, PPC476|PPCVLE, {RTQ, DQ, RAQ}}, +{"psq_l", OP(56), OP_MASK, PPCPS, PPCVLE, {FRT,PSD,RA,PSW,PSQ}}, +{"lfq", OP(56), OP_MASK, POWER2, PPCVLE, {FRT, D, 
RA0}}, + +{"lxsd", DSO(57,2), DS_MASK, PPCVSX3, PPCVLE, {VD, DS, RA0}}, +{"lxssp", DSO(57,3), DS_MASK, PPCVSX3, PPCVLE, {VD, DS, RA0}}, +{"lfdp", OP(57), OP_MASK, POWER6, POWER7|PPCVLE, {FRTp, DS, RA0}}, +{"psq_lu", OP(57), OP_MASK, PPCPS, PPCVLE, {FRT,PSD,RA,PSW,PSQ}}, +{"lfqu", OP(57), OP_MASK, POWER2, PPCVLE, {FRT, D, RA0}}, + +{"ld", DSO(58,0), DS_MASK, PPC64, PPCVLE, {RT, DS, RA0}}, +{"ldu", DSO(58,1), DS_MASK, PPC64, PPCVLE, {RT, DS, RAL}}, +{"lwa", DSO(58,2), DS_MASK, PPC64, PPCVLE, {RT, DS, RA0}}, + +{"dadd", XRC(59,2,0), X_MASK, POWER6, PPCVLE, {FRT, FRA, FRB}}, +{"dadd.", XRC(59,2,1), X_MASK, POWER6, PPCVLE, {FRT, FRA, FRB}}, + +{"dqua", ZRC(59,3,0), Z2_MASK, POWER6, PPCVLE, {FRT,FRA,FRB,RMC}}, +{"dqua.", ZRC(59,3,1), Z2_MASK, POWER6, PPCVLE, {FRT,FRA,FRB,RMC}}, + +{"fdivs", A(59,18,0), AFRC_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, +{"fdivs.", A(59,18,1), AFRC_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, + +{"fsubs", A(59,20,0), AFRC_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, +{"fsubs.", A(59,20,1), AFRC_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, + +{"fadds", A(59,21,0), AFRC_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, +{"fadds.", A(59,21,1), AFRC_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, + +{"fsqrts", A(59,22,0), AFRAFRC_MASK, PPC, TITAN|PPCVLE, {FRT, FRB}}, +{"fsqrts.", A(59,22,1), AFRAFRC_MASK, PPC, TITAN|PPCVLE, {FRT, FRB}}, + +{"fres", A(59,24,0), AFRAFRC_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"fres", A(59,24,0), AFRALFRC_MASK, PPC, POWER7|PPCVLE, {FRT, FRB, A_L}}, +{"fres.", A(59,24,1), AFRAFRC_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"fres.", A(59,24,1), AFRALFRC_MASK, PPC, POWER7|PPCVLE, {FRT, FRB, A_L}}, + +{"fmuls", A(59,25,0), AFRB_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC}}, +{"fmuls.", A(59,25,1), AFRB_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC}}, + +{"frsqrtes", A(59,26,0), AFRAFRC_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"frsqrtes", A(59,26,0), AFRALFRC_MASK, POWER5, POWER7|PPCVLE, {FRT, FRB, A_L}}, +{"frsqrtes.", A(59,26,1), AFRAFRC_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"frsqrtes.", A(59,26,1), AFRALFRC_MASK, POWER5, POWER7|PPCVLE, {FRT, FRB, A_L}}, + +{"fmsubs", A(59,28,0), A_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fmsubs.", A(59,28,1), A_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, + +{"fmadds", A(59,29,0), A_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fmadds.", A(59,29,1), A_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, + +{"fnmsubs", A(59,30,0), A_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fnmsubs.", A(59,30,1), A_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, + +{"fnmadds", A(59,31,0), A_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fnmadds.", A(59,31,1), A_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, + +{"dmul", XRC(59,34,0), X_MASK, POWER6, PPCVLE, {FRT, FRA, FRB}}, +{"dmul.", XRC(59,34,1), X_MASK, POWER6, PPCVLE, {FRT, FRA, FRB}}, + +{"drrnd", ZRC(59,35,0), Z2_MASK, POWER6, PPCVLE, {FRT, FRA, FRB, RMC}}, +{"drrnd.", ZRC(59,35,1), Z2_MASK, POWER6, PPCVLE, {FRT, FRA, FRB, RMC}}, + +{"dscli", ZRC(59,66,0), Z_MASK, POWER6, PPCVLE, {FRT, FRA, SH16}}, +{"dscli.", ZRC(59,66,1), Z_MASK, POWER6, PPCVLE, {FRT, FRA, SH16}}, + +{"dquai", ZRC(59,67,0), Z2_MASK, POWER6, PPCVLE, {TE, FRT,FRB,RMC}}, +{"dquai.", ZRC(59,67,1), Z2_MASK, POWER6, PPCVLE, {TE, FRT,FRB,RMC}}, + +{"dscri", ZRC(59,98,0), Z_MASK, POWER6, PPCVLE, {FRT, FRA, SH16}}, +{"dscri.", ZRC(59,98,1), Z_MASK, POWER6, PPCVLE, {FRT, FRA, SH16}}, + +{"drintx", ZRC(59,99,0), Z2_MASK, POWER6, PPCVLE, {R, FRT, FRB, RMC}}, +{"drintx.", ZRC(59,99,1), Z2_MASK, POWER6, PPCVLE, {R, 
FRT, FRB, RMC}}, + +{"dcmpo", X(59,130), X_MASK, POWER6, PPCVLE, {BF, FRA, FRB}}, + +{"dtstex", X(59,162), X_MASK, POWER6, PPCVLE, {BF, FRA, FRB}}, +{"dtstdc", Z(59,194), Z_MASK, POWER6, PPCVLE, {BF, FRA, DCM}}, +{"dtstdg", Z(59,226), Z_MASK, POWER6, PPCVLE, {BF, FRA, DGM}}, + +{"drintn", ZRC(59,227,0), Z2_MASK, POWER6, PPCVLE, {R, FRT, FRB, RMC}}, +{"drintn.", ZRC(59,227,1), Z2_MASK, POWER6, PPCVLE, {R, FRT, FRB, RMC}}, + +{"dctdp", XRC(59,258,0), X_MASK, POWER6, PPCVLE, {FRT, FRB}}, +{"dctdp.", XRC(59,258,1), X_MASK, POWER6, PPCVLE, {FRT, FRB}}, + +{"dctfix", XRC(59,290,0), X_MASK, POWER6, PPCVLE, {FRT, FRB}}, +{"dctfix.", XRC(59,290,1), X_MASK, POWER6, PPCVLE, {FRT, FRB}}, + +{"ddedpd", XRC(59,322,0), X_MASK, POWER6, PPCVLE, {SP, FRT, FRB}}, +{"ddedpd.", XRC(59,322,1), X_MASK, POWER6, PPCVLE, {SP, FRT, FRB}}, + +{"dxex", XRC(59,354,0), X_MASK, POWER6, PPCVLE, {FRT, FRB}}, +{"dxex.", XRC(59,354,1), X_MASK, POWER6, PPCVLE, {FRT, FRB}}, + +{"dsub", XRC(59,514,0), X_MASK, POWER6, PPCVLE, {FRT, FRA, FRB}}, +{"dsub.", XRC(59,514,1), X_MASK, POWER6, PPCVLE, {FRT, FRA, FRB}}, + +{"ddiv", XRC(59,546,0), X_MASK, POWER6, PPCVLE, {FRT, FRA, FRB}}, +{"ddiv.", XRC(59,546,1), X_MASK, POWER6, PPCVLE, {FRT, FRA, FRB}}, + +{"dcmpu", X(59,642), X_MASK, POWER6, PPCVLE, {BF, FRA, FRB}}, + +{"dtstsf", X(59,674), X_MASK, POWER6, PPCVLE, {BF, FRA, FRB}}, +{"dtstsfi", X(59,675), X_MASK|1<<22,POWER9, PPCVLE, {BF, UIM6, FRB}}, + +{"drsp", XRC(59,770,0), X_MASK, POWER6, PPCVLE, {FRT, FRB}}, +{"drsp.", XRC(59,770,1), X_MASK, POWER6, PPCVLE, {FRT, FRB}}, + +{"dcffix", XRC(59,802,0), X_MASK|FRA_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"dcffix.", XRC(59,802,1), X_MASK|FRA_MASK, POWER7, PPCVLE, {FRT, FRB}}, + +{"denbcd", XRC(59,834,0), X_MASK, POWER6, PPCVLE, {S, FRT, FRB}}, +{"denbcd.", XRC(59,834,1), X_MASK, POWER6, PPCVLE, {S, FRT, FRB}}, + +{"fcfids", XRC(59,846,0), XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, FRB}}, +{"fcfids.", XRC(59,846,1), XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, FRB}}, + +{"diex", XRC(59,866,0), X_MASK, POWER6, PPCVLE, {FRT, FRA, FRB}}, +{"diex.", XRC(59,866,1), X_MASK, POWER6, PPCVLE, {FRT, FRA, FRB}}, + +{"fcfidus", XRC(59,974,0), XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, FRB}}, +{"fcfidus.", XRC(59,974,1), XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, FRB}}, + +{"xsaddsp", XX3(60,0), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xsmaddasp", XX3(60,1), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xxsldwi", XX3(60,2), XX3SHW_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6, SHW}}, +{"xscmpeqdp", XX3(60,3), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xsrsqrtesp", XX2(60,10), XX2_MASK, PPCVSX2, PPCVLE, {XT6, XB6}}, +{"xssqrtsp", XX2(60,11), XX2_MASK, PPCVSX2, PPCVLE, {XT6, XB6}}, +{"xxsel", XX4(60,3), XX4_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6, XC6}}, +{"xssubsp", XX3(60,8), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xsmaddmsp", XX3(60,9), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xxspltd", XX3(60,10), XX3DM_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6S, DMEX}}, +{"xxmrghd", XX3(60,10), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xxswapd", XX3(60,10)|(2<<8), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6S}}, +{"xxmrgld", XX3(60,10)|(3<<8), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xxpermdi", XX3(60,10), XX3DM_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6, DM}}, +{"xscmpgtdp", XX3(60,11), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xsresp", XX2(60,26), XX2_MASK, PPCVSX2, PPCVLE, {XT6, XB6}}, +{"xsmulsp", XX3(60,16), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xsmsubasp", XX3(60,17), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, 
XB6}}, +{"xxmrghw", XX3(60,18), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xscmpgedp", XX3(60,19), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xsdivsp", XX3(60,24), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xsmsubmsp", XX3(60,25), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xxperm", XX3(60,26), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xsadddp", XX3(60,32), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xsmaddadp", XX3(60,33), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xscmpudp", XX3(60,35), XX3BF_MASK, PPCVSX, PPCVLE, {BF, XA6, XB6}}, +{"xscvdpuxws", XX2(60,72), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsrdpi", XX2(60,73), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsrsqrtedp", XX2(60,74), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xssqrtdp", XX2(60,75), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xssubdp", XX3(60,40), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xsmaddmdp", XX3(60,41), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xscmpodp", XX3(60,43), XX3BF_MASK, PPCVSX, PPCVLE, {BF, XA6, XB6}}, +{"xscvdpsxws", XX2(60,88), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsrdpiz", XX2(60,89), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsredp", XX2(60,90), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsmuldp", XX3(60,48), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xsmsubadp", XX3(60,49), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xxmrglw", XX3(60,50), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xsrdpip", XX2(60,105), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xstsqrtdp", XX2(60,106), XX2BF_MASK, PPCVSX, PPCVLE, {BF, XB6}}, +{"xsrdpic", XX2(60,107), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsdivdp", XX3(60,56), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xsmsubmdp", XX3(60,57), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xxpermr", XX3(60,58), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xscmpexpdp", XX3(60,59), XX3BF_MASK, PPCVSX3, PPCVLE, {BF, XA6, XB6}}, +{"xsrdpim", XX2(60,121), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xstdivdp", XX3(60,61), XX3BF_MASK, PPCVSX, PPCVLE, {BF, XA6, XB6}}, +{"xvaddsp", XX3(60,64), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvmaddasp", XX3(60,65), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpeqsp", XX3RC(60,67,0), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpeqsp.", XX3RC(60,67,1), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvspuxws", XX2(60,136), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvrspi", XX2(60,137), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvrsqrtesp", XX2(60,138), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvsqrtsp", XX2(60,139), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvsubsp", XX3(60,72), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvmaddmsp", XX3(60,73), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpgtsp", XX3RC(60,75,0), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpgtsp.", XX3RC(60,75,1), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvspsxws", XX2(60,152), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvrspiz", XX2(60,153), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvresp", XX2(60,154), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvmulsp", XX3(60,80), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvmsubasp", XX3(60,81), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xxspltw", XX2(60,164), XX2UIM_MASK, PPCVSX, PPCVLE, {XT6, XB6, UIM}}, +{"xxextractuw", XX2(60,165), XX2UIM4_MASK, PPCVSX3, PPCVLE, {XT6, XB6, UIMM4}}, +{"xvcmpgesp", XX3RC(60,83,0), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpgesp.", XX3RC(60,83,1), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, 
+{"xvcvuxwsp", XX2(60,168), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvrspip", XX2(60,169), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvtsqrtsp", XX2(60,170), XX2BF_MASK, PPCVSX, PPCVLE, {BF, XB6}}, +{"xvrspic", XX2(60,171), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvdivsp", XX3(60,88), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvmsubmsp", XX3(60,89), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xxspltib", X(60,360), XX1_MASK|3<<19, PPCVSX3, PPCVLE, {XT6, IMM8}}, +{"xxinsertw", XX2(60,181), XX2UIM4_MASK, PPCVSX3, PPCVLE, {XT6, XB6, UIMM4}}, +{"xvcvsxwsp", XX2(60,184), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvrspim", XX2(60,185), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvtdivsp", XX3(60,93), XX3BF_MASK, PPCVSX, PPCVLE, {BF, XA6, XB6}}, +{"xvadddp", XX3(60,96), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvmaddadp", XX3(60,97), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpeqdp", XX3RC(60,99,0), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpeqdp.", XX3RC(60,99,1), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvdpuxws", XX2(60,200), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvrdpi", XX2(60,201), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvrsqrtedp", XX2(60,202), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvsqrtdp", XX2(60,203), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvsubdp", XX3(60,104), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvmaddmdp", XX3(60,105), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpgtdp", XX3RC(60,107,0), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpgtdp.", XX3RC(60,107,1), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvdpsxws", XX2(60,216), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvrdpiz", XX2(60,217), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvredp", XX2(60,218), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvmuldp", XX3(60,112), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvmsubadp", XX3(60,113), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpgedp", XX3RC(60,115,0), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcmpgedp.", XX3RC(60,115,1), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvuxwdp", XX2(60,232), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvrdpip", XX2(60,233), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvtsqrtdp", XX2(60,234), XX2BF_MASK, PPCVSX, PPCVLE, {BF, XB6}}, +{"xvrdpic", XX2(60,235), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvdivdp", XX3(60,120), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvmsubmdp", XX3(60,121), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvsxwdp", XX2(60,248), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvrdpim", XX2(60,249), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvtdivdp", XX3(60,125), XX3BF_MASK, PPCVSX, PPCVLE, {BF, XA6, XB6}}, +{"xsmaxcdp", XX3(60,128), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xsnmaddasp", XX3(60,129), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xxland", XX3(60,130), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xscvdpsp", XX2(60,265), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xscvdpspn", XX2(60,267), XX2_MASK, PPCVSX2, PPCVLE, {XT6, XB6}}, +{"xsmincdp", XX3(60,136), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xsnmaddmsp", XX3(60,137), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xxlandc", XX3(60,138), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xsrsp", XX2(60,281), XX2_MASK, PPCVSX2, PPCVLE, {XT6, XB6}}, +{"xsmaxjdp", XX3(60,144), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xsnmsubasp", XX3(60,145), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xxlor", XX3(60,146), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, 
XB6}}, +{"xscvuxdsp", XX2(60,296), XX2_MASK, PPCVSX2, PPCVLE, {XT6, XB6}}, +{"xststdcsp", XX2(60,298), XX2BFD_MASK, PPCVSX3, PPCVLE, {BF, XB6, DCMX}}, +{"xsminjdp", XX3(60,152), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xsnmsubmsp", XX3(60,153), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xxlxor", XX3(60,154), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xscvsxdsp", XX2(60,312), XX2_MASK, PPCVSX2, PPCVLE, {XT6, XB6}}, +{"xsmaxdp", XX3(60,160), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xsnmaddadp", XX3(60,161), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xxlnor", XX3(60,162), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xscvdpuxds", XX2(60,328), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xscvspdp", XX2(60,329), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xscvspdpn", XX2(60,331), XX2_MASK, PPCVSX2, PPCVLE, {XT6, XB6}}, +{"xsmindp", XX3(60,168), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xsnmaddmdp", XX3(60,169), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xxlorc", XX3(60,170), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xscvdpsxds", XX2(60,344), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsabsdp", XX2(60,345), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsxexpdp", XX2VA(60,347,0),XX2_MASK|1, PPCVSX3, PPCVLE, {RT, XB6}}, +{"xsxsigdp", XX2VA(60,347,1),XX2_MASK|1, PPCVSX3, PPCVLE, {RT, XB6}}, +{"xscvhpdp", XX2VA(60,347,16),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xscvdphp", XX2VA(60,347,17),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xscpsgndp", XX3(60,176), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xsnmsubadp", XX3(60,177), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xxlnand", XX3(60,178), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xscvuxddp", XX2(60,360), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsnabsdp", XX2(60,361), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xststdcdp", XX2(60,362), XX2BFD_MASK, PPCVSX3, PPCVLE, {BF, XB6, DCMX}}, +{"xsnmsubmdp", XX3(60,185), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xxleqv", XX3(60,186), XX3_MASK, PPCVSX2, PPCVLE, {XT6, XA6, XB6}}, +{"xscvsxddp", XX2(60,376), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsnegdp", XX2(60,377), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvmaxsp", XX3(60,192), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvnmaddasp", XX3(60,193), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvspuxds", XX2(60,392), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvcvdpsp", XX2(60,393), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvminsp", XX3(60,200), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvnmaddmsp", XX3(60,201), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvspsxds", XX2(60,408), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvabssp", XX2(60,409), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvmovsp", XX3(60,208), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6S}}, +{"xvcpsgnsp", XX3(60,208), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvnmsubasp", XX3(60,209), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvuxdsp", XX2(60,424), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvnabssp", XX2(60,425), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvtstdcsp", XX2(60,426), XX2DCMXS_MASK, PPCVSX3, PPCVLE, {XT6, XB6, DCMXS}}, +{"xviexpsp", XX3(60,216), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xvnmsubmsp", XX3(60,217), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvsxdsp", XX2(60,440), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvnegsp", XX2(60,441), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvmaxdp", XX3(60,224), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvnmaddadp", XX3(60,225), XX3_MASK, PPCVSX, 
PPCVLE, {XT6, XA6, XB6}}, +{"xvcvdpuxds", XX2(60,456), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvcvspdp", XX2(60,457), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xsiexpdp", X(60,918), XX1_MASK, PPCVSX3, PPCVLE, {XT6, RA, RB}}, +{"xvmindp", XX3(60,232), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvnmaddmdp", XX3(60,233), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvdpsxds", XX2(60,472), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvabsdp", XX2(60,473), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvxexpdp", XX2VA(60,475,0),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xvxsigdp", XX2VA(60,475,1),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xxbrh", XX2VA(60,475,7),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xvxexpsp", XX2VA(60,475,8),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xvxsigsp", XX2VA(60,475,9),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xxbrw", XX2VA(60,475,15),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xxbrd", XX2VA(60,475,23),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xvcvhpsp", XX2VA(60,475,24),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xvcvsphp", XX2VA(60,475,25),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xxbrq", XX2VA(60,475,31),XX2_MASK, PPCVSX3, PPCVLE, {XT6, XB6}}, +{"xvmovdp", XX3(60,240), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6S}}, +{"xvcpsgndp", XX3(60,240), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvnmsubadp", XX3(60,241), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvuxddp", XX2(60,488), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvnabsdp", XX2(60,489), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvtstdcdp", XX2(60,490), XX2DCMXS_MASK, PPCVSX3, PPCVLE, {XT6, XB6, DCMXS}}, +{"xviexpdp", XX3(60,248), XX3_MASK, PPCVSX3, PPCVLE, {XT6, XA6, XB6}}, +{"xvnmsubmdp", XX3(60,249), XX3_MASK, PPCVSX, PPCVLE, {XT6, XA6, XB6}}, +{"xvcvsxddp", XX2(60,504), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, +{"xvnegdp", XX2(60,505), XX2_MASK, PPCVSX, PPCVLE, {XT6, XB6}}, + +{"psq_st", OP(60), OP_MASK, PPCPS, PPCVLE, {FRS,PSD,RA,PSW,PSQ}}, +{"stfq", OP(60), OP_MASK, POWER2, PPCVLE, {FRS, D, RA}}, + +{"lxv", DQX(61,1), DQX_MASK, PPCVSX3, PPCVLE, {XTQ6, DQ, RA0}}, +{"stxv", DQX(61,5), DQX_MASK, PPCVSX3, PPCVLE, {XSQ6, DQ, RA0}}, +{"stxsd", DSO(61,2), DS_MASK, PPCVSX3, PPCVLE, {VS, DS, RA0}}, +{"stxssp", DSO(61,3), DS_MASK, PPCVSX3, PPCVLE, {VS, DS, RA0}}, +{"stfdp", OP(61), OP_MASK, POWER6, POWER7|PPCVLE, {FRSp, DS, RA0}}, +{"psq_stu", OP(61), OP_MASK, PPCPS, PPCVLE, {FRS,PSD,RA,PSW,PSQ}}, +{"stfqu", OP(61), OP_MASK, POWER2, PPCVLE, {FRS, D, RA}}, + +{"std", DSO(62,0), DS_MASK, PPC64, PPCVLE, {RS, DS, RA0}}, +{"stdu", DSO(62,1), DS_MASK, PPC64, PPCVLE, {RS, DS, RAS}}, +{"stq", DSO(62,2), DS_MASK, POWER4, PPC476|PPCVLE, {RSQ, DS, RA0}}, + +{"fcmpu", X(63,0), XBF_MASK, COM, PPCEFS|PPCVLE, {BF, FRA, FRB}}, + +{"daddq", XRC(63,2,0), X_MASK, POWER6, PPCVLE, {FRTp, FRAp, FRBp}}, +{"daddq.", XRC(63,2,1), X_MASK, POWER6, PPCVLE, {FRTp, FRAp, FRBp}}, + +{"dquaq", ZRC(63,3,0), Z2_MASK, POWER6, PPCVLE, {FRTp, FRAp, FRBp, RMC}}, +{"dquaq.", ZRC(63,3,1), Z2_MASK, POWER6, PPCVLE, {FRTp, FRAp, FRBp, RMC}}, + +{"xsaddqp", XRC(63,4,0), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, +{"xsaddqpo", XRC(63,4,1), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, + +{"xsrqpi", ZRC(63,5,0), Z2_MASK, PPCVSX3, PPCVLE, {R, VD, VB, RMC}}, +{"xsrqpix", ZRC(63,5,1), Z2_MASK, PPCVSX3, PPCVLE, {R, VD, VB, RMC}}, + +{"fcpsgn", XRC(63,8,0), X_MASK, POWER6|PPCA2|PPC476, PPCVLE, {FRT, FRA, FRB}}, +{"fcpsgn.", XRC(63,8,1), X_MASK, POWER6|PPCA2|PPC476, PPCVLE, {FRT, FRA, FRB}}, + +{"frsp", XRC(63,12,0), XRA_MASK, COM, PPCEFS|PPCVLE, {FRT, FRB}}, 
+{"frsp.", XRC(63,12,1), XRA_MASK, COM, PPCEFS|PPCVLE, {FRT, FRB}}, + +{"fctiw", XRC(63,14,0), XRA_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRB}}, +{"fcir", XRC(63,14,0), XRA_MASK, PWR2COM, PPCVLE, {FRT, FRB}}, +{"fctiw.", XRC(63,14,1), XRA_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRB}}, +{"fcir.", XRC(63,14,1), XRA_MASK, PWR2COM, PPCVLE, {FRT, FRB}}, + +{"fctiwz", XRC(63,15,0), XRA_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRB}}, +{"fcirz", XRC(63,15,0), XRA_MASK, PWR2COM, PPCVLE, {FRT, FRB}}, +{"fctiwz.", XRC(63,15,1), XRA_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRB}}, +{"fcirz.", XRC(63,15,1), XRA_MASK, PWR2COM, PPCVLE, {FRT, FRB}}, + +{"fdiv", A(63,18,0), AFRC_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, +{"fd", A(63,18,0), AFRC_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRB}}, +{"fdiv.", A(63,18,1), AFRC_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, +{"fd.", A(63,18,1), AFRC_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRB}}, + +{"fsub", A(63,20,0), AFRC_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, +{"fs", A(63,20,0), AFRC_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRB}}, +{"fsub.", A(63,20,1), AFRC_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, +{"fs.", A(63,20,1), AFRC_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRB}}, + +{"fadd", A(63,21,0), AFRC_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, +{"fa", A(63,21,0), AFRC_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRB}}, +{"fadd.", A(63,21,1), AFRC_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRB}}, +{"fa.", A(63,21,1), AFRC_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRB}}, + +{"fsqrt", A(63,22,0), AFRAFRC_MASK, PPCPWR2, TITAN|PPCVLE, {FRT, FRB}}, +{"fsqrt.", A(63,22,1), AFRAFRC_MASK, PPCPWR2, TITAN|PPCVLE, {FRT, FRB}}, + +{"fsel", A(63,23,0), A_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fsel.", A(63,23,1), A_MASK, PPC, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, + +{"fre", A(63,24,0), AFRAFRC_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"fre", A(63,24,0), AFRALFRC_MASK, POWER5, POWER7|PPCVLE, {FRT, FRB, A_L}}, +{"fre.", A(63,24,1), AFRAFRC_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"fre.", A(63,24,1), AFRALFRC_MASK, POWER5, POWER7|PPCVLE, {FRT, FRB, A_L}}, + +{"fmul", A(63,25,0), AFRB_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC}}, +{"fm", A(63,25,0), AFRB_MASK, PWRCOM, PPCVLE|PPCVLE, {FRT, FRA, FRC}}, +{"fmul.", A(63,25,1), AFRB_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC}}, +{"fm.", A(63,25,1), AFRB_MASK, PWRCOM, PPCVLE|PPCVLE, {FRT, FRA, FRC}}, + +{"frsqrte", A(63,26,0), AFRAFRC_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"frsqrte", A(63,26,0), AFRALFRC_MASK, PPC, POWER7|PPCVLE, {FRT, FRB, A_L}}, +{"frsqrte.", A(63,26,1), AFRAFRC_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"frsqrte.", A(63,26,1), AFRALFRC_MASK, PPC, POWER7|PPCVLE, {FRT, FRB, A_L}}, + +{"fmsub", A(63,28,0), A_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fms", A(63,28,0), A_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fmsub.", A(63,28,1), A_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fms.", A(63,28,1), A_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRC, FRB}}, + +{"fmadd", A(63,29,0), A_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fma", A(63,29,0), A_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fmadd.", A(63,29,1), A_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fma.", A(63,29,1), A_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRC, FRB}}, + +{"fnmsub", A(63,30,0), A_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fnms", A(63,30,0), A_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fnmsub.", A(63,30,1), A_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fnms.", A(63,30,1), A_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRC, FRB}}, + +{"fnmadd", A(63,31,0), 
A_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fnma", A(63,31,0), A_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fnmadd.", A(63,31,1), A_MASK, PPCCOM, PPCEFS|PPCVLE, {FRT, FRA, FRC, FRB}}, +{"fnma.", A(63,31,1), A_MASK, PWRCOM, PPCVLE, {FRT, FRA, FRC, FRB}}, + +{"fcmpo", X(63,32), XBF_MASK, COM, PPCEFS|PPCVLE, {BF, FRA, FRB}}, + +{"dmulq", XRC(63,34,0), X_MASK, POWER6, PPCVLE, {FRTp, FRAp, FRBp}}, +{"dmulq.", XRC(63,34,1), X_MASK, POWER6, PPCVLE, {FRTp, FRAp, FRBp}}, + +{"drrndq", ZRC(63,35,0), Z2_MASK, POWER6, PPCVLE, {FRTp, FRA, FRBp, RMC}}, +{"drrndq.", ZRC(63,35,1), Z2_MASK, POWER6, PPCVLE, {FRTp, FRA, FRBp, RMC}}, + +{"xsmulqp", XRC(63,36,0), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, +{"xsmulqpo", XRC(63,36,1), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, + +{"xsrqpxp", Z(63,37), Z2_MASK, PPCVSX3, PPCVLE, {R, VD, VB, RMC}}, + +{"mtfsb1", XRC(63,38,0), XRARB_MASK, COM, PPCVLE, {BT}}, +{"mtfsb1.", XRC(63,38,1), XRARB_MASK, COM, PPCVLE, {BT}}, + +{"fneg", XRC(63,40,0), XRA_MASK, COM, PPCEFS|PPCVLE, {FRT, FRB}}, +{"fneg.", XRC(63,40,1), XRA_MASK, COM, PPCEFS|PPCVLE, {FRT, FRB}}, + +{"mcrfs", X(63,64), XRB_MASK|(3<<21)|(3<<16), COM, PPCVLE, {BF, BFA}}, + +{"dscliq", ZRC(63,66,0), Z_MASK, POWER6, PPCVLE, {FRTp, FRAp, SH16}}, +{"dscliq.", ZRC(63,66,1), Z_MASK, POWER6, PPCVLE, {FRTp, FRAp, SH16}}, + +{"dquaiq", ZRC(63,67,0), Z2_MASK, POWER6, PPCVLE, {TE, FRTp, FRBp, RMC}}, +{"dquaiq.", ZRC(63,67,1), Z2_MASK, POWER6, PPCVLE, {TE, FRTp, FRBp, RMC}}, + +{"mtfsb0", XRC(63,70,0), XRARB_MASK, COM, PPCVLE, {BT}}, +{"mtfsb0.", XRC(63,70,1), XRARB_MASK, COM, PPCVLE, {BT}}, + +{"fmr", XRC(63,72,0), XRA_MASK, COM, PPCEFS|PPCVLE, {FRT, FRB}}, +{"fmr.", XRC(63,72,1), XRA_MASK, COM, PPCEFS|PPCVLE, {FRT, FRB}}, + +{"dscriq", ZRC(63,98,0), Z_MASK, POWER6, PPCVLE, {FRTp, FRAp, SH16}}, +{"dscriq.", ZRC(63,98,1), Z_MASK, POWER6, PPCVLE, {FRTp, FRAp, SH16}}, + +{"drintxq", ZRC(63,99,0), Z2_MASK, POWER6, PPCVLE, {R, FRTp, FRBp, RMC}}, +{"drintxq.", ZRC(63,99,1), Z2_MASK, POWER6, PPCVLE, {R, FRTp, FRBp, RMC}}, + +{"xscpsgnqp", X(63,100), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, + +{"ftdiv", X(63,128), XBF_MASK, POWER7, PPCVLE, {BF, FRA, FRB}}, + +{"dcmpoq", X(63,130), X_MASK, POWER6, PPCVLE, {BF, FRAp, FRBp}}, + +{"xscmpoqp", X(63,132), XBF_MASK, PPCVSX3, PPCVLE, {BF, VA, VB}}, + +{"mtfsfi", XRC(63,134,0), XWRA_MASK|(3<<21)|(1<<11), POWER6|PPCA2|PPC476, PPCVLE, {BFF, U, W}}, +{"mtfsfi", XRC(63,134,0), XRA_MASK|(3<<21)|(1<<11), COM, POWER6|PPCA2|PPC476|PPCVLE, {BFF, U}}, +{"mtfsfi.", XRC(63,134,1), XWRA_MASK|(3<<21)|(1<<11), POWER6|PPCA2|PPC476, PPCVLE, {BFF, U, W}}, +{"mtfsfi.", XRC(63,134,1), XRA_MASK|(3<<21)|(1<<11), COM, POWER6|PPCA2|PPC476|PPCVLE, {BFF, U}}, + +{"fnabs", XRC(63,136,0), XRA_MASK, COM, PPCEFS|PPCVLE, {FRT, FRB}}, +{"fnabs.", XRC(63,136,1), XRA_MASK, COM, PPCEFS|PPCVLE, {FRT, FRB}}, + +{"fctiwu", XRC(63,142,0), XRA_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"fctiwu.", XRC(63,142,1), XRA_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"fctiwuz", XRC(63,143,0), XRA_MASK, POWER7, PPCVLE, {FRT, FRB}}, +{"fctiwuz.", XRC(63,143,1), XRA_MASK, POWER7, PPCVLE, {FRT, FRB}}, + +{"ftsqrt", X(63,160), XBF_MASK|FRA_MASK, POWER7, PPCVLE, {BF, FRB}}, + +{"dtstexq", X(63,162), X_MASK, POWER6, PPCVLE, {BF, FRAp, FRBp}}, + +{"xscmpexpqp", X(63,164), XBF_MASK, PPCVSX3, PPCVLE, {BF, VA, VB}}, + +{"dtstdcq", Z(63,194), Z_MASK, POWER6, PPCVLE, {BF, FRAp, DCM}}, +{"dtstdgq", Z(63,226), Z_MASK, POWER6, PPCVLE, {BF, FRAp, DGM}}, + +{"drintnq", ZRC(63,227,0), Z2_MASK, POWER6, PPCVLE, {R, FRTp, FRBp, RMC}}, +{"drintnq.", ZRC(63,227,1), 
Z2_MASK, POWER6, PPCVLE, {R, FRTp, FRBp, RMC}}, + +{"dctqpq", XRC(63,258,0), X_MASK, POWER6, PPCVLE, {FRTp, FRB}}, +{"dctqpq.", XRC(63,258,1), X_MASK, POWER6, PPCVLE, {FRTp, FRB}}, + +{"fabs", XRC(63,264,0), XRA_MASK, COM, PPCEFS|PPCVLE, {FRT, FRB}}, +{"fabs.", XRC(63,264,1), XRA_MASK, COM, PPCEFS|PPCVLE, {FRT, FRB}}, + +{"dctfixq", XRC(63,290,0), X_MASK, POWER6, PPCVLE, {FRT, FRBp}}, +{"dctfixq.", XRC(63,290,1), X_MASK, POWER6, PPCVLE, {FRT, FRBp}}, + +{"ddedpdq", XRC(63,322,0), X_MASK, POWER6, PPCVLE, {SP, FRTp, FRBp}}, +{"ddedpdq.", XRC(63,322,1), X_MASK, POWER6, PPCVLE, {SP, FRTp, FRBp}}, + +{"dxexq", XRC(63,354,0), X_MASK, POWER6, PPCVLE, {FRT, FRBp}}, +{"dxexq.", XRC(63,354,1), X_MASK, POWER6, PPCVLE, {FRT, FRBp}}, + +{"xsmaddqp", XRC(63,388,0), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, +{"xsmaddqpo", XRC(63,388,1), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, + +{"frin", XRC(63,392,0), XRA_MASK, POWER5, PPCVLE, {FRT, FRB}}, +{"frin.", XRC(63,392,1), XRA_MASK, POWER5, PPCVLE, {FRT, FRB}}, + +{"xsmsubqp", XRC(63,420,0), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, +{"xsmsubqpo", XRC(63,420,1), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, + +{"friz", XRC(63,424,0), XRA_MASK, POWER5, PPCVLE, {FRT, FRB}}, +{"friz.", XRC(63,424,1), XRA_MASK, POWER5, PPCVLE, {FRT, FRB}}, + +{"xsnmaddqp", XRC(63,452,0), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, +{"xsnmaddqpo", XRC(63,452,1), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, + +{"frip", XRC(63,456,0), XRA_MASK, POWER5, PPCVLE, {FRT, FRB}}, +{"frip.", XRC(63,456,1), XRA_MASK, POWER5, PPCVLE, {FRT, FRB}}, + +{"xsnmsubqp", XRC(63,484,0), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, +{"xsnmsubqpo", XRC(63,484,1), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, + +{"frim", XRC(63,488,0), XRA_MASK, POWER5, PPCVLE, {FRT, FRB}}, +{"frim.", XRC(63,488,1), XRA_MASK, POWER5, PPCVLE, {FRT, FRB}}, + +{"dsubq", XRC(63,514,0), X_MASK, POWER6, PPCVLE, {FRTp, FRAp, FRBp}}, +{"dsubq.", XRC(63,514,1), X_MASK, POWER6, PPCVLE, {FRTp, FRAp, FRBp}}, + +{"xssubqp", XRC(63,516,0), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, +{"xssubqpo", XRC(63,516,1), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, + +{"ddivq", XRC(63,546,0), X_MASK, POWER6, PPCVLE, {FRTp, FRAp, FRBp}}, +{"ddivq.", XRC(63,546,1), X_MASK, POWER6, PPCVLE, {FRTp, FRAp, FRBp}}, + +{"xsdivqp", XRC(63,548,0), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, +{"xsdivqpo", XRC(63,548,1), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, + +{"mffs", XRC(63,583,0), XRARB_MASK, COM, PPCEFS|PPCVLE, {FRT}}, +{"mffs.", XRC(63,583,1), XRARB_MASK, COM, PPCEFS|PPCVLE, {FRT}}, + +{"mffsce", XMMF(63,583,0,1), XMMF_MASK|RB_MASK, POWER9, PPCVLE, {FRT}}, +{"mffscdrn", XMMF(63,583,2,4), XMMF_MASK, POWER9, PPCVLE, {FRT, FRB}}, +{"mffscdrni", XMMF(63,583,2,5), XMMF_MASK|(3<<14), POWER9, PPCVLE, {FRT, DRM}}, +{"mffscrn", XMMF(63,583,2,6), XMMF_MASK, POWER9, PPCVLE, {FRT, FRB}}, +{"mffscrni", XMMF(63,583,2,7), XMMF_MASK|(7<<13), POWER9, PPCVLE, {FRT, RM}}, +{"mffsl", XMMF(63,583,3,0), XMMF_MASK|RB_MASK, POWER9, PPCVLE, {FRT}}, + +{"dcmpuq", X(63,642), X_MASK, POWER6, PPCVLE, {BF, FRAp, FRBp}}, + +{"xscmpuqp", X(63,644), XBF_MASK, PPCVSX3, PPCVLE, {BF, VA, VB}}, + +{"dtstsfq", X(63,674), X_MASK, POWER6, PPCVLE, {BF, FRA, FRBp}}, +{"dtstsfiq", X(63,675), X_MASK|1<<22,POWER9, PPCVLE, {BF, UIM6, FRBp}}, + +{"xststdcqp", X(63,708), X_MASK, PPCVSX3, PPCVLE, {BF, VB, DCMX}}, + +{"mtfsf", XFL(63,711,0), XFL_MASK, POWER6|PPCA2|PPC476, PPCVLE, {FLM, FRB, XFL_L, W}}, +{"mtfsf", XFL(63,711,0), XFL_MASK, COM, POWER6|PPCA2|PPC476|PPCEFS|PPCVLE, {FLM, FRB}}, +{"mtfsf.", XFL(63,711,1), XFL_MASK, 
POWER6|PPCA2|PPC476, PPCVLE, {FLM, FRB, XFL_L, W}}, +{"mtfsf.", XFL(63,711,1), XFL_MASK, COM, POWER6|PPCA2|PPC476|PPCEFS|PPCVLE, {FLM, FRB}}, + +{"drdpq", XRC(63,770,0), X_MASK, POWER6, PPCVLE, {FRTp, FRBp}}, +{"drdpq.", XRC(63,770,1), X_MASK, POWER6, PPCVLE, {FRTp, FRBp}}, + +{"dcffixq", XRC(63,802,0), X_MASK, POWER6, PPCVLE, {FRTp, FRB}}, +{"dcffixq.", XRC(63,802,1), X_MASK, POWER6, PPCVLE, {FRTp, FRB}}, + +{"xsabsqp", XVA(63,804,0), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xsxexpqp", XVA(63,804,2), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xsnabsqp", XVA(63,804,8), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xsnegqp", XVA(63,804,16), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xsxsigqp", XVA(63,804,18), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xssqrtqp", XVARC(63,804,27,0), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xssqrtqpo", XVARC(63,804,27,1), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, + +{"fctid", XRC(63,814,0), XRA_MASK, PPC64, PPCVLE, {FRT, FRB}}, +{"fctid", XRC(63,814,0), XRA_MASK, PPC476, PPCVLE, {FRT, FRB}}, +{"fctid.", XRC(63,814,1), XRA_MASK, PPC64, PPCVLE, {FRT, FRB}}, +{"fctid.", XRC(63,814,1), XRA_MASK, PPC476, PPCVLE, {FRT, FRB}}, + +{"fctidz", XRC(63,815,0), XRA_MASK, PPC64, PPCVLE, {FRT, FRB}}, +{"fctidz", XRC(63,815,0), XRA_MASK, PPC476, PPCVLE, {FRT, FRB}}, +{"fctidz.", XRC(63,815,1), XRA_MASK, PPC64, PPCVLE, {FRT, FRB}}, +{"fctidz.", XRC(63,815,1), XRA_MASK, PPC476, PPCVLE, {FRT, FRB}}, + +{"denbcdq", XRC(63,834,0), X_MASK, POWER6, PPCVLE, {S, FRTp, FRBp}}, +{"denbcdq.", XRC(63,834,1), X_MASK, POWER6, PPCVLE, {S, FRTp, FRBp}}, + +{"xscvqpuwz", XVA(63,836,1), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xscvudqp", XVA(63,836,2), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xscvqpswz", XVA(63,836,9), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xscvsdqp", XVA(63,836,10), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xscvqpudz", XVA(63,836,17), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xscvqpdp", XVARC(63,836,20,0), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xscvqpdpo", XVARC(63,836,20,1), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xscvdpqp", XVA(63,836,22), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, +{"xscvqpsdz", XVA(63,836,25), XVA_MASK, PPCVSX3, PPCVLE, {VD, VB}}, + +{"fmrgow", X(63,838), X_MASK, PPCVSX2, PPCVLE, {FRT, FRA, FRB}}, + +{"fcfid", XRC(63,846,0), XRA_MASK, PPC64, PPCVLE, {FRT, FRB}}, +{"fcfid", XRC(63,846,0), XRA_MASK, PPC476, PPCVLE, {FRT, FRB}}, +{"fcfid.", XRC(63,846,1), XRA_MASK, PPC64, PPCVLE, {FRT, FRB}}, +{"fcfid.", XRC(63,846,1), XRA_MASK, PPC476, PPCVLE, {FRT, FRB}}, + +{"diexq", XRC(63,866,0), X_MASK, POWER6, PPCVLE, {FRTp, FRA, FRBp}}, +{"diexq.", XRC(63,866,1), X_MASK, POWER6, PPCVLE, {FRTp, FRA, FRBp}}, + +{"xsiexpqp", X(63,868), X_MASK, PPCVSX3, PPCVLE, {VD, VA, VB}}, + +{"fctidu", XRC(63,942,0), XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, FRB}}, +{"fctidu.", XRC(63,942,1), XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, FRB}}, + +{"fctiduz", XRC(63,943,0), XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, FRB}}, +{"fctiduz.", XRC(63,943,1), XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, FRB}}, + +{"fmrgew", X(63,966), X_MASK, PPCVSX2, PPCVLE, {FRT, FRA, FRB}}, + +{"fcfidu", XRC(63,974,0), XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, FRB}}, +{"fcfidu.", XRC(63,974,1), XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, FRB}}, }; const int powerpc_num_opcodes = sizeof (powerpc_opcodes) / sizeof (powerpc_opcodes[0]); +/* The VLE opcode table. + + The format of this opcode table is the same as the main opcode table. 
*/ + +const struct powerpc_opcode vle_opcodes[] = { +{"se_illegal", C(0), C_MASK, PPCVLE, 0, {}}, +{"se_isync", C(1), C_MASK, PPCVLE, 0, {}}, +{"se_sc", C(2), C_MASK, PPCVLE, 0, {}}, +{"se_blr", C_LK(2,0), C_LK_MASK, PPCVLE, 0, {}}, +{"se_blrl", C_LK(2,1), C_LK_MASK, PPCVLE, 0, {}}, +{"se_bctr", C_LK(3,0), C_LK_MASK, PPCVLE, 0, {}}, +{"se_bctrl", C_LK(3,1), C_LK_MASK, PPCVLE, 0, {}}, +{"se_rfi", C(8), C_MASK, PPCVLE, 0, {}}, +{"se_rfci", C(9), C_MASK, PPCVLE, 0, {}}, +{"se_rfdi", C(10), C_MASK, PPCVLE, 0, {}}, +{"se_rfmci", C(11), C_MASK, PPCRFMCI|PPCVLE, 0, {}}, +{"se_not", SE_R(0,2), SE_R_MASK, PPCVLE, 0, {RX}}, +{"se_neg", SE_R(0,3), SE_R_MASK, PPCVLE, 0, {RX}}, +{"se_mflr", SE_R(0,8), SE_R_MASK, PPCVLE, 0, {RX}}, +{"se_mtlr", SE_R(0,9), SE_R_MASK, PPCVLE, 0, {RX}}, +{"se_mfctr", SE_R(0,10), SE_R_MASK, PPCVLE, 0, {RX}}, +{"se_mtctr", SE_R(0,11), SE_R_MASK, PPCVLE, 0, {RX}}, +{"se_extzb", SE_R(0,12), SE_R_MASK, PPCVLE, 0, {RX}}, +{"se_extsb", SE_R(0,13), SE_R_MASK, PPCVLE, 0, {RX}}, +{"se_extzh", SE_R(0,14), SE_R_MASK, PPCVLE, 0, {RX}}, +{"se_extsh", SE_R(0,15), SE_R_MASK, PPCVLE, 0, {RX}}, +{"se_mr", SE_RR(0,1), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_mtar", SE_RR(0,2), SE_RR_MASK, PPCVLE, 0, {ARX, RY}}, +{"se_mfar", SE_RR(0,3), SE_RR_MASK, PPCVLE, 0, {RX, ARY}}, +{"se_add", SE_RR(1,0), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_mullw", SE_RR(1,1), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_sub", SE_RR(1,2), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_subf", SE_RR(1,3), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_cmp", SE_RR(3,0), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_cmpl", SE_RR(3,1), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_cmph", SE_RR(3,2), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_cmphl", SE_RR(3,3), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, + +{"e_cmpi", SCI8BF(6,0,21), SCI8BF_MASK, PPCVLE, 0, {CRD32, RA, SCLSCI8}}, +{"e_cmpwi", SCI8BF(6,0,21), SCI8BF_MASK, PPCVLE, 0, {CRD32, RA, SCLSCI8}}, +{"e_cmpli", SCI8BF(6,1,21), SCI8BF_MASK, PPCVLE, 0, {CRD32, RA, SCLSCI8}}, +{"e_cmplwi", SCI8BF(6,1,21), SCI8BF_MASK, PPCVLE, 0, {CRD32, RA, SCLSCI8}}, +{"e_addi", SCI8(6,16), SCI8_MASK, PPCVLE, 0, {RT, RA, SCLSCI8}}, +{"e_subi", SCI8(6,16), SCI8_MASK, PPCVLE, 0, {RT, RA, SCLSCI8N}}, +{"e_addi.", SCI8(6,17), SCI8_MASK, PPCVLE, 0, {RT, RA, SCLSCI8}}, +{"e_addic", SCI8(6,18), SCI8_MASK, PPCVLE, 0, {RT, RA, SCLSCI8}}, +{"e_subic", SCI8(6,18), SCI8_MASK, PPCVLE, 0, {RT, RA, SCLSCI8N}}, +{"e_addic.", SCI8(6,19), SCI8_MASK, PPCVLE, 0, {RT, RA, SCLSCI8}}, +{"e_subic.", SCI8(6,19), SCI8_MASK, PPCVLE, 0, {RT, RA, SCLSCI8N}}, +{"e_mulli", SCI8(6,20), SCI8_MASK, PPCVLE, 0, {RT, RA, SCLSCI8}}, +{"e_subfic", SCI8(6,22), SCI8_MASK, PPCVLE, 0, {RT, RA, SCLSCI8}}, +{"e_subfic.", SCI8(6,23), SCI8_MASK, PPCVLE, 0, {RT, RA, SCLSCI8}}, +{"e_andi", SCI8(6,24), SCI8_MASK, PPCVLE, 0, {RA, RS, SCLSCI8}}, +{"e_andi.", SCI8(6,25), SCI8_MASK, PPCVLE, 0, {RA, RS, SCLSCI8}}, +{"e_nop", SCI8(6,26), 0xffffffff, PPCVLE, 0, {0}}, +{"e_ori", SCI8(6,26), SCI8_MASK, PPCVLE, 0, {RA, RS, SCLSCI8}}, +{"e_ori.", SCI8(6,27), SCI8_MASK, PPCVLE, 0, {RA, RS, SCLSCI8}}, +{"e_xori", SCI8(6,28), SCI8_MASK, PPCVLE, 0, {RA, RS, SCLSCI8}}, +{"e_xori.", SCI8(6,29), SCI8_MASK, PPCVLE, 0, {RA, RS, SCLSCI8}}, +{"e_lbzu", OPVUP(6,0), OPVUP_MASK, PPCVLE, 0, {RT, D8, RA0}}, +{"e_lhau", OPVUP(6,3), OPVUP_MASK, PPCVLE, 0, {RT, D8, RA0}}, +{"e_lhzu", OPVUP(6,1), OPVUP_MASK, PPCVLE, 0, {RT, D8, RA0}}, +{"e_lmw", OPVUP(6,8), OPVUP_MASK, PPCVLE, 0, {RT, D8, RA0}}, +{"e_lwzu", OPVUP(6,2), OPVUP_MASK, PPCVLE, 0, {RT, D8, RA0}}, +{"e_stbu", OPVUP(6,4), OPVUP_MASK, PPCVLE, 0, {RT, 
D8, RA0}}, +{"e_sthu", OPVUP(6,5), OPVUP_MASK, PPCVLE, 0, {RT, D8, RA0}}, +{"e_stwu", OPVUP(6,6), OPVUP_MASK, PPCVLE, 0, {RT, D8, RA0}}, +{"e_stmw", OPVUP(6,9), OPVUP_MASK, PPCVLE, 0, {RT, D8, RA0}}, +{"e_ldmvgprw", OPVUPRT(6,16,0),OPVUPRT_MASK, PPCVLE, 0, {D8, RA0}}, +{"e_stmvgprw", OPVUPRT(6,17,0),OPVUPRT_MASK, PPCVLE, 0, {D8, RA0}}, +{"e_ldmvsprw", OPVUPRT(6,16,1),OPVUPRT_MASK, PPCVLE, 0, {D8, RA0}}, +{"e_stmvsprw", OPVUPRT(6,17,1),OPVUPRT_MASK, PPCVLE, 0, {D8, RA0}}, +{"e_ldmvsrrw", OPVUPRT(6,16,4),OPVUPRT_MASK, PPCVLE, 0, {D8, RA0}}, +{"e_stmvsrrw", OPVUPRT(6,17,4),OPVUPRT_MASK, PPCVLE, 0, {D8, RA0}}, +{"e_ldmvcsrrw", OPVUPRT(6,16,5),OPVUPRT_MASK, PPCVLE, 0, {D8, RA0}}, +{"e_stmvcsrrw", OPVUPRT(6,17,5),OPVUPRT_MASK, PPCVLE, 0, {D8, RA0}}, +{"e_ldmvdsrrw", OPVUPRT(6,16,6),OPVUPRT_MASK, PPCVLE, 0, {D8, RA0}}, +{"e_stmvdsrrw", OPVUPRT(6,17,6),OPVUPRT_MASK, PPCVLE, 0, {D8, RA0}}, +{"e_add16i", OP(7), OP_MASK, PPCVLE, 0, {RT, RA, SI}}, +{"e_la", OP(7), OP_MASK, PPCVLE, 0, {RT, D, RA0}}, +{"e_sub16i", OP(7), OP_MASK, PPCVLE, 0, {RT, RA, NSI}}, + +{"se_addi", SE_IM5(8,0), SE_IM5_MASK, PPCVLE, 0, {RX, OIMM5}}, +{"se_cmpli", SE_IM5(8,1), SE_IM5_MASK, PPCVLE, 0, {RX, OIMM5}}, +{"se_subi", SE_IM5(9,0), SE_IM5_MASK, PPCVLE, 0, {RX, OIMM5}}, +{"se_subi.", SE_IM5(9,1), SE_IM5_MASK, PPCVLE, 0, {RX, OIMM5}}, +{"se_cmpi", SE_IM5(10,1), SE_IM5_MASK, PPCVLE, 0, {RX, UI5}}, +{"se_bmaski", SE_IM5(11,0), SE_IM5_MASK, PPCVLE, 0, {RX, UI5}}, +{"se_andi", SE_IM5(11,1), SE_IM5_MASK, PPCVLE, 0, {RX, UI5}}, + +{"e_lbz", OP(12), OP_MASK, PPCVLE, 0, {RT, D, RA0}}, +{"e_stb", OP(13), OP_MASK, PPCVLE, 0, {RT, D, RA0}}, +{"e_lha", OP(14), OP_MASK, PPCVLE, 0, {RT, D, RA0}}, + +{"se_srw", SE_RR(16,0), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_sraw", SE_RR(16,1), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_slw", SE_RR(16,2), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_nop", SE_RR(17,0), 0xffff, PPCVLE, 0, {0}}, +{"se_or", SE_RR(17,0), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_andc", SE_RR(17,1), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_and", SE_RR(17,2), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_and.", SE_RR(17,3), SE_RR_MASK, PPCVLE, 0, {RX, RY}}, +{"se_li", IM7(9), IM7_MASK, PPCVLE, 0, {RX, UI7}}, + +{"e_lwz", OP(20), OP_MASK, PPCVLE, 0, {RT, D, RA0}}, +{"e_stw", OP(21), OP_MASK, PPCVLE, 0, {RT, D, RA0}}, +{"e_lhz", OP(22), OP_MASK, PPCVLE, 0, {RT, D, RA0}}, +{"e_sth", OP(23), OP_MASK, PPCVLE, 0, {RT, D, RA0}}, + +{"se_bclri", SE_IM5(24,0), SE_IM5_MASK, PPCVLE, 0, {RX, UI5}}, +{"se_bgeni", SE_IM5(24,1), SE_IM5_MASK, PPCVLE, 0, {RX, UI5}}, +{"se_bseti", SE_IM5(25,0), SE_IM5_MASK, PPCVLE, 0, {RX, UI5}}, +{"se_btsti", SE_IM5(25,1), SE_IM5_MASK, PPCVLE, 0, {RX, UI5}}, +{"se_srwi", SE_IM5(26,0), SE_IM5_MASK, PPCVLE, 0, {RX, UI5}}, +{"se_srawi", SE_IM5(26,1), SE_IM5_MASK, PPCVLE, 0, {RX, UI5}}, +{"se_slwi", SE_IM5(27,0), SE_IM5_MASK, PPCVLE, 0, {RX, UI5}}, + +{"e_lis", I16L(28,28), I16L_MASK, PPCVLE, 0, {RD, VLEUIMML}}, +{"e_and2is.", I16L(28,29), I16L_MASK, PPCVLE, 0, {RD, VLEUIMML}}, +{"e_or2is", I16L(28,26), I16L_MASK, PPCVLE, 0, {RD, VLEUIMML}}, +{"e_and2i.", I16L(28,25), I16L_MASK, PPCVLE, 0, {RD, VLEUIMML}}, +{"e_or2i", I16L(28,24), I16L_MASK, PPCVLE, 0, {RD, VLEUIMML}}, +{"e_cmphl16i", IA16(28,23), IA16_MASK, PPCVLE, 0, {RA, VLEUIMM}}, +{"e_cmph16i", IA16(28,22), IA16_MASK, PPCVLE, 0, {RA, VLESIMM}}, +{"e_cmpl16i", I16A(28,21), I16A_MASK, PPCVLE, 0, {RA, VLEUIMM}}, +{"e_mull2i", I16A(28,20), I16A_MASK, PPCVLE, 0, {RA, VLESIMM}}, +{"e_cmp16i", IA16(28,19), IA16_MASK, PPCVLE, 0, {RA, VLESIMM}}, +{"e_sub2is", I16A(28,18), I16A_MASK, 
PPCVLE, 0, {RA, VLENSIMM}}, +{"e_add2is", I16A(28,18), I16A_MASK, PPCVLE, 0, {RA, VLESIMM}}, +{"e_sub2i.", I16A(28,17), I16A_MASK, PPCVLE, 0, {RA, VLENSIMM}}, +{"e_add2i.", I16A(28,17), I16A_MASK, PPCVLE, 0, {RA, VLESIMM}}, +{"e_li", LI20(28,0), LI20_MASK, PPCVLE, 0, {RT, IMM20}}, +{"e_rlwimi", M(29,0), M_MASK, PPCVLE, 0, {RA, RS, SH, MB, ME}}, +{"e_rlwinm", M(29,1), M_MASK, PPCVLE, 0, {RA, RT, SH, MBE, ME}}, +{"e_b", BD24(30,0,0), BD24_MASK, PPCVLE, 0, {B24}}, +{"e_bl", BD24(30,0,1), BD24_MASK, PPCVLE, 0, {B24}}, +{"e_bdnz", EBD15(30,8,BO32DNZ,0), EBD15_MASK, PPCVLE, 0, {B15}}, +{"e_bdnzl", EBD15(30,8,BO32DNZ,1), EBD15_MASK, PPCVLE, 0, {B15}}, +{"e_bdz", EBD15(30,8,BO32DZ,0), EBD15_MASK, PPCVLE, 0, {B15}}, +{"e_bdzl", EBD15(30,8,BO32DZ,1), EBD15_MASK, PPCVLE, 0, {B15}}, +{"e_bge", EBD15BI(30,8,BO32F,CBLT,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bgel", EBD15BI(30,8,BO32F,CBLT,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bnl", EBD15BI(30,8,BO32F,CBLT,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bnll", EBD15BI(30,8,BO32F,CBLT,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_blt", EBD15BI(30,8,BO32T,CBLT,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bltl", EBD15BI(30,8,BO32T,CBLT,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bgt", EBD15BI(30,8,BO32T,CBGT,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bgtl", EBD15BI(30,8,BO32T,CBGT,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_ble", EBD15BI(30,8,BO32F,CBGT,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_blel", EBD15BI(30,8,BO32F,CBGT,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bng", EBD15BI(30,8,BO32F,CBGT,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bngl", EBD15BI(30,8,BO32F,CBGT,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bne", EBD15BI(30,8,BO32F,CBEQ,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bnel", EBD15BI(30,8,BO32F,CBEQ,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_beq", EBD15BI(30,8,BO32T,CBEQ,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_beql", EBD15BI(30,8,BO32T,CBEQ,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bso", EBD15BI(30,8,BO32T,CBSO,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bsol", EBD15BI(30,8,BO32T,CBSO,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bun", EBD15BI(30,8,BO32T,CBSO,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bunl", EBD15BI(30,8,BO32T,CBSO,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bns", EBD15BI(30,8,BO32F,CBSO,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bnsl", EBD15BI(30,8,BO32F,CBSO,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bnu", EBD15BI(30,8,BO32F,CBSO,0), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bnul", EBD15BI(30,8,BO32F,CBSO,1), EBD15BI_MASK, PPCVLE, 0, {CRS,B15}}, +{"e_bc", BD15(30,8,0), BD15_MASK, PPCVLE, 0, {BO32, BI32, B15}}, +{"e_bcl", BD15(30,8,1), BD15_MASK, PPCVLE, 0, {BO32, BI32, B15}}, + +{"e_bf", EBD15(30,8,BO32F,0), EBD15_MASK, PPCVLE, 0, {BI32,B15}}, +{"e_bfl", EBD15(30,8,BO32F,1), EBD15_MASK, PPCVLE, 0, {BI32,B15}}, +{"e_bt", EBD15(30,8,BO32T,0), EBD15_MASK, PPCVLE, 0, {BI32,B15}}, +{"e_btl", EBD15(30,8,BO32T,1), EBD15_MASK, PPCVLE, 0, {BI32,B15}}, + +{"e_cmph", X(31,14), X_MASK, PPCVLE, 0, {CRD, RA, RB}}, +{"e_cmphl", X(31,46), X_MASK, PPCVLE, 0, {CRD, RA, RB}}, +{"e_crandc", XL(31,129), XL_MASK, PPCVLE, 0, {BT, BA, BB}}, +{"e_crnand", XL(31,225), XL_MASK, PPCVLE, 0, {BT, BA, BB}}, +{"e_crnot", XL(31,33), XL_MASK, PPCVLE, 0, {BT, BA, BBA}}, +{"e_crnor", XL(31,33), XL_MASK, PPCVLE, 0, {BT, BA, BB}}, +{"e_crclr", XL(31,193), XL_MASK, PPCVLE, 0, {BT, BAT, BBA}}, +{"e_crxor", XL(31,193), XL_MASK, PPCVLE, 0, {BT, BA, BB}}, +{"e_mcrf", XL(31,16), XL_MASK, PPCVLE, 0, {CRD, 
CR}}, +{"e_slwi", EX(31,112), EX_MASK, PPCVLE, 0, {RA, RS, SH}}, +{"e_slwi.", EX(31,113), EX_MASK, PPCVLE, 0, {RA, RS, SH}}, + +{"e_crand", XL(31,257), XL_MASK, PPCVLE, 0, {BT, BA, BB}}, + +{"e_rlw", EX(31,560), EX_MASK, PPCVLE, 0, {RA, RS, RB}}, +{"e_rlw.", EX(31,561), EX_MASK, PPCVLE, 0, {RA, RS, RB}}, + +{"e_crset", XL(31,289), XL_MASK, PPCVLE, 0, {BT, BAT, BBA}}, +{"e_creqv", XL(31,289), XL_MASK, PPCVLE, 0, {BT, BA, BB}}, + +{"e_rlwi", EX(31,624), EX_MASK, PPCVLE, 0, {RA, RS, SH}}, +{"e_rlwi.", EX(31,625), EX_MASK, PPCVLE, 0, {RA, RS, SH}}, + +{"e_crorc", XL(31,417), XL_MASK, PPCVLE, 0, {BT, BA, BB}}, + +{"e_crmove", XL(31,449), XL_MASK, PPCVLE, 0, {BT, BA, BBA}}, +{"e_cror", XL(31,449), XL_MASK, PPCVLE, 0, {BT, BA, BB}}, + +{"mtmas1", XSPR(31,467,625), XSPR_MASK, PPCVLE, 0, {RS}}, + +{"e_srwi", EX(31,1136), EX_MASK, PPCVLE, 0, {RA, RS, SH}}, +{"e_srwi.", EX(31,1137), EX_MASK, PPCVLE, 0, {RA, RS, SH}}, + +{"se_lbz", SD4(8), SD4_MASK, PPCVLE, 0, {RZ, SE_SD, RX}}, + +{"se_stb", SD4(9), SD4_MASK, PPCVLE, 0, {RZ, SE_SD, RX}}, + +{"se_lhz", SD4(10), SD4_MASK, PPCVLE, 0, {RZ, SE_SDH, RX}}, + +{"se_sth", SD4(11), SD4_MASK, PPCVLE, 0, {RZ, SE_SDH, RX}}, + +{"se_lwz", SD4(12), SD4_MASK, PPCVLE, 0, {RZ, SE_SDW, RX}}, + +{"se_stw", SD4(13), SD4_MASK, PPCVLE, 0, {RZ, SE_SDW, RX}}, + +{"se_bge", EBD8IO(28,0,0), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_bnl", EBD8IO(28,0,0), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_ble", EBD8IO(28,0,1), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_bng", EBD8IO(28,0,1), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_bne", EBD8IO(28,0,2), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_bns", EBD8IO(28,0,3), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_bnu", EBD8IO(28,0,3), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_bf", EBD8IO(28,0,0), EBD8IO2_MASK, PPCVLE, 0, {BI16, B8}}, +{"se_blt", EBD8IO(28,1,0), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_bgt", EBD8IO(28,1,1), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_beq", EBD8IO(28,1,2), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_bso", EBD8IO(28,1,3), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_bun", EBD8IO(28,1,3), EBD8IO3_MASK, PPCVLE, 0, {B8}}, +{"se_bt", EBD8IO(28,1,0), EBD8IO2_MASK, PPCVLE, 0, {BI16, B8}}, +{"se_bc", BD8IO(28), BD8IO_MASK, PPCVLE, 0, {BO16, BI16, B8}}, +{"se_b", BD8(58,0,0), BD8_MASK, PPCVLE, 0, {B8}}, +{"se_bl", BD8(58,0,1), BD8_MASK, PPCVLE, 0, {B8}}, +}; + +const int vle_num_opcodes = + sizeof (vle_opcodes) / sizeof (vle_opcodes[0]); + /* The macro table. This is only used by the assembler. */ /* The expressions of the form (-x ! 31) & (x | 31) have the value 0 @@ -4690,45 +7231,57 @@ const int powerpc_num_opcodes = support extracting the whole word (32 bits in this case). */ const struct powerpc_macro powerpc_macros[] = { -{ "extldi", 4, PPC64, "rldicr %0,%1,%3,(%2)-1" }, -{ "extldi.", 4, PPC64, "rldicr. %0,%1,%3,(%2)-1" }, -{ "extrdi", 4, PPC64, "rldicl %0,%1,(%2)+(%3),64-(%2)" }, -{ "extrdi.", 4, PPC64, "rldicl. %0,%1,(%2)+(%3),64-(%2)" }, -{ "insrdi", 4, PPC64, "rldimi %0,%1,64-((%2)+(%3)),%3" }, -{ "insrdi.", 4, PPC64, "rldimi. %0,%1,64-((%2)+(%3)),%3" }, -{ "rotrdi", 3, PPC64, "rldicl %0,%1,(-(%2)!63)&((%2)|63),0" }, -{ "rotrdi.", 3, PPC64, "rldicl. %0,%1,(-(%2)!63)&((%2)|63),0" }, -{ "sldi", 3, PPC64, "rldicr %0,%1,%2,63-(%2)" }, -{ "sldi.", 3, PPC64, "rldicr. %0,%1,%2,63-(%2)" }, -{ "srdi", 3, PPC64, "rldicl %0,%1,(-(%2)!63)&((%2)|63),%2" }, -{ "srdi.", 3, PPC64, "rldicl. %0,%1,(-(%2)!63)&((%2)|63),%2" }, -{ "clrrdi", 3, PPC64, "rldicr %0,%1,0,63-(%2)" }, -{ "clrrdi.", 3, PPC64, "rldicr. 
%0,%1,0,63-(%2)" }, -{ "clrlsldi",4, PPC64, "rldic %0,%1,%3,(%2)-(%3)" }, -{ "clrlsldi.",4, PPC64, "rldic. %0,%1,%3,(%2)-(%3)" }, - -{ "extlwi", 4, PPCCOM, "rlwinm %0,%1,%3,0,(%2)-1" }, -{ "extlwi.", 4, PPCCOM, "rlwinm. %0,%1,%3,0,(%2)-1" }, -{ "extrwi", 4, PPCCOM, "rlwinm %0,%1,((%2)+(%3))&((%2)+(%3)<>32),32-(%2),31" }, -{ "extrwi.", 4, PPCCOM, "rlwinm. %0,%1,((%2)+(%3))&((%2)+(%3)<>32),32-(%2),31" }, -{ "inslwi", 4, PPCCOM, "rlwimi %0,%1,(-(%3)!31)&((%3)|31),%3,(%2)+(%3)-1" }, -{ "inslwi.", 4, PPCCOM, "rlwimi. %0,%1,(-(%3)!31)&((%3)|31),%3,(%2)+(%3)-1"}, -{ "insrwi", 4, PPCCOM, "rlwimi %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1" }, -{ "insrwi.", 4, PPCCOM, "rlwimi. %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1"}, -{ "rotrwi", 3, PPCCOM, "rlwinm %0,%1,(-(%2)!31)&((%2)|31),0,31" }, -{ "rotrwi.", 3, PPCCOM, "rlwinm. %0,%1,(-(%2)!31)&((%2)|31),0,31" }, -{ "slwi", 3, PPCCOM, "rlwinm %0,%1,%2,0,31-(%2)" }, -{ "sli", 3, PWRCOM, "rlinm %0,%1,%2,0,31-(%2)" }, -{ "slwi.", 3, PPCCOM, "rlwinm. %0,%1,%2,0,31-(%2)" }, -{ "sli.", 3, PWRCOM, "rlinm. %0,%1,%2,0,31-(%2)" }, -{ "srwi", 3, PPCCOM, "rlwinm %0,%1,(-(%2)!31)&((%2)|31),%2,31" }, -{ "sri", 3, PWRCOM, "rlinm %0,%1,(-(%2)!31)&((%2)|31),%2,31" }, -{ "srwi.", 3, PPCCOM, "rlwinm. %0,%1,(-(%2)!31)&((%2)|31),%2,31" }, -{ "sri.", 3, PWRCOM, "rlinm. %0,%1,(-(%2)!31)&((%2)|31),%2,31" }, -{ "clrrwi", 3, PPCCOM, "rlwinm %0,%1,0,0,31-(%2)" }, -{ "clrrwi.", 3, PPCCOM, "rlwinm. %0,%1,0,0,31-(%2)" }, -{ "clrlslwi",4, PPCCOM, "rlwinm %0,%1,%3,(%2)-(%3),31-(%3)" }, -{ "clrlslwi.",4, PPCCOM, "rlwinm. %0,%1,%3,(%2)-(%3),31-(%3)" }, +{"extldi", 4, PPC64, "rldicr %0,%1,%3,(%2)-1"}, +{"extldi.", 4, PPC64, "rldicr. %0,%1,%3,(%2)-1"}, +{"extrdi", 4, PPC64, "rldicl %0,%1,((%2)+(%3))&((%2)+(%3)<>64),64-(%2)"}, +{"extrdi.", 4, PPC64, "rldicl. %0,%1,((%2)+(%3))&((%2)+(%3)<>64),64-(%2)"}, +{"insrdi", 4, PPC64, "rldimi %0,%1,64-((%2)+(%3)),%3"}, +{"insrdi.", 4, PPC64, "rldimi. %0,%1,64-((%2)+(%3)),%3"}, +{"rotrdi", 3, PPC64, "rldicl %0,%1,(-(%2)!63)&((%2)|63),0"}, +{"rotrdi.", 3, PPC64, "rldicl. %0,%1,(-(%2)!63)&((%2)|63),0"}, +{"sldi", 3, PPC64, "rldicr %0,%1,%2,63-(%2)"}, +{"sldi.", 3, PPC64, "rldicr. %0,%1,%2,63-(%2)"}, +{"srdi", 3, PPC64, "rldicl %0,%1,(-(%2)!63)&((%2)|63),%2"}, +{"srdi.", 3, PPC64, "rldicl. %0,%1,(-(%2)!63)&((%2)|63),%2"}, +{"clrrdi", 3, PPC64, "rldicr %0,%1,0,63-(%2)"}, +{"clrrdi.", 3, PPC64, "rldicr. %0,%1,0,63-(%2)"}, +{"clrlsldi", 4, PPC64, "rldic %0,%1,%3,(%2)-(%3)"}, +{"clrlsldi.",4, PPC64, "rldic. %0,%1,%3,(%2)-(%3)"}, + +{"extlwi", 4, PPCCOM, "rlwinm %0,%1,%3,0,(%2)-1"}, +{"extlwi.", 4, PPCCOM, "rlwinm. %0,%1,%3,0,(%2)-1"}, +{"extrwi", 4, PPCCOM, "rlwinm %0,%1,((%2)+(%3))&((%2)+(%3)<>32),32-(%2),31"}, +{"extrwi.", 4, PPCCOM, "rlwinm. %0,%1,((%2)+(%3))&((%2)+(%3)<>32),32-(%2),31"}, +{"inslwi", 4, PPCCOM, "rlwimi %0,%1,(-(%3)!31)&((%3)|31),%3,(%2)+(%3)-1"}, +{"inslwi.", 4, PPCCOM, "rlwimi. %0,%1,(-(%3)!31)&((%3)|31),%3,(%2)+(%3)-1"}, +{"insrwi", 4, PPCCOM, "rlwimi %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1"}, +{"insrwi.", 4, PPCCOM, "rlwimi. %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1"}, +{"rotrwi", 3, PPCCOM, "rlwinm %0,%1,(-(%2)!31)&((%2)|31),0,31"}, +{"rotrwi.", 3, PPCCOM, "rlwinm. %0,%1,(-(%2)!31)&((%2)|31),0,31"}, +{"slwi", 3, PPCCOM, "rlwinm %0,%1,%2,0,31-(%2)"}, +{"sli", 3, PWRCOM, "rlinm %0,%1,%2,0,31-(%2)"}, +{"slwi.", 3, PPCCOM, "rlwinm. %0,%1,%2,0,31-(%2)"}, +{"sli.", 3, PWRCOM, "rlinm. %0,%1,%2,0,31-(%2)"}, +{"srwi", 3, PPCCOM, "rlwinm %0,%1,(-(%2)!31)&((%2)|31),%2,31"}, +{"sri", 3, PWRCOM, "rlinm %0,%1,(-(%2)!31)&((%2)|31),%2,31"}, +{"srwi.", 3, PPCCOM, "rlwinm. 
%0,%1,(-(%2)!31)&((%2)|31),%2,31"}, +{"sri.", 3, PWRCOM, "rlinm. %0,%1,(-(%2)!31)&((%2)|31),%2,31"}, +{"clrrwi", 3, PPCCOM, "rlwinm %0,%1,0,0,31-(%2)"}, +{"clrrwi.", 3, PPCCOM, "rlwinm. %0,%1,0,0,31-(%2)"}, +{"clrlslwi", 4, PPCCOM, "rlwinm %0,%1,%3,(%2)-(%3),31-(%3)"}, +{"clrlslwi.",4, PPCCOM, "rlwinm. %0,%1,%3,(%2)-(%3),31-(%3)"}, + +{"e_extlwi", 4, PPCVLE, "e_rlwinm %0,%1,%3,0,(%2)-1"}, +{"e_extrwi", 4, PPCVLE, "e_rlwinm %0,%1,((%2)+(%3))&((%2)+(%3)<>32),32-(%2),31"}, +{"e_inslwi", 4, PPCVLE, "e_rlwimi %0,%1,(-(%3)!31)&((%3)|31),%3,(%2)+(%3)-1"}, +{"e_insrwi", 4, PPCVLE, "e_rlwimi %0,%1,32-((%2)+(%3)),%3,(%2)+(%3)-1"}, +{"e_rotlwi", 3, PPCVLE, "e_rlwinm %0,%1,%2,0,31"}, +{"e_rotrwi", 3, PPCVLE, "e_rlwinm %0,%1,(-(%2)!31)&((%2)|31),0,31"}, +{"e_slwi", 3, PPCVLE, "e_rlwinm %0,%1,%2,0,31-(%2)"}, +{"e_srwi", 3, PPCVLE, "e_rlwinm %0,%1,(-(%2)!31)&((%2)|31),%2,31"}, +{"e_clrlwi", 3, PPCVLE, "e_rlwinm %0,%1,0,%2,31"}, +{"e_clrrwi", 3, PPCVLE, "e_rlwinm %0,%1,0,0,31-(%2)"}, +{"e_clrlslwi",4, PPCVLE, "e_rlwinm %0,%1,%3,(%2)-(%3),31-(%3)"}, }; const int powerpc_num_macros = diff --git a/arch/powerpc/xmon/ppc.h b/arch/powerpc/xmon/ppc.h index 6771856fd5f8..29c385f8e115 100644 --- a/arch/powerpc/xmon/ppc.h +++ b/arch/powerpc/xmon/ppc.h @@ -1,6 +1,5 @@ /* ppc.h -- Header file for PowerPC opcode table - Copyright 1994, 1995, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007 Free Software Foundation, Inc. + Copyright (C) 1994-2016 Free Software Foundation, Inc. Written by Ian Lance Taylor, Cygnus Support This file is part of GDB, GAS, and the GNU binutils. @@ -22,6 +21,14 @@ Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, US #ifndef PPC_H #define PPC_H +#include "bfd_stdint.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef uint64_t ppc_cpu_t; + /* The opcode table is an array of struct powerpc_opcode. */ struct powerpc_opcode @@ -42,7 +49,12 @@ struct powerpc_opcode /* One bit flags for the opcode. These are used to indicate which specific processors support the instructions. The defined values are listed below. */ - unsigned long flags; + ppc_cpu_t flags; + + /* One bit flags for the opcode. These are used to indicate which + specific processors no longer support the instructions. The defined + values are listed below. */ + ppc_cpu_t deprecated; /* An array of operand codes. Each code is an index into the operand table. They appear in the order which the operands must @@ -55,6 +67,8 @@ struct powerpc_opcode instructions. */ extern const struct powerpc_opcode powerpc_opcodes[]; extern const int powerpc_num_opcodes; +extern const struct powerpc_opcode vle_opcodes[]; +extern const int vle_num_opcodes; /* Values defined for the flags field of a struct powerpc_opcode. */ @@ -67,87 +81,152 @@ extern const int powerpc_num_opcodes; /* Opcode is defined for the POWER2 (Rios 2) architecture. */ #define PPC_OPCODE_POWER2 4 -/* Opcode is only defined on 32 bit architectures. */ -#define PPC_OPCODE_32 8 - -/* Opcode is only defined on 64 bit architectures. */ -#define PPC_OPCODE_64 0x10 - /* Opcode is supported by the Motorola PowerPC 601 processor. The 601 is assumed to support all PowerPC (PPC_OPCODE_PPC) instructions, but it also supports many additional POWER instructions. */ -#define PPC_OPCODE_601 0x20 +#define PPC_OPCODE_601 8 /* Opcode is supported in both the Power and PowerPC architectures - (ie, compiler's -mcpu=common or assembler's -mcom). */ -#define PPC_OPCODE_COMMON 0x40 + (ie, compiler's -mcpu=common or assembler's -mcom). 
More than just + the intersection of PPC_OPCODE_PPC with the union of PPC_OPCODE_POWER + and PPC_OPCODE_POWER2 because many instructions changed mnemonics + between POWER and POWERPC. */ +#define PPC_OPCODE_COMMON 0x10 /* Opcode is supported for any Power or PowerPC platform (this is for the assembler's -many option, and it eliminates duplicates). */ -#define PPC_OPCODE_ANY 0x80 +#define PPC_OPCODE_ANY 0x20 + +/* Opcode is only defined on 64 bit architectures. */ +#define PPC_OPCODE_64 0x40 /* Opcode is supported as part of the 64-bit bridge. */ -#define PPC_OPCODE_64_BRIDGE 0x100 +#define PPC_OPCODE_64_BRIDGE 0x80 /* Opcode is supported by Altivec Vector Unit */ -#define PPC_OPCODE_ALTIVEC 0x200 +#define PPC_OPCODE_ALTIVEC 0x100 /* Opcode is supported by PowerPC 403 processor. */ -#define PPC_OPCODE_403 0x400 +#define PPC_OPCODE_403 0x200 /* Opcode is supported by PowerPC BookE processor. */ -#define PPC_OPCODE_BOOKE 0x800 - -/* Opcode is only supported by 64-bit PowerPC BookE processor. */ -#define PPC_OPCODE_BOOKE64 0x1000 +#define PPC_OPCODE_BOOKE 0x400 /* Opcode is supported by PowerPC 440 processor. */ -#define PPC_OPCODE_440 0x2000 +#define PPC_OPCODE_440 0x800 /* Opcode is only supported by Power4 architecture. */ -#define PPC_OPCODE_POWER4 0x4000 +#define PPC_OPCODE_POWER4 0x1000 -/* Opcode isn't supported by Power4 architecture. */ -#define PPC_OPCODE_NOPOWER4 0x8000 - -/* Opcode is only supported by POWERPC Classic architecture. */ -#define PPC_OPCODE_CLASSIC 0x10000 +/* Opcode is only supported by Power7 architecture. */ +#define PPC_OPCODE_POWER7 0x2000 /* Opcode is only supported by e500x2 Core. */ -#define PPC_OPCODE_SPE 0x20000 +#define PPC_OPCODE_SPE 0x4000 /* Opcode is supported by e500x2 Integer select APU. */ -#define PPC_OPCODE_ISEL 0x40000 +#define PPC_OPCODE_ISEL 0x8000 /* Opcode is an e500 SPE floating point instruction. */ -#define PPC_OPCODE_EFS 0x80000 +#define PPC_OPCODE_EFS 0x10000 /* Opcode is supported by branch locking APU. */ -#define PPC_OPCODE_BRLOCK 0x100000 +#define PPC_OPCODE_BRLOCK 0x20000 /* Opcode is supported by performance monitor APU. */ -#define PPC_OPCODE_PMR 0x200000 +#define PPC_OPCODE_PMR 0x40000 /* Opcode is supported by cache locking APU. */ -#define PPC_OPCODE_CACHELCK 0x400000 +#define PPC_OPCODE_CACHELCK 0x80000 /* Opcode is supported by machine check APU. */ -#define PPC_OPCODE_RFMCI 0x800000 +#define PPC_OPCODE_RFMCI 0x100000 /* Opcode is only supported by Power5 architecture. */ -#define PPC_OPCODE_POWER5 0x1000000 +#define PPC_OPCODE_POWER5 0x200000 /* Opcode is supported by PowerPC e300 family. */ -#define PPC_OPCODE_E300 0x2000000 +#define PPC_OPCODE_E300 0x400000 /* Opcode is only supported by Power6 architecture. */ -#define PPC_OPCODE_POWER6 0x4000000 +#define PPC_OPCODE_POWER6 0x800000 /* Opcode is only supported by PowerPC Cell family. */ -#define PPC_OPCODE_CELL 0x8000000 +#define PPC_OPCODE_CELL 0x1000000 + +/* Opcode is supported by CPUs with paired singles support. */ +#define PPC_OPCODE_PPCPS 0x2000000 + +/* Opcode is supported by Power E500MC */ +#define PPC_OPCODE_E500MC 0x4000000 + +/* Opcode is supported by PowerPC 405 processor. */ +#define PPC_OPCODE_405 0x8000000 + +/* Opcode is supported by Vector-Scalar (VSX) Unit */ +#define PPC_OPCODE_VSX 0x10000000 + +/* Opcode is supported by A2. */ +#define PPC_OPCODE_A2 0x20000000 + +/* Opcode is supported by PowerPC 476 processor. 
*/ +#define PPC_OPCODE_476 0x40000000 + +/* Opcode is supported by AppliedMicro Titan core */ +#define PPC_OPCODE_TITAN 0x80000000 + +/* Opcode which is supported by the e500 family */ +#define PPC_OPCODE_E500 0x100000000ull + +/* Opcode is supported by Extended Altivec Vector Unit */ +#define PPC_OPCODE_ALTIVEC2 0x200000000ull + +/* Opcode is supported by Power E6500 */ +#define PPC_OPCODE_E6500 0x400000000ull + +/* Opcode is supported by Thread management APU */ +#define PPC_OPCODE_TMR 0x800000000ull + +/* Opcode which is supported by the VLE extension. */ +#define PPC_OPCODE_VLE 0x1000000000ull + +/* Opcode is only supported by Power8 architecture. */ +#define PPC_OPCODE_POWER8 0x2000000000ull + +/* Opcode which is supported by the Hardware Transactional Memory extension. */ +/* Currently, this is the same as the POWER8 mask. If another cpu comes out + that isn't a superset of POWER8, we can define this to its own mask. */ +#define PPC_OPCODE_HTM PPC_OPCODE_POWER8 + +/* Opcode is supported by ppc750cl. */ +#define PPC_OPCODE_750 0x4000000000ull + +/* Opcode is supported by ppc7450. */ +#define PPC_OPCODE_7450 0x8000000000ull + +/* Opcode is supported by ppc821/850/860. */ +#define PPC_OPCODE_860 0x10000000000ull + +/* Opcode is only supported by Power9 architecture. */ +#define PPC_OPCODE_POWER9 0x20000000000ull + +/* Opcode is supported by Vector-Scalar (VSX) Unit from ISA 2.08. */ +#define PPC_OPCODE_VSX3 0x40000000000ull + + /* Opcode is supported by e200z4. */ +#define PPC_OPCODE_E200Z4 0x80000000000ull /* A macro to extract the major opcode from an instruction. */ #define PPC_OP(i) (((i) >> 26) & 0x3f) + +/* A macro to determine if the instruction is a 2-byte VLE insn. */ +#define PPC_OP_SE_VLE(m) ((m) <= 0xffff) + +/* A macro to extract the major opcode from a VLE instruction. */ +#define VLE_OP(i,m) (((i) >> ((m) <= 0xffff ? 10 : 26)) & 0x3f) + +/* A macro to convert a VLE opcode to a VLE opcode segment. */ +#define VLE_OP_TO_SEG(i) ((i) >> 1) /* The operands table is an array of struct powerpc_operand. */ @@ -156,16 +235,22 @@ struct powerpc_operand /* A bitmask of bits in the operand. */ unsigned int bitm; - /* How far the operand is left shifted in the instruction. - -1 to indicate that BITM and SHIFT cannot be used to determine - where the operand goes in the insn. */ + /* The shift operation to be applied to the operand. No shift + is made if this is zero. For positive values, the operand + is shifted left by SHIFT. For negative values, the operand + is shifted right by -SHIFT. Use PPC_OPSHIFT_INV to indicate + that BITM and SHIFT cannot be used to determine where the + operand goes in the insn. */ int shift; /* Insertion function. This is used by the assembler. To insert an operand value into an instruction, check this field. If it is NULL, execute - i |= (op & o->bitm) << o->shift; + if (o->shift >= 0) + i |= (op & o->bitm) << o->shift; + else + i |= (op & o->bitm) >> -o->shift; (i is the instruction which we are filling in, o is a pointer to this structure, and op is the operand value). @@ -177,13 +262,16 @@ struct powerpc_operand operand value is legal, *ERRMSG will be unchanged (most operands can accept any value). */ unsigned long (*insert) - (unsigned long instruction, long op, int dialect, const char **errmsg); + (unsigned long instruction, long op, ppc_cpu_t dialect, const char **errmsg); /* Extraction function. This is used by the disassembler. To extract this operand type from an instruction, check this field. 
If it is NULL, compute - op = (i >> o->shift) & o->bitm; + if (o->shift >= 0) + op = (i >> o->shift) & o->bitm; + else + op = (i << -o->shift) & o->bitm; if ((o->flags & PPC_OPERAND_SIGNED) != 0) sign_extend (op); (i is the instruction, o is a pointer to this structure, and op @@ -195,7 +283,7 @@ struct powerpc_operand non-zero if this operand type can not actually be extracted from this operand (i.e., the instruction does not match). If the operand is valid, *INVALID will not be changed. */ - long (*extract) (unsigned long instruction, int dialect, int *invalid); + long (*extract) (unsigned long instruction, ppc_cpu_t dialect, int *invalid); /* One bit syntax flags. */ unsigned long flags; @@ -207,6 +295,11 @@ struct powerpc_operand extern const struct powerpc_operand powerpc_operands[]; extern const unsigned int num_powerpc_operands; +/* Use with the shift field of a struct powerpc_operand to indicate + that BITM and SHIFT cannot be used to determine where the operand + goes in the insn. */ +#define PPC_OPSHIFT_INV (-1U << 31) + /* Values defined for the flags field of a struct powerpc_operand. */ /* This operand takes signed values. */ @@ -240,7 +333,7 @@ extern const unsigned int num_powerpc_operands; cr4 4 cr5 5 cr6 6 cr7 7 These may be combined arithmetically, as in cr2*4+gt. These are only supported on the PowerPC, not the POWER. */ -#define PPC_OPERAND_CR (0x10) +#define PPC_OPERAND_CR_BIT (0x10) /* This operand names a register. The disassembler uses this to print register names with a leading 'r'. */ @@ -296,6 +389,27 @@ extern const unsigned int num_powerpc_operands; /* Valid range of operand is 0..n rather than 0..n-1. */ #define PPC_OPERAND_PLUS1 (0x10000) + +/* Xilinx APU and FSL related operands */ +#define PPC_OPERAND_FSL (0x20000) +#define PPC_OPERAND_FCR (0x40000) +#define PPC_OPERAND_UDI (0x80000) + +/* This operand names a vector-scalar unit register. The disassembler + prints these with a leading 'vs'. */ +#define PPC_OPERAND_VSR (0x100000) + +/* This is a CR FIELD that does not use symbolic names. */ +#define PPC_OPERAND_CR_REG (0x200000) + +/* This flag is only used with PPC_OPERAND_OPTIONAL. If this operand + is omitted, then the value it should use for the operand is stored + in the SHIFT field of the immediatly following operand field. */ +#define PPC_OPERAND_OPTIONAL_VALUE (0x400000) + +/* This flag is only used with PPC_OPERAND_OPTIONAL. The operand is + only optional when generating 32-bit code. */ +#define PPC_OPERAND_OPTIONAL32 (0x800000) /* The POWER and PowerPC assemblers use a few macros. We keep them with the operands table for simplicity. The macro table is an @@ -312,7 +426,7 @@ struct powerpc_macro /* One bit flags for the opcode. These are used to indicate which specific processors support the instructions. The values are the same as those for the struct powerpc_opcode flags field. */ - unsigned long flags; + ppc_cpu_t flags; /* A format string to turn the macro into a normal instruction. 
Each %N in the string is replaced with operand number N (zero @@ -323,4 +437,18 @@ struct powerpc_macro extern const struct powerpc_macro powerpc_macros[]; extern const int powerpc_num_macros; +extern ppc_cpu_t ppc_parse_cpu (ppc_cpu_t, ppc_cpu_t *, const char *); + +static inline long +ppc_optional_operand_value (const struct powerpc_operand *operand) +{ + if ((operand->flags & PPC_OPERAND_OPTIONAL_VALUE) != 0) + return (operand+1)->shift; + return 0; +} + +#ifdef __cplusplus +} +#endif + #endif /* PPC_H */ -- GitLab From 5b102782c7f460d88b42d504c78755eeb030894a Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Thu, 2 Feb 2017 10:33:44 +0530 Subject: [PATCH 0258/1084] powerpc/xmon: Enable disassembly files (compilation changes) After updating ppc-dis.c, ppc-opc.c and ppc.h the following changes were made to enable compilation and working of xmon: 1. Remove all disassembler_info 2. Use xmon's printf/print_address to output data and addresses respectively. 3. All bfd_* types and casts have been removed. 4. Optimizations related to opcd_indices have been removed. 5. The dialect is set based on cpu features. 6. PPC_OPCODE_CLASSIC is no longer supported in the new disassembler. 7. VLE opcode parsing and printing has been stripped. 8. Coding style conventions used for those routines has been retained and it does not match our CodingStyle. 9. The highest supported dialect is POWER9. 10. Defined ATTRIBUTE_UNUSED in ppc-dis.c. 11. Defined _(x) in ppc-dis.c. Finally, we remove the dependency on BROKEN so that XMON_DISASSEMBLY can be enabled again. Signed-off-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig.debug | 1 - arch/powerpc/xmon/ppc-dis.c | 592 ++++-------------------------------- arch/powerpc/xmon/ppc-opc.c | 12 +- arch/powerpc/xmon/ppc.h | 2 - 4 files changed, 60 insertions(+), 547 deletions(-) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 73a3264cf052..c86df246339e 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -115,7 +115,6 @@ config XMON_DEFAULT config XMON_DISASSEMBLY bool "Include disassembly support in xmon" depends on XMON - depends on BROKEN default y help Include support for disassembling in xmon. You probably want diff --git a/arch/powerpc/xmon/ppc-dis.c b/arch/powerpc/xmon/ppc-dis.c index c382d13c88a3..31db8c072acd 100644 --- a/arch/powerpc/xmon/ppc-dis.c +++ b/arch/powerpc/xmon/ppc-dis.c @@ -18,418 +18,18 @@ You should have received a copy of the GNU General Public License along with this file; see the file COPYING. If not, write to the Free Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "sysdep.h" -#include +#include +#include +#include "nonstdio.h" +#include "ansidecl.h" +#include "ppc.h" #include "dis-asm.h" -#include "elf-bfd.h" -#include "elf/ppc.h" -#include "opintl.h" -#include "opcode/ppc.h" /* This file provides several disassembler functions, all of which use the disassembler interface defined in dis-asm.h. Several functions are provided because this file handles disassembly for the PowerPC in both big and little endian mode and also for the POWER (RS/6000) chip. */ -static int print_insn_powerpc (bfd_vma, struct disassemble_info *, int, - ppc_cpu_t); - -struct dis_private -{ - /* Stash the result of parsing disassembler_options here. 
*/ - ppc_cpu_t dialect; -} private; - -#define POWERPC_DIALECT(INFO) \ - (((struct dis_private *) ((INFO)->private_data))->dialect) - -struct ppc_mopt { - const char *opt; - ppc_cpu_t cpu; - ppc_cpu_t sticky; -}; - -struct ppc_mopt ppc_opts[] = { - { "403", PPC_OPCODE_PPC | PPC_OPCODE_403, - 0 }, - { "405", PPC_OPCODE_PPC | PPC_OPCODE_403 | PPC_OPCODE_405, - 0 }, - { "440", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_440 - | PPC_OPCODE_ISEL | PPC_OPCODE_RFMCI), - 0 }, - { "464", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_440 - | PPC_OPCODE_ISEL | PPC_OPCODE_RFMCI), - 0 }, - { "476", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_440 - | PPC_OPCODE_476 | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5), - 0 }, - { "601", PPC_OPCODE_PPC | PPC_OPCODE_601, - 0 }, - { "603", PPC_OPCODE_PPC, - 0 }, - { "604", PPC_OPCODE_PPC, - 0 }, - { "620", PPC_OPCODE_PPC | PPC_OPCODE_64, - 0 }, - { "7400", PPC_OPCODE_PPC | PPC_OPCODE_ALTIVEC, - 0 }, - { "7410", PPC_OPCODE_PPC | PPC_OPCODE_ALTIVEC, - 0 }, - { "7450", PPC_OPCODE_PPC | PPC_OPCODE_7450 | PPC_OPCODE_ALTIVEC, - 0 }, - { "7455", PPC_OPCODE_PPC | PPC_OPCODE_ALTIVEC, - 0 }, - { "750cl", PPC_OPCODE_PPC | PPC_OPCODE_750 | PPC_OPCODE_PPCPS - , 0 }, - { "821", PPC_OPCODE_PPC | PPC_OPCODE_860, - 0 }, - { "850", PPC_OPCODE_PPC | PPC_OPCODE_860, - 0 }, - { "860", PPC_OPCODE_PPC | PPC_OPCODE_860, - 0 }, - { "a2", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_POWER4 - | PPC_OPCODE_POWER5 | PPC_OPCODE_CACHELCK | PPC_OPCODE_64 - | PPC_OPCODE_A2), - 0 }, - { "altivec", PPC_OPCODE_PPC, - PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 }, - { "any", 0, - PPC_OPCODE_ANY }, - { "booke", PPC_OPCODE_PPC | PPC_OPCODE_BOOKE, - 0 }, - { "booke32", PPC_OPCODE_PPC | PPC_OPCODE_BOOKE, - 0 }, - { "cell", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 - | PPC_OPCODE_CELL | PPC_OPCODE_ALTIVEC), - 0 }, - { "com", PPC_OPCODE_COMMON, - 0 }, - { "e200z4", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE| PPC_OPCODE_SPE - | PPC_OPCODE_ISEL | PPC_OPCODE_EFS | PPC_OPCODE_BRLOCK - | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI - | PPC_OPCODE_E500 | PPC_OPCODE_E200Z4), - PPC_OPCODE_VLE }, - { "e300", PPC_OPCODE_PPC | PPC_OPCODE_E300, - 0 }, - { "e500", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_SPE - | PPC_OPCODE_ISEL | PPC_OPCODE_EFS | PPC_OPCODE_BRLOCK - | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI - | PPC_OPCODE_E500), - 0 }, - { "e500mc", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_ISEL - | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI - | PPC_OPCODE_E500MC), - 0 }, - { "e500mc64", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_ISEL - | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI - | PPC_OPCODE_E500MC | PPC_OPCODE_64 | PPC_OPCODE_POWER5 - | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7), - 0 }, - { "e5500", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_ISEL - | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI - | PPC_OPCODE_E500MC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 - | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 - | PPC_OPCODE_POWER7), - 0 }, - { "e6500", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_ISEL - | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI - | PPC_OPCODE_E500MC | PPC_OPCODE_64 | PPC_OPCODE_ALTIVEC - | PPC_OPCODE_ALTIVEC2 | PPC_OPCODE_E6500 | PPC_OPCODE_POWER4 - | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7), - 0 }, - { "e500x2", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_SPE - | PPC_OPCODE_ISEL | PPC_OPCODE_EFS | PPC_OPCODE_BRLOCK - | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI - | 
PPC_OPCODE_E500), - 0 }, - { "efs", PPC_OPCODE_PPC | PPC_OPCODE_EFS, - 0 }, - { "power4", PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4, - 0 }, - { "power5", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 - | PPC_OPCODE_POWER5), - 0 }, - { "power6", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 - | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_ALTIVEC), - 0 }, - { "power7", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 - | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 - | PPC_OPCODE_POWER7 | PPC_OPCODE_ALTIVEC | PPC_OPCODE_VSX), - 0 }, - { "power8", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 - | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 - | PPC_OPCODE_POWER7 | PPC_OPCODE_POWER8 | PPC_OPCODE_HTM - | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 | PPC_OPCODE_VSX), - 0 }, - { "power9", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 - | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 - | PPC_OPCODE_POWER7 | PPC_OPCODE_POWER8 | PPC_OPCODE_POWER9 - | PPC_OPCODE_HTM | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 - | PPC_OPCODE_VSX | PPC_OPCODE_VSX3 ), - 0 }, - { "ppc", PPC_OPCODE_PPC, - 0 }, - { "ppc32", PPC_OPCODE_PPC, - 0 }, - { "ppc64", PPC_OPCODE_PPC | PPC_OPCODE_64, - 0 }, - { "ppc64bridge", PPC_OPCODE_PPC | PPC_OPCODE_64_BRIDGE, - 0 }, - { "ppcps", PPC_OPCODE_PPC | PPC_OPCODE_PPCPS, - 0 }, - { "pwr", PPC_OPCODE_POWER, - 0 }, - { "pwr2", PPC_OPCODE_POWER | PPC_OPCODE_POWER2, - 0 }, - { "pwr4", PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4, - 0 }, - { "pwr5", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 - | PPC_OPCODE_POWER5), - 0 }, - { "pwr5x", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 - | PPC_OPCODE_POWER5), - 0 }, - { "pwr6", (PPC_OPCODE_PPC | PPC_OPCODE_64 | PPC_OPCODE_POWER4 - | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_ALTIVEC), - 0 }, - { "pwr7", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 - | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 - | PPC_OPCODE_POWER7 | PPC_OPCODE_ALTIVEC | PPC_OPCODE_VSX), - 0 }, - { "pwr8", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 - | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 - | PPC_OPCODE_POWER7 | PPC_OPCODE_POWER8 | PPC_OPCODE_HTM - | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 | PPC_OPCODE_VSX), - 0 }, - { "pwr9", (PPC_OPCODE_PPC | PPC_OPCODE_ISEL | PPC_OPCODE_64 - | PPC_OPCODE_POWER4 | PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 - | PPC_OPCODE_POWER7 | PPC_OPCODE_POWER8 | PPC_OPCODE_POWER9 - | PPC_OPCODE_HTM | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 - | PPC_OPCODE_VSX | PPC_OPCODE_VSX3 ), - 0 }, - { "pwrx", PPC_OPCODE_POWER | PPC_OPCODE_POWER2, - 0 }, - { "spe", PPC_OPCODE_PPC | PPC_OPCODE_EFS, - PPC_OPCODE_SPE }, - { "titan", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE | PPC_OPCODE_PMR - | PPC_OPCODE_RFMCI | PPC_OPCODE_TITAN), - 0 }, - { "vle", (PPC_OPCODE_PPC | PPC_OPCODE_BOOKE| PPC_OPCODE_SPE - | PPC_OPCODE_ISEL | PPC_OPCODE_EFS | PPC_OPCODE_BRLOCK - | PPC_OPCODE_PMR | PPC_OPCODE_CACHELCK | PPC_OPCODE_RFMCI - | PPC_OPCODE_E500), - PPC_OPCODE_VLE }, - { "vsx", PPC_OPCODE_PPC, - PPC_OPCODE_VSX | PPC_OPCODE_VSX3 }, - { "htm", PPC_OPCODE_PPC, - PPC_OPCODE_HTM }, -}; - -/* Switch between Booke and VLE dialects for interlinked dumps. */ -static ppc_cpu_t -get_powerpc_dialect (struct disassemble_info *info) -{ - ppc_cpu_t dialect = 0; - - dialect = POWERPC_DIALECT (info); - - /* Disassemble according to the section headers flags for VLE-mode. 
*/ - if (dialect & PPC_OPCODE_VLE - && info->section->owner != NULL - && bfd_get_flavour (info->section->owner) == bfd_target_elf_flavour - && elf_object_id (info->section->owner) == PPC32_ELF_DATA - && (elf_section_flags (info->section) & SHF_PPC_VLE) != 0) - return dialect; - else - return dialect & ~ PPC_OPCODE_VLE; -} - -/* Handle -m and -M options that set cpu type, and .machine arg. */ - -ppc_cpu_t -ppc_parse_cpu (ppc_cpu_t ppc_cpu, ppc_cpu_t *sticky, const char *arg) -{ - unsigned int i; - - for (i = 0; i < sizeof (ppc_opts) / sizeof (ppc_opts[0]); i++) - if (strcmp (ppc_opts[i].opt, arg) == 0) - { - if (ppc_opts[i].sticky) - { - *sticky |= ppc_opts[i].sticky; - if ((ppc_cpu & ~*sticky) != 0) - break; - } - ppc_cpu = ppc_opts[i].cpu; - break; - } - if (i >= sizeof (ppc_opts) / sizeof (ppc_opts[0])) - return 0; - - ppc_cpu |= *sticky; - return ppc_cpu; -} - -/* Determine which set of machines to disassemble for. */ - -static void -powerpc_init_dialect (struct disassemble_info *info) -{ - ppc_cpu_t dialect = 0; - ppc_cpu_t sticky = 0; - char *arg; - struct dis_private *priv = calloc (sizeof (*priv), 1); - - if (priv == NULL) - priv = &private; - - switch (info->mach) - { - case bfd_mach_ppc_403: - case bfd_mach_ppc_403gc: - dialect = ppc_parse_cpu (dialect, &sticky, "403"); - break; - case bfd_mach_ppc_405: - dialect = ppc_parse_cpu (dialect, &sticky, "405"); - break; - case bfd_mach_ppc_601: - dialect = ppc_parse_cpu (dialect, &sticky, "601"); - break; - case bfd_mach_ppc_a35: - case bfd_mach_ppc_rs64ii: - case bfd_mach_ppc_rs64iii: - dialect = ppc_parse_cpu (dialect, &sticky, "pwr2") | PPC_OPCODE_64; - break; - case bfd_mach_ppc_e500: - dialect = ppc_parse_cpu (dialect, &sticky, "e500"); - break; - case bfd_mach_ppc_e500mc: - dialect = ppc_parse_cpu (dialect, &sticky, "e500mc"); - break; - case bfd_mach_ppc_e500mc64: - dialect = ppc_parse_cpu (dialect, &sticky, "e500mc64"); - break; - case bfd_mach_ppc_e5500: - dialect = ppc_parse_cpu (dialect, &sticky, "e5500"); - break; - case bfd_mach_ppc_e6500: - dialect = ppc_parse_cpu (dialect, &sticky, "e6500"); - break; - case bfd_mach_ppc_titan: - dialect = ppc_parse_cpu (dialect, &sticky, "titan"); - break; - case bfd_mach_ppc_vle: - dialect = ppc_parse_cpu (dialect, &sticky, "vle"); - break; - default: - dialect = ppc_parse_cpu (dialect, &sticky, "power9") | PPC_OPCODE_ANY; - } - - arg = info->disassembler_options; - while (arg != NULL) - { - ppc_cpu_t new_cpu = 0; - char *end = strchr (arg, ','); - - if (end != NULL) - *end = 0; - - if ((new_cpu = ppc_parse_cpu (dialect, &sticky, arg)) != 0) - dialect = new_cpu; - else if (strcmp (arg, "32") == 0) - dialect &= ~(ppc_cpu_t) PPC_OPCODE_64; - else if (strcmp (arg, "64") == 0) - dialect |= PPC_OPCODE_64; - else - fprintf (stderr, _("warning: ignoring unknown -M%s option\n"), arg); - - if (end != NULL) - *end++ = ','; - arg = end; - } - - info->private_data = priv; - POWERPC_DIALECT(info) = dialect; -} - -#define PPC_OPCD_SEGS 64 -static unsigned short powerpc_opcd_indices[PPC_OPCD_SEGS+1]; -#define VLE_OPCD_SEGS 32 -static unsigned short vle_opcd_indices[VLE_OPCD_SEGS+1]; - -/* Calculate opcode table indices to speed up disassembly, - and init dialect. 
*/ - -void -disassemble_init_powerpc (struct disassemble_info *info) -{ - int i; - unsigned short last; - - if (powerpc_opcd_indices[PPC_OPCD_SEGS] == 0) - { - - i = powerpc_num_opcodes; - while (--i >= 0) - { - unsigned op = PPC_OP (powerpc_opcodes[i].opcode); - - powerpc_opcd_indices[op] = i; - } - - last = powerpc_num_opcodes; - for (i = PPC_OPCD_SEGS; i > 0; --i) - { - if (powerpc_opcd_indices[i] == 0) - powerpc_opcd_indices[i] = last; - last = powerpc_opcd_indices[i]; - } - - i = vle_num_opcodes; - while (--i >= 0) - { - unsigned op = VLE_OP (vle_opcodes[i].opcode, vle_opcodes[i].mask); - unsigned seg = VLE_OP_TO_SEG (op); - - vle_opcd_indices[seg] = i; - } - - last = vle_num_opcodes; - for (i = VLE_OPCD_SEGS; i > 0; --i) - { - if (vle_opcd_indices[i] == 0) - vle_opcd_indices[i] = last; - last = vle_opcd_indices[i]; - } - } - - if (info->arch == bfd_arch_powerpc) - powerpc_init_dialect (info); -} - -/* Print a big endian PowerPC instruction. */ - -int -print_insn_big_powerpc (bfd_vma memaddr, struct disassemble_info *info) -{ - return print_insn_powerpc (memaddr, info, 1, get_powerpc_dialect (info)); -} - -/* Print a little endian PowerPC instruction. */ - -int -print_insn_little_powerpc (bfd_vma memaddr, struct disassemble_info *info) -{ - return print_insn_powerpc (memaddr, info, 0, get_powerpc_dialect (info)); -} - -/* Print a POWER (RS/6000) instruction. */ - -int -print_insn_rs6000 (bfd_vma memaddr, struct disassemble_info *info) -{ - return print_insn_powerpc (memaddr, info, 1, PPC_OPCODE_POWER); -} /* Extract the operand value from the PowerPC or POWER instruction. */ @@ -498,11 +98,9 @@ lookup_powerpc (unsigned long insn, ppc_cpu_t dialect) /* Get the major opcode of the instruction. */ op = PPC_OP (insn); + opcode_end = powerpc_opcodes + powerpc_num_opcodes; /* Find the first match in the opcode table for this major opcode. */ - opcode_end = powerpc_opcodes + powerpc_opcd_indices[op + 1]; - for (opcode = powerpc_opcodes + powerpc_opcd_indices[op]; - opcode < opcode_end; - ++opcode) + for (opcode = powerpc_opcodes; opcode < opcode_end; ++opcode) { const unsigned char *opindex; const struct powerpc_operand *operand; @@ -531,110 +129,45 @@ lookup_powerpc (unsigned long insn, ppc_cpu_t dialect) return NULL; } -/* Find a match for INSN in the VLE opcode table. */ +/* Print a PowerPC or POWER instruction. */ -static const struct powerpc_opcode * -lookup_vle (unsigned long insn) +int print_insn_powerpc (unsigned long insn, unsigned long memaddr) { const struct powerpc_opcode *opcode; - const struct powerpc_opcode *opcode_end; - unsigned op, seg; - - op = PPC_OP (insn); - if (op >= 0x20 && op <= 0x37) - { - /* This insn has a 4-bit opcode. */ - op &= 0x3c; - } - seg = VLE_OP_TO_SEG (op); - - /* Find the first match in the opcode table for this major opcode. */ - opcode_end = vle_opcodes + vle_opcd_indices[seg + 1]; - for (opcode = vle_opcodes + vle_opcd_indices[seg]; - opcode < opcode_end; - ++opcode) - { - unsigned long table_opcd = opcode->opcode; - unsigned long table_mask = opcode->mask; - bfd_boolean table_op_is_short = PPC_OP_SE_VLE(table_mask); - unsigned long insn2; - const unsigned char *opindex; - const struct powerpc_operand *operand; - int invalid; - - insn2 = insn; - if (table_op_is_short) - insn2 >>= 16; - if ((insn2 & table_mask) != table_opcd) - continue; + bool insn_is_short; + ppc_cpu_t dialect; - /* Check validity of operands. 
*/ - invalid = 0; - for (opindex = opcode->operands; *opindex != 0; ++opindex) - { - operand = powerpc_operands + *opindex; - if (operand->extract) - (*operand->extract) (insn, (ppc_cpu_t)0, &invalid); - } - if (invalid) - continue; + dialect = PPC_OPCODE_PPC | PPC_OPCODE_COMMON + | PPC_OPCODE_64 | PPC_OPCODE_POWER4 | PPC_OPCODE_ALTIVEC; - return opcode; - } + if (cpu_has_feature(CPU_FTRS_POWER5)) + dialect |= PPC_OPCODE_POWER5; - return NULL; -} + if (cpu_has_feature(CPU_FTRS_CELL)) + dialect |= (PPC_OPCODE_CELL | PPC_OPCODE_ALTIVEC); -/* Print a PowerPC or POWER instruction. */ + if (cpu_has_feature(CPU_FTRS_POWER6)) + dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_ALTIVEC); -static int -print_insn_powerpc (bfd_vma memaddr, - struct disassemble_info *info, - int bigendian, - ppc_cpu_t dialect) -{ - bfd_byte buffer[4]; - int status; - unsigned long insn; - const struct powerpc_opcode *opcode; - bfd_boolean insn_is_short; + if (cpu_has_feature(CPU_FTRS_POWER7)) + dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7 + | PPC_OPCODE_ALTIVEC | PPC_OPCODE_VSX); - status = (*info->read_memory_func) (memaddr, buffer, 4, info); - if (status != 0) - { - /* The final instruction may be a 2-byte VLE insn. */ - if ((dialect & PPC_OPCODE_VLE) != 0) - { - /* Clear buffer so unused bytes will not have garbage in them. */ - buffer[0] = buffer[1] = buffer[2] = buffer[3] = 0; - status = (*info->read_memory_func) (memaddr, buffer, 2, info); - if (status != 0) - { - (*info->memory_error_func) (status, memaddr, info); - return -1; - } - } - else - { - (*info->memory_error_func) (status, memaddr, info); - return -1; - } - } + if (cpu_has_feature(CPU_FTRS_POWER8)) + dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7 + | PPC_OPCODE_POWER8 | PPC_OPCODE_HTM + | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 | PPC_OPCODE_VSX); - if (bigendian) - insn = bfd_getb32 (buffer); - else - insn = bfd_getl32 (buffer); + if (cpu_has_feature(CPU_FTRS_POWER9)) + dialect |= (PPC_OPCODE_POWER5 | PPC_OPCODE_POWER6 | PPC_OPCODE_POWER7 + | PPC_OPCODE_POWER8 | PPC_OPCODE_POWER9 | PPC_OPCODE_HTM + | PPC_OPCODE_ALTIVEC | PPC_OPCODE_ALTIVEC2 + | PPC_OPCODE_VSX | PPC_OPCODE_VSX3), /* Get the major opcode of the insn. */ opcode = NULL; - insn_is_short = FALSE; - if ((dialect & PPC_OPCODE_VLE) != 0) - { - opcode = lookup_vle (insn); - if (opcode != NULL) - insn_is_short = PPC_OP_SE_VLE(opcode->mask); - } + insn_is_short = false; + if (opcode == NULL) opcode = lookup_powerpc (insn, dialect); if (opcode == NULL && (dialect & PPC_OPCODE_ANY) != 0) @@ -649,9 +182,9 @@ print_insn_powerpc (bfd_vma memaddr, int skip_optional; if (opcode->operands[0] != 0) - (*info->fprintf_func) (info->stream, "%-7s ", opcode->name); + printf("%-7s ", opcode->name); else - (*info->fprintf_func) (info->stream, "%s", opcode->name); + printf("%s", opcode->name); if (insn_is_short) /* The operands will be fetched out of the 16-bit instruction. */ @@ -688,34 +221,34 @@ print_insn_powerpc (bfd_vma memaddr, if (need_comma) { - (*info->fprintf_func) (info->stream, ","); + printf(","); need_comma = 0; } /* Print the operand as directed by the flags. 
*/ if ((operand->flags & PPC_OPERAND_GPR) != 0 || ((operand->flags & PPC_OPERAND_GPR_0) != 0 && value != 0)) - (*info->fprintf_func) (info->stream, "r%ld", value); + printf("r%ld", value); else if ((operand->flags & PPC_OPERAND_FPR) != 0) - (*info->fprintf_func) (info->stream, "f%ld", value); + printf("f%ld", value); else if ((operand->flags & PPC_OPERAND_VR) != 0) - (*info->fprintf_func) (info->stream, "v%ld", value); + printf("v%ld", value); else if ((operand->flags & PPC_OPERAND_VSR) != 0) - (*info->fprintf_func) (info->stream, "vs%ld", value); + printf("vs%ld", value); else if ((operand->flags & PPC_OPERAND_RELATIVE) != 0) - (*info->print_address_func) (memaddr + value, info); + print_address(memaddr + value); else if ((operand->flags & PPC_OPERAND_ABSOLUTE) != 0) - (*info->print_address_func) ((bfd_vma) value & 0xffffffff, info); + print_address(value & 0xffffffff); else if ((operand->flags & PPC_OPERAND_FSL) != 0) - (*info->fprintf_func) (info->stream, "fsl%ld", value); + printf("fsl%ld", value); else if ((operand->flags & PPC_OPERAND_FCR) != 0) - (*info->fprintf_func) (info->stream, "fcr%ld", value); + printf("fcr%ld", value); else if ((operand->flags & PPC_OPERAND_UDI) != 0) - (*info->fprintf_func) (info->stream, "%ld", value); + printf("%ld", value); else if ((operand->flags & PPC_OPERAND_CR_REG) != 0 && (((dialect & PPC_OPCODE_PPC) != 0) || ((dialect & PPC_OPCODE_VLE) != 0))) - (*info->fprintf_func) (info->stream, "cr%ld", value); + printf("cr%ld", value); else if (((operand->flags & PPC_OPERAND_CR_BIT) != 0) && (((dialect & PPC_OPCODE_PPC) != 0) || ((dialect & PPC_OPCODE_VLE) != 0))) @@ -726,16 +259,16 @@ print_insn_powerpc (bfd_vma memaddr, cr = value >> 2; if (cr != 0) - (*info->fprintf_func) (info->stream, "4*cr%d+", cr); + printf("4*cr%d+", cr); cc = value & 3; - (*info->fprintf_func) (info->stream, "%s", cbnames[cc]); + printf("%s", cbnames[cc]); } else - (*info->fprintf_func) (info->stream, "%d", (int) value); + printf("%d", (int) value); if (need_paren) { - (*info->fprintf_func) (info->stream, ")"); + printf(")"); need_paren = 0; } @@ -743,7 +276,7 @@ print_insn_powerpc (bfd_vma memaddr, need_comma = 1; else { - (*info->fprintf_func) (info->stream, "("); + printf("("); need_paren = 1; } } @@ -761,28 +294,7 @@ print_insn_powerpc (bfd_vma memaddr, } /* We could not find a match. */ - (*info->fprintf_func) (info->stream, ".long 0x%lx", insn); + printf(".long 0x%lx", insn); return 4; } - -void -print_ppc_disassembler_options (FILE *stream) -{ - unsigned int i, col; - - fprintf (stream, _("\n\ -The following PPC specific disassembler options are supported for use with\n\ -the -M switch:\n")); - - for (col = 0, i = 0; i < sizeof (ppc_opts) / sizeof (ppc_opts[0]); i++) - { - col += fprintf (stream, " %s,", ppc_opts[i].opt); - if (col > 66) - { - fprintf (stream, "\n"); - col = 0; - } - } - fprintf (stream, " 32, 64\n"); -} diff --git a/arch/powerpc/xmon/ppc-opc.c b/arch/powerpc/xmon/ppc-opc.c index e3ad69c3be07..f2a49d237b59 100644 --- a/arch/powerpc/xmon/ppc-opc.c +++ b/arch/powerpc/xmon/ppc-opc.c @@ -19,10 +19,14 @@ Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ -#include "sysdep.h" -#include -#include "opcode/ppc.h" -#include "opintl.h" +#include +#include +#include +#include "nonstdio.h" +#include "ppc.h" + +#define ATTRIBUTE_UNUSED +#define _(x) x /* This file holds the PowerPC opcode table. The opcode table includes almost all of the extended instruction mnemonics. 
This diff --git a/arch/powerpc/xmon/ppc.h b/arch/powerpc/xmon/ppc.h index 29c385f8e115..d00f33dcf192 100644 --- a/arch/powerpc/xmon/ppc.h +++ b/arch/powerpc/xmon/ppc.h @@ -21,8 +21,6 @@ Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, US #ifndef PPC_H #define PPC_H -#include "bfd_stdint.h" - #ifdef __cplusplus extern "C" { #endif -- GitLab From d7d55536c6cd1f80295b6d7483ad0587b148bde4 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 11 Jan 2017 11:50:06 +1100 Subject: [PATCH 0259/1084] drivers/pci/hotplug: Handle presence detection change properly Surprise hotplug is driven by interrupts in the PowerNV PCI hotplug driver. In the interrupt handler, pnv_php_interrupt(), we wrongly bail out when pnv_pci_get_presence_state() returns zero, so the presence change event is always ignored. This fixes the issue by bailing out only on an error (non-zero value) returned from pnv_pci_get_presence_state(). Fixes: 360aebd85a4 ("drivers/pci/hotplug: Support surprise hotplug in powernv driver") Cc: stable@vger.kernel.org #v4.9+ Reported-by: Hank Chang Signed-off-by: Gavin Shan Tested-by: Willie Liauw Signed-off-by: Michael Ellerman --- drivers/pci/hotplug/pnv_php.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 56efaf72d08e..306728cb5785 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -707,8 +707,12 @@ static irqreturn_t pnv_php_interrupt(int irq, void *data) added = !!(lsts & PCI_EXP_LNKSTA_DLLLA); } else if (sts & PCI_EXP_SLTSTA_PDC) { ret = pnv_pci_get_presence_state(php_slot->id, &presence); - if (!ret) + if (ret) { + dev_warn(&pdev->dev, "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n", + php_slot->name, ret, sts); return IRQ_HANDLED; + } + added = !!(presence == OPAL_PCI_SLOT_PRESENT); } else { return IRQ_NONE; -- GitLab From d0c424971f70501ec0a0364117b9934db039c9cc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 11 Jan 2017 11:50:07 +1100 Subject: [PATCH 0260/1084] drivers/pci/hotplug: Fix initial state for empty slot In the PowerNV PCI hotplug driver, the initial PCI slot's state is set to PNV_PHP_STATE_POPULATED if no PCI devices are connected to the slot. PCI devices that are hot added to the slot afterwards won't be probed and populated because of the check in pnv_php_enable(): /* Check if the slot has been configured */ if (php_slot->state != PNV_PHP_STATE_REGISTERED) return 0; This fixes the issue by leaving the slot in the PNV_PHP_STATE_REGISTERED state initially if nothing is connected to the slot. Fixes: 360aebd85a4 ("drivers/pci/hotplug: Support surprise hotplug in powernv driver") Cc: stable@vger.kernel.org #v4.9+ Reported-by: Hank Chang Signed-off-by: Gavin Shan Tested-by: Willie Liauw Signed-off-by: Michael Ellerman --- drivers/pci/hotplug/pnv_php.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 306728cb5785..41304b313512 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -430,9 +430,21 @@ static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan) if (ret) return ret; - /* Proceed if there have nothing behind the slot */ - if (presence == OPAL_PCI_SLOT_EMPTY) + /* + * Proceed if there have nothing behind the slot. However, + * we should leave the slot in registered state at the + * beginning.
Otherwise, the PCI devices inserted afterwards + * won't be probed and populated. + */ + if (presence == OPAL_PCI_SLOT_EMPTY) { + if (!php_slot->power_state_check) { + php_slot->power_state_check = true; + + return 0; + } + goto scan; + } /* * If the power supply to the slot is off, we can't detect -- GitLab From 454593e54cac126a0f49d4b65f90a5e518c51404 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 11 Jan 2017 11:50:08 +1100 Subject: [PATCH 0261/1084] drivers/pci/hotplug: Mask PDC interrupt if required We support surprise hotplug on PCI slots behind root ports or PCIe switch downstream ports which don't claim the capability in the hardware register (offset: PCIe cap + PCI_EXP_SLTCAP); PEX8718 is one example. For those PCI slots, the PDC (Presence Detection Change) event isn't reliable and the underlying (skiboot) firmware has the best judgement. This masks the PDC event when skiboot requests it via the "ibm,slot-broken-pdc" property in the PCI slot's device-tree node. Reported-by: Hank Chang Signed-off-by: Gavin Shan Tested-by: Willie Liauw Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pnv-pci.h | 2 ++ drivers/pci/hotplug/pnv_php.c | 27 ++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 696438f09aea..de9681034353 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -57,6 +57,8 @@ struct pnv_php_slot { uint64_t id; char *name; int slot_no; + unsigned int flags; +#define PNV_PHP_FLAG_BROKEN_PDC 0x1 struct kref kref; #define PNV_PHP_STATE_INITIALIZED 0 #define PNV_PHP_STATE_REGISTERED 1 diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 41304b313512..63cd9f354b79 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -717,7 +717,8 @@ static irqreturn_t pnv_php_interrupt(int irq, void *data) if (sts & PCI_EXP_SLTSTA_DLLSC) { pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lsts); added = !!(lsts & PCI_EXP_LNKSTA_DLLLA); - } else if (sts & PCI_EXP_SLTSTA_PDC) { + } else if (!(php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) && + (sts & PCI_EXP_SLTSTA_PDC)) { ret = pnv_pci_get_presence_state(php_slot->id, &presence); if (ret) { dev_warn(&pdev->dev, "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n", @@ -768,6 +769,7 @@ static irqreturn_t pnv_php_interrupt(int irq, void *data) static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) { struct pci_dev *pdev = php_slot->pdev; + u32 broken_pdc = 0; u16 sts, ctrl; int ret; @@ -779,9 +781,18 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) return; } + /* Check PDC (Presence Detection Change) is broken or not */ + ret = of_property_read_u32(php_slot->dn, "ibm,slot-broken-pdc", + &broken_pdc); + if (!ret && broken_pdc) + php_slot->flags |= PNV_PHP_FLAG_BROKEN_PDC; + /* Clear pending interrupts */ pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts); - sts |= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC); + if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) + sts |= PCI_EXP_SLTSTA_DLLSC; + else + sts |= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC); pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts); /* Request the interrupt */ @@ -795,9 +806,15 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) /* Enable the interrupts */ pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl); - ctrl |= (PCI_EXP_SLTCTL_HPIE | - PCI_EXP_SLTCTL_PDCE | - PCI_EXP_SLTCTL_DLLSCE); +
if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) { + ctrl &= ~PCI_EXP_SLTCTL_PDCE; + ctrl |= (PCI_EXP_SLTCTL_HPIE | + PCI_EXP_SLTCTL_DLLSCE); + } else { + ctrl |= (PCI_EXP_SLTCTL_HPIE | + PCI_EXP_SLTCTL_PDCE | + PCI_EXP_SLTCTL_DLLSCE); + } pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl); /* The interrupt is initialized successfully when @irq is valid */ -- GitLab From 0d2b5cdc76ff7eef9d727c2e802213ec15d5e94f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 15 Feb 2017 20:24:25 +1100 Subject: [PATCH 0262/1084] powerpc/64e: Fix bogus usage of WARN_ONCE() WARN_ONCE() takes a condition and a format string. We were passing a constant string as the condition, and the function name as the format string. It would work, but the message would be just the function name. Fix it by just using WARN_ONCE() directly instead of if (x) WARN_ONCE(). Noticed-by: Geliang Tang Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b9855f1b290a..adf2084f214b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -113,14 +113,12 @@ void __init setup_tlb_core_data(void) * If we have threads, we need either tlbsrx. * or e6500 tablewalk mode, or else TLB handlers * will be racy and could produce duplicate entries. + * Should we panic instead? */ - if (smt_enabled_at_boot >= 2 && - !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && - book3e_htw_mode != PPC_HTW_E6500) { - /* Should we panic instead? */ - WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n", - __func__); - } + WARN_ONCE(smt_enabled_at_boot >= 2 && + !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && + book3e_htw_mode != PPC_HTW_E6500, + "%s: unsupported MMU configuration\n", __func__); } } #endif -- GitLab From 7d7be3aa08fa338e84eb235805ee18f2fab85a46 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Dec 2015 17:30:31 +0000 Subject: [PATCH 0263/1084] powerpc/spufs: Get rid of broken fasync stuff In all the years it's been in the tree it had never been used by anything - it would instantly trigger BUG_ON() in fs/fcntl.c due to bogus band argument (ie. POLLIN not POLL_IN) passed to kill_fasync(). Since nobody had ever used it in ten years, let's just rip it out and be done with that. Signed-off-by: Al Viro Acked-by: Arnd Bergmann Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/cell/spufs/file.c | 65 +++-------------------- arch/powerpc/platforms/cell/spufs/spufs.h | 3 -- 2 files changed, 6 insertions(+), 62 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index a35e2c29d7ee..03f2cdfabf23 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -684,23 +684,13 @@ size_t spu_ibox_read(struct spu_context *ctx, u32 *data) return ctx->ops->ibox_read(ctx, data); } -static int spufs_ibox_fasync(int fd, struct file *file, int on) -{ - struct spu_context *ctx = file->private_data; - - return fasync_helper(fd, file, on, &ctx->ibox_fasync); -} - /* interrupt-level ibox callback function. 
*/ void spufs_ibox_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; - if (!ctx) - return; - - wake_up_all(&ctx->ibox_wq); - kill_fasync(&ctx->ibox_fasync, SIGIO, POLLIN); + if (ctx) + wake_up_all(&ctx->ibox_wq); } /* @@ -795,7 +785,6 @@ static const struct file_operations spufs_ibox_fops = { .open = spufs_pipe_open, .read = spufs_ibox_read, .poll = spufs_ibox_poll, - .fasync = spufs_ibox_fasync, .llseek = no_llseek, }; @@ -833,26 +822,13 @@ size_t spu_wbox_write(struct spu_context *ctx, u32 data) return ctx->ops->wbox_write(ctx, data); } -static int spufs_wbox_fasync(int fd, struct file *file, int on) -{ - struct spu_context *ctx = file->private_data; - int ret; - - ret = fasync_helper(fd, file, on, &ctx->wbox_fasync); - - return ret; -} - /* interrupt-level wbox callback function. */ void spufs_wbox_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; - if (!ctx) - return; - - wake_up_all(&ctx->wbox_wq); - kill_fasync(&ctx->wbox_fasync, SIGIO, POLLOUT); + if (ctx) + wake_up_all(&ctx->wbox_wq); } /* @@ -945,7 +921,6 @@ static const struct file_operations spufs_wbox_fops = { .open = spufs_pipe_open, .write = spufs_wbox_write, .poll = spufs_wbox_poll, - .fasync = spufs_wbox_fasync, .llseek = no_llseek, }; @@ -1521,28 +1496,8 @@ void spufs_mfc_callback(struct spu *spu) { struct spu_context *ctx = spu->ctx; - if (!ctx) - return; - - wake_up_all(&ctx->mfc_wq); - - pr_debug("%s %s\n", __func__, spu->name); - if (ctx->mfc_fasync) { - u32 free_elements, tagstatus; - unsigned int mask; - - /* no need for spu_acquire in interrupt context */ - free_elements = ctx->ops->get_mfc_free_elements(ctx); - tagstatus = ctx->ops->read_mfc_tagstatus(ctx); - - mask = 0; - if (free_elements & 0xffff) - mask |= POLLOUT; - if (tagstatus & ctx->tagwait) - mask |= POLLIN; - - kill_fasync(&ctx->mfc_fasync, SIGIO, mask); - } + if (ctx) + wake_up_all(&ctx->mfc_wq); } static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status) @@ -1804,13 +1759,6 @@ static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int data return err; } -static int spufs_mfc_fasync(int fd, struct file *file, int on) -{ - struct spu_context *ctx = file->private_data; - - return fasync_helper(fd, file, on, &ctx->mfc_fasync); -} - static const struct file_operations spufs_mfc_fops = { .open = spufs_mfc_open, .release = spufs_mfc_release, @@ -1819,7 +1767,6 @@ static const struct file_operations spufs_mfc_fops = { .poll = spufs_mfc_poll, .flush = spufs_mfc_flush, .fsync = spufs_mfc_fsync, - .fasync = spufs_mfc_fasync, .mmap = spufs_mfc_mmap, .llseek = no_llseek, }; diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index bcfd6f063efa..aac733966092 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -102,9 +102,6 @@ struct spu_context { wait_queue_head_t stop_wq; wait_queue_head_t mfc_wq; wait_queue_head_t run_wq; - struct fasync_struct *ibox_fasync; - struct fasync_struct *wbox_fasync; - struct fasync_struct *mfc_fasync; u32 tagwait; struct spu_context_ops *ops; struct work_struct reap_work; -- GitLab From 454656155110603e88b83eec847d5293a30bb96e Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 15 Feb 2017 21:41:20 +1100 Subject: [PATCH 0264/1084] powerpc/asm: Use OFFSET macro in asm-offsets.c A lot of entries in asm-offests.c look like this: DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); But there is a common macro, OFFSET, which makes this cleaner: OFFSET(TI_flags, thread_info, 
flags) So use it. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/asm-offsets.c | 711 ++++++++++++++---------------- 1 file changed, 340 insertions(+), 371 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 68657a767f80..8b7b2fd7e374 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -74,206 +74,188 @@ int main(void) { - DEFINE(THREAD, offsetof(struct task_struct, thread)); - DEFINE(MM, offsetof(struct task_struct, mm)); - DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); + OFFSET(THREAD, task_struct, thread); + OFFSET(MM, task_struct, mm); + OFFSET(MMCONTEXTID, mm_struct, context.id); #ifdef CONFIG_PPC64 DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); - DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); + OFFSET(TASKTHREADPPR, task_struct, thread.ppr); #else - DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); + OFFSET(THREAD_INFO, task_struct, stack); DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); - DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); + OFFSET(KSP_LIMIT, thread_struct, ksp_limit); #endif /* CONFIG_PPC64 */ #ifdef CONFIG_LIVEPATCH - DEFINE(TI_livepatch_sp, offsetof(struct thread_info, livepatch_sp)); + OFFSET(TI_livepatch_sp, thread_info, livepatch_sp); #endif #ifdef CONFIG_CC_STACKPROTECTOR DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary)); #endif - DEFINE(KSP, offsetof(struct thread_struct, ksp)); - DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); + OFFSET(KSP, thread_struct, ksp); + OFFSET(PT_REGS, thread_struct, regs); #ifdef CONFIG_BOOKE - DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0])); + OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]); #endif - DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode)); - DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state)); - DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area)); - DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr)); - DEFINE(THREAD_LOAD_FP, offsetof(struct thread_struct, load_fp)); + OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode); + OFFSET(THREAD_FPSTATE, thread_struct, fp_state); + OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area); + OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr); + OFFSET(THREAD_LOAD_FP, thread_struct, load_fp); #ifdef CONFIG_ALTIVEC - DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state)); - DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area)); - DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); - DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); - DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr)); - DEFINE(THREAD_LOAD_VEC, offsetof(struct thread_struct, load_vec)); + OFFSET(THREAD_VRSTATE, thread_struct, vr_state); + OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area); + OFFSET(THREAD_VRSAVE, thread_struct, vrsave); + OFFSET(THREAD_USED_VR, thread_struct, used_vr); + OFFSET(VRSTATE_VSCR, thread_vr_state, vscr); + OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX - DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr)); + OFFSET(THREAD_USED_VSR, thread_struct, used_vsr); #endif /* CONFIG_VSX */ #ifdef CONFIG_PPC64 - DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); + OFFSET(KSP_VSID, thread_struct, ksp_vsid); #else /* CONFIG_PPC64 */ - DEFINE(PGDIR, 
offsetof(struct thread_struct, pgdir)); + OFFSET(PGDIR, thread_struct, pgdir); #ifdef CONFIG_SPE - DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); - DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc)); - DEFINE(THREAD_SPEFSCR, offsetof(struct thread_struct, spefscr)); - DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); + OFFSET(THREAD_EVR0, thread_struct, evr[0]); + OFFSET(THREAD_ACC, thread_struct, acc); + OFFSET(THREAD_SPEFSCR, thread_struct, spefscr); + OFFSET(THREAD_USED_SPE, thread_struct, used_spe); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0)); + OFFSET(THREAD_DBCR0, thread_struct, debug.dbcr0); #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER - DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); + OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu); #endif #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) - DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); + OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); - DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); - DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr)); - DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar)); - DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar)); - DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr)); - DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr)); - DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); - DEFINE(THREAD_CKVRSTATE, offsetof(struct thread_struct, - ckvr_state)); - DEFINE(THREAD_CKVRSAVE, offsetof(struct thread_struct, - ckvrsave)); - DEFINE(THREAD_CKFPSTATE, offsetof(struct thread_struct, - ckfp_state)); + OFFSET(PACATMSCRATCH, paca_struct, tm_scratch); + OFFSET(THREAD_TM_TFHAR, thread_struct, tm_tfhar); + OFFSET(THREAD_TM_TEXASR, thread_struct, tm_texasr); + OFFSET(THREAD_TM_TFIAR, thread_struct, tm_tfiar); + OFFSET(THREAD_TM_TAR, thread_struct, tm_tar); + OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr); + OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr); + OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs); + OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state); + OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave); + OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state); /* Local pt_regs on stack for Transactional Memory funcs. 
*/ DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); - DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_TASK, offsetof(struct thread_info, task)); - DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + OFFSET(TI_FLAGS, thread_info, flags); + OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); + OFFSET(TI_PREEMPT, thread_info, preempt_count); + OFFSET(TI_TASK, thread_info, task); + OFFSET(TI_CPU, thread_info, cpu); #ifdef CONFIG_PPC64 - DEFINE(DCACHEL1BLOCKSIZE, offsetof(struct ppc64_caches, l1d.block_size)); - DEFINE(DCACHEL1LOGBLOCKSIZE, offsetof(struct ppc64_caches, l1d.log_block_size)); - DEFINE(DCACHEL1BLOCKSPERPAGE, offsetof(struct ppc64_caches, l1d.blocks_per_page)); - DEFINE(ICACHEL1BLOCKSIZE, offsetof(struct ppc64_caches, l1i.block_size)); - DEFINE(ICACHEL1LOGBLOCKSIZE, offsetof(struct ppc64_caches, l1i.log_block_size)); - DEFINE(ICACHEL1BLOCKSPERPAGE, offsetof(struct ppc64_caches, l1i.blocks_per_page)); + OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); + OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size); + OFFSET(DCACHEL1BLOCKSPERPAGE, ppc64_caches, l1d.blocks_per_page); + OFFSET(ICACHEL1BLOCKSIZE, ppc64_caches, l1i.block_size); + OFFSET(ICACHEL1LOGBLOCKSIZE, ppc64_caches, l1i.log_block_size); + OFFSET(ICACHEL1BLOCKSPERPAGE, ppc64_caches, l1i.blocks_per_page); /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); - DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); - DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); - DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); - DEFINE(PACACURRENT, offsetof(struct paca_struct, __current)); - DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr)); - DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr)); - DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1)); - DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc)); - DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase)); - DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); - DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); - DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened)); + OFFSET(PACAPACAINDEX, paca_struct, paca_index); + OFFSET(PACAPROCSTART, paca_struct, cpu_start); + OFFSET(PACAKSAVE, paca_struct, kstack); + OFFSET(PACACURRENT, paca_struct, __current); + OFFSET(PACASAVEDMSR, paca_struct, saved_msr); + OFFSET(PACASTABRR, paca_struct, stab_rr); + OFFSET(PACAR1, paca_struct, saved_r1); + OFFSET(PACATOC, paca_struct, kernel_toc); + OFFSET(PACAKBASE, paca_struct, kernelbase); + OFFSET(PACAKMSR, paca_struct, kernel_msr); + OFFSET(PACASOFTIRQEN, paca_struct, soft_enabled); + OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); #ifdef CONFIG_PPC_BOOK3S - DEFINE(PACACONTEXTID, offsetof(struct paca_struct, mm_ctx_id)); + OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); #ifdef CONFIG_PPC_MM_SLICES - DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, - mm_ctx_low_slices_psize)); - DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct, - mm_ctx_high_slices_psize)); + OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize); + OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize); DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); #endif /* CONFIG_PPC_MM_SLICES */ #endif #ifdef CONFIG_PPC_BOOK3E - DEFINE(PACAPGD, offsetof(struct 
paca_struct, pgd)); - DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd)); - DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); - DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb)); - DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); - DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit)); - DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg)); - DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack)); - DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack)); - DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack)); - DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr)); - - DEFINE(TCD_ESEL_NEXT, - offsetof(struct tlb_core_data, esel_next)); - DEFINE(TCD_ESEL_MAX, - offsetof(struct tlb_core_data, esel_max)); - DEFINE(TCD_ESEL_FIRST, - offsetof(struct tlb_core_data, esel_first)); + OFFSET(PACAPGD, paca_struct, pgd); + OFFSET(PACA_KERNELPGD, paca_struct, kernel_pgd); + OFFSET(PACA_EXGEN, paca_struct, exgen); + OFFSET(PACA_EXTLB, paca_struct, extlb); + OFFSET(PACA_EXMC, paca_struct, exmc); + OFFSET(PACA_EXCRIT, paca_struct, excrit); + OFFSET(PACA_EXDBG, paca_struct, exdbg); + OFFSET(PACA_MC_STACK, paca_struct, mc_kstack); + OFFSET(PACA_CRIT_STACK, paca_struct, crit_kstack); + OFFSET(PACA_DBG_STACK, paca_struct, dbg_kstack); + OFFSET(PACA_TCD_PTR, paca_struct, tcd_ptr); + + OFFSET(TCD_ESEL_NEXT, tlb_core_data, esel_next); + OFFSET(TCD_ESEL_MAX, tlb_core_data, esel_max); + OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first); #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_STD_MMU_64 - DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); - DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); - DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp)); + OFFSET(PACASLBCACHE, paca_struct, slb_cache); + OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); + OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); #ifdef CONFIG_PPC_MM_SLICES - DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp)); + OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp); #else - DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, mm_ctx_sllp)); + OFFSET(PACACONTEXTSLLP, paca_struct, mm_ctx_sllp); #endif /* CONFIG_PPC_MM_SLICES */ - DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); - DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); - DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); - DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr)); - DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr)); - DEFINE(SLBSHADOW_STACKVSID, - offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); - DEFINE(SLBSHADOW_STACKESID, - offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid)); - DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); - DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use)); - DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); - DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count)); - DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); + OFFSET(PACA_EXGEN, paca_struct, exgen); + OFFSET(PACA_EXMC, paca_struct, exmc); + OFFSET(PACA_EXSLB, paca_struct, exslb); + OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); + OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); + OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); + OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); + OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area); + OFFSET(LPPACA_PMCINUSE, 
lppaca, pmcregs_in_use); + OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); + OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); + OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); #endif /* CONFIG_PPC_STD_MMU_64 */ - DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); + OFFSET(PACAEMERGSP, paca_struct, emergency_sp); #ifdef CONFIG_PPC_BOOK3S_64 - DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp)); - DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce)); -#endif - DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); - DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); - DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default)); - DEFINE(ACCOUNT_STARTTIME, - offsetof(struct paca_struct, accounting.starttime)); - DEFINE(ACCOUNT_STARTTIME_USER, - offsetof(struct paca_struct, accounting.starttime_user)); - DEFINE(ACCOUNT_USER_TIME, - offsetof(struct paca_struct, accounting.user_time)); - DEFINE(ACCOUNT_SYSTEM_TIME, - offsetof(struct paca_struct, accounting.system_time)); - DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); - DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); - DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso)); + OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp); + OFFSET(PACA_IN_MCE, paca_struct, in_mce); +#endif + OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); + OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); + OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default); + OFFSET(ACCOUNT_STARTTIME, paca_struct, accounting.starttime); + OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user); + OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.user_time); + OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.system_time); + OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); + OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost); + OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); #else /* CONFIG_PPC64 */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - DEFINE(ACCOUNT_STARTTIME, - offsetof(struct thread_info, accounting.starttime)); - DEFINE(ACCOUNT_STARTTIME_USER, - offsetof(struct thread_info, accounting.starttime_user)); - DEFINE(ACCOUNT_USER_TIME, - offsetof(struct thread_info, accounting.user_time)); - DEFINE(ACCOUNT_SYSTEM_TIME, - offsetof(struct thread_info, accounting.system_time)); + OFFSET(ACCOUNT_STARTTIME, thread_info, accounting.starttime); + OFFSET(ACCOUNT_STARTTIME_USER, thread_info, accounting.starttime_user); + OFFSET(ACCOUNT_USER_TIME, thread_info, accounting.user_time); + OFFSET(ACCOUNT_SYSTEM_TIME, thread_info, accounting.system_time); #endif #endif /* CONFIG_PPC64 */ /* RTAS */ - DEFINE(RTASBASE, offsetof(struct rtas_t, base)); - DEFINE(RTASENTRY, offsetof(struct rtas_t, entry)); + OFFSET(RTASBASE, rtas_t, base); + OFFSET(RTASENTRY, rtas_t, entry); /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); @@ -354,17 +336,17 @@ int main(void) #endif #ifndef CONFIG_PPC64 - DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); + OFFSET(MM_PGD, mm_struct, pgd); #endif /* ! 
CONFIG_PPC64 */ /* About the CPU features table */ - DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); - DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); - DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore)); + OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features); + OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup); + OFFSET(CPU_SPEC_RESTORE, cpu_spec, cpu_restore); - DEFINE(pbe_address, offsetof(struct pbe, address)); - DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); - DEFINE(pbe_next, offsetof(struct pbe, next)); + OFFSET(pbe_address, pbe, address); + OFFSET(pbe_orig_address, pbe, orig_address); + OFFSET(pbe_next, pbe, next); #ifndef CONFIG_PPC64 DEFINE(TASK_SIZE, TASK_SIZE); @@ -372,40 +354,40 @@ int main(void) #endif /* ! CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp)); - DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec)); - DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs)); - DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count)); - DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); - DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32)); - DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec)); - DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); - DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime)); - DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction)); - DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size)); - DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size)); - DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size)); - DEFINE(CFG_DCACHE_LOGBLOCKSZ, offsetof(struct vdso_data, dcache_log_block_size)); + OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp); + OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec); + OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs); + OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count); + OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest); + OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime); + OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32); + OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec); + OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); + OFFSET(STAMP_XTIME, vdso_data, stamp_xtime); + OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); + OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); + OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); + OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); + OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size); #ifdef CONFIG_PPC64 - DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64)); - DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec)); - DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec)); - DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec)); - DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec)); - DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec)); - DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec)); - DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec)); + OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64); + OFFSET(TVAL64_TV_SEC, timeval, tv_sec); + OFFSET(TVAL64_TV_USEC, timeval, tv_usec); + 
OFFSET(TVAL32_TV_SEC, compat_timeval, tv_sec); + OFFSET(TVAL32_TV_USEC, compat_timeval, tv_usec); + OFFSET(TSPC64_TV_SEC, timespec, tv_sec); + OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec); + OFFSET(TSPC32_TV_SEC, compat_timespec, tv_sec); + OFFSET(TSPC32_TV_NSEC, compat_timespec, tv_nsec); #else - DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec)); - DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec)); - DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec)); + OFFSET(TVAL32_TV_SEC, timeval, tv_sec); + OFFSET(TVAL32_TV_USEC, timeval, tv_usec); + OFFSET(TSPC32_TV_SEC, timespec, tv_sec); + OFFSET(TSPC32_TV_NSEC, timespec, tv_nsec); #endif /* timeval/timezone offsets for use by vdso */ - DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); - DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest); + OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime); /* Other bits used by the vdso */ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); @@ -425,170 +407,170 @@ int main(void) DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM - DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); - DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); - DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid)); - DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); - DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); - DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr)); + OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack); + OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid); + OFFSET(VCPU_GUEST_PID, kvm_vcpu, arch.pid); + OFFSET(VCPU_GPRS, kvm_vcpu, arch.gpr); + OFFSET(VCPU_VRSAVE, kvm_vcpu, arch.vrsave); + OFFSET(VCPU_FPRS, kvm_vcpu, arch.fp.fpr); #ifdef CONFIG_ALTIVEC - DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr)); + OFFSET(VCPU_VRS, kvm_vcpu, arch.vr.vr); #endif - DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); - DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); - DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + OFFSET(VCPU_XER, kvm_vcpu, arch.xer); + OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr); + OFFSET(VCPU_LR, kvm_vcpu, arch.lr); #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar)); + OFFSET(VCPU_TAR, kvm_vcpu, arch.tar); #endif - DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); - DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + OFFSET(VCPU_CR, kvm_vcpu, arch.cr); + OFFSET(VCPU_PC, kvm_vcpu, arch.pc); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr)); - DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0)); - DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); - DEFINE(VCPU_SPRG0, offsetof(struct kvm_vcpu, arch.shregs.sprg0)); - DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1)); - DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); - DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); + OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr); + OFFSET(VCPU_SRR0, kvm_vcpu, arch.shregs.srr0); + OFFSET(VCPU_SRR1, kvm_vcpu, arch.shregs.srr1); + OFFSET(VCPU_SPRG0, kvm_vcpu, arch.shregs.sprg0); + OFFSET(VCPU_SPRG1, kvm_vcpu, arch.shregs.sprg1); + OFFSET(VCPU_SPRG2, kvm_vcpu, arch.shregs.sprg2); + OFFSET(VCPU_SPRG3, kvm_vcpu, arch.shregs.sprg3); #endif #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING - DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry)); - 
DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr)); - DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit)); - DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time)); - DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time)); - DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity)); - DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, arch.cur_tb_start)); - DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount)); - DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total)); - DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min)); - DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max)); -#endif - DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3)); - DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); - DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5)); - DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6)); - DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7)); - DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); - DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1)); - DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); - DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); - DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); + OFFSET(VCPU_TB_RMENTRY, kvm_vcpu, arch.rm_entry); + OFFSET(VCPU_TB_RMINTR, kvm_vcpu, arch.rm_intr); + OFFSET(VCPU_TB_RMEXIT, kvm_vcpu, arch.rm_exit); + OFFSET(VCPU_TB_GUEST, kvm_vcpu, arch.guest_time); + OFFSET(VCPU_TB_CEDE, kvm_vcpu, arch.cede_time); + OFFSET(VCPU_CUR_ACTIVITY, kvm_vcpu, arch.cur_activity); + OFFSET(VCPU_ACTIVITY_START, kvm_vcpu, arch.cur_tb_start); + OFFSET(TAS_SEQCOUNT, kvmhv_tb_accumulator, seqcount); + OFFSET(TAS_TOTAL, kvmhv_tb_accumulator, tb_total); + OFFSET(TAS_MIN, kvmhv_tb_accumulator, tb_min); + OFFSET(TAS_MAX, kvmhv_tb_accumulator, tb_max); +#endif + OFFSET(VCPU_SHARED_SPRG3, kvm_vcpu_arch_shared, sprg3); + OFFSET(VCPU_SHARED_SPRG4, kvm_vcpu_arch_shared, sprg4); + OFFSET(VCPU_SHARED_SPRG5, kvm_vcpu_arch_shared, sprg5); + OFFSET(VCPU_SHARED_SPRG6, kvm_vcpu_arch_shared, sprg6); + OFFSET(VCPU_SHARED_SPRG7, kvm_vcpu_arch_shared, sprg7); + OFFSET(VCPU_SHADOW_PID, kvm_vcpu, arch.shadow_pid); + OFFSET(VCPU_SHADOW_PID1, kvm_vcpu, arch.shadow_pid1); + OFFSET(VCPU_SHARED, kvm_vcpu, arch.shared); + OFFSET(VCPU_SHARED_MSR, kvm_vcpu_arch_shared, msr); + OFFSET(VCPU_SHADOW_MSR, kvm_vcpu, arch.shadow_msr); #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE) - DEFINE(VCPU_SHAREDBE, offsetof(struct kvm_vcpu, arch.shared_big_endian)); + OFFSET(VCPU_SHAREDBE, kvm_vcpu, arch.shared_big_endian); #endif - DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0)); - DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1)); - DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2)); - DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3)); - DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4)); - DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6)); + OFFSET(VCPU_SHARED_MAS0, kvm_vcpu_arch_shared, mas0); + OFFSET(VCPU_SHARED_MAS1, kvm_vcpu_arch_shared, mas1); + OFFSET(VCPU_SHARED_MAS2, kvm_vcpu_arch_shared, mas2); + OFFSET(VCPU_SHARED_MAS7_3, kvm_vcpu_arch_shared, mas7_3); + OFFSET(VCPU_SHARED_MAS4, kvm_vcpu_arch_shared, mas4); + OFFSET(VCPU_SHARED_MAS6, 
kvm_vcpu_arch_shared, mas6); - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); - DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); + OFFSET(VCPU_KVM, kvm_vcpu, kvm); + OFFSET(KVM_LPID, kvm, arch.lpid); /* book3s */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - DEFINE(KVM_TLB_SETS, offsetof(struct kvm, arch.tlb_sets)); - DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); - DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); - DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); - DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); - DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); - DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); - DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); - DEFINE(KVM_RADIX, offsetof(struct kvm, arch.radix)); - DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); - DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); - DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); - DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); - DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst)); - DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu)); - DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu)); + OFFSET(KVM_TLB_SETS, kvm, arch.tlb_sets); + OFFSET(KVM_SDR1, kvm, arch.sdr1); + OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid); + OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr); + OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1); + OFFSET(KVM_NEED_FLUSH, kvm, arch.need_tlb_flush.bits); + OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); + OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); + OFFSET(KVM_RADIX, kvm, arch.radix); + OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); + OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); + OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); + OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty); + OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst); + OFFSET(VCPU_CPU, kvm_vcpu, cpu); + OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu); #endif #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); - DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); - DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); - DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); - DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); - DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); - DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr)); - DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); - DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); - DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx)); - DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr)); - DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx)); - DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr)); - DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); - DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); - DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); - DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); - DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); - DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); - DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); - DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); - DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc)); - DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar)); - DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar)); - DEFINE(VCPU_SIER, 
offsetof(struct kvm_vcpu, arch.sier)); - DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); - DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); - DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); - DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); - DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); - DEFINE(VCPU_FAULT_GPA, offsetof(struct kvm_vcpu, arch.fault_gpa)); - DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr)); - DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); - DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); - DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); - DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); - DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); - DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); - DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); - DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); - DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr)); - DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr)); - DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr)); - DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); - DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); - DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); - DEFINE(VCPU_TID, offsetof(struct kvm_vcpu, arch.tid)); - DEFINE(VCPU_PSSCR, offsetof(struct kvm_vcpu, arch.psscr)); - DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); - DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); - DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); - DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm)); - DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); - DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); - DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); - DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes)); - DEFINE(VCORE_VTB, offsetof(struct kvmppc_vcore, vtb)); - DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); - DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); + OFFSET(VCPU_PURR, kvm_vcpu, arch.purr); + OFFSET(VCPU_SPURR, kvm_vcpu, arch.spurr); + OFFSET(VCPU_IC, kvm_vcpu, arch.ic); + OFFSET(VCPU_DSCR, kvm_vcpu, arch.dscr); + OFFSET(VCPU_AMR, kvm_vcpu, arch.amr); + OFFSET(VCPU_UAMOR, kvm_vcpu, arch.uamor); + OFFSET(VCPU_IAMR, kvm_vcpu, arch.iamr); + OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl); + OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr); + OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx); + OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr); + OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx); + OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); + OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); + OFFSET(VCPU_DEC, kvm_vcpu, arch.dec); + OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires); + OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); + OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); + OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); + OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); + OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); + OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); + OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar); + OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar); + OFFSET(VCPU_SIER, kvm_vcpu, arch.sier); + OFFSET(VCPU_SLB, kvm_vcpu, arch.slb); + OFFSET(VCPU_SLB_MAX, kvm_vcpu, arch.slb_max); + OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr); + OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr); + OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar); + OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa); 
+ OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr); + OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst); + OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap); + OFFSET(VCPU_CFAR, kvm_vcpu, arch.cfar); + OFFSET(VCPU_PPR, kvm_vcpu, arch.ppr); + OFFSET(VCPU_FSCR, kvm_vcpu, arch.fscr); + OFFSET(VCPU_PSPB, kvm_vcpu, arch.pspb); + OFFSET(VCPU_EBBHR, kvm_vcpu, arch.ebbhr); + OFFSET(VCPU_EBBRR, kvm_vcpu, arch.ebbrr); + OFFSET(VCPU_BESCR, kvm_vcpu, arch.bescr); + OFFSET(VCPU_CSIGR, kvm_vcpu, arch.csigr); + OFFSET(VCPU_TACR, kvm_vcpu, arch.tacr); + OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr); + OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop); + OFFSET(VCPU_WORT, kvm_vcpu, arch.wort); + OFFSET(VCPU_TID, kvm_vcpu, arch.tid); + OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr); + OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map); + OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest); + OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads); + OFFSET(VCORE_KVM, kvmppc_vcore, kvm); + OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset); + OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr); + OFFSET(VCORE_PCR, kvmppc_vcore, pcr); + OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes); + OFFSET(VCORE_VTB, kvmppc_vcore, vtb); + OFFSET(VCPU_SLB_E, kvmppc_slb, orige); + OFFSET(VCPU_SLB_V, kvmppc_slb, origv); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar)); - DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar)); - DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr)); - DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm)); - DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr)); - DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); - DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); - DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); - DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm)); - DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); - DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); - DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); - DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm)); - DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm)); - DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm)); + OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar); + OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar); + OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr); + OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm); + OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr); + OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr); + OFFSET(VCPU_VRSAVE_TM, kvm_vcpu, arch.vrsave_tm); + OFFSET(VCPU_CR_TM, kvm_vcpu, arch.cr_tm); + OFFSET(VCPU_XER_TM, kvm_vcpu, arch.xer_tm); + OFFSET(VCPU_LR_TM, kvm_vcpu, arch.lr_tm); + OFFSET(VCPU_CTR_TM, kvm_vcpu, arch.ctr_tm); + OFFSET(VCPU_AMR_TM, kvm_vcpu, arch.amr_tm); + OFFSET(VCPU_PPR_TM, kvm_vcpu, arch.ppr_tm); + OFFSET(VCPU_DSCR_TM, kvm_vcpu, arch.dscr_tm); + OFFSET(VCPU_TAR_TM, kvm_vcpu, arch.tar_tm); #endif #ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE - DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); + OFFSET(PACA_SVCPU, paca_struct, shadow_vcpu); # define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) #else # define SVCPU_FIELD(x, f) @@ -671,11 +653,11 @@ int main(void) HSTATE_FIELD(HSTATE_DECEXP, dec_expires); HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode); DEFINE(IPI_PRIORITY, IPI_PRIORITY); - DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr)); - DEFINE(KVM_SPLIT_PMMAR, offsetof(struct 
kvm_split_mode, pmmar)); - DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar)); - DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap)); - DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped)); + OFFSET(KVM_SPLIT_RPR, kvm_split_mode, rpr); + OFFSET(KVM_SPLIT_PMMAR, kvm_split_mode, pmmar); + OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); + OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap); + OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_PPC_BOOK3S_64 @@ -685,32 +667,27 @@ int main(void) #endif /* CONFIG_PPC_BOOK3S_64 */ #else /* CONFIG_PPC_BOOK3S */ - DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); - DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); - DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); - DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); - DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); - DEFINE(VCPU_SPRG9, offsetof(struct kvm_vcpu, arch.sprg9)); - DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); - DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); - DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); - DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save)); + OFFSET(VCPU_CR, kvm_vcpu, arch.cr); + OFFSET(VCPU_XER, kvm_vcpu, arch.xer); + OFFSET(VCPU_LR, kvm_vcpu, arch.lr); + OFFSET(VCPU_CTR, kvm_vcpu, arch.ctr); + OFFSET(VCPU_PC, kvm_vcpu, arch.pc); + OFFSET(VCPU_SPRG9, kvm_vcpu, arch.sprg9); + OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst); + OFFSET(VCPU_FAULT_DEAR, kvm_vcpu, arch.fault_dear); + OFFSET(VCPU_FAULT_ESR, kvm_vcpu, arch.fault_esr); + OFFSET(VCPU_CRIT_SAVE, kvm_vcpu, arch.crit_save); #endif /* CONFIG_PPC_BOOK3S */ #endif /* CONFIG_KVM */ #ifdef CONFIG_KVM_GUEST - DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared, - scratch1)); - DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared, - scratch2)); - DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared, - scratch3)); - DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared, - int_pending)); - DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); - DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared, - critical)); - DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr)); + OFFSET(KVM_MAGIC_SCRATCH1, kvm_vcpu_arch_shared, scratch1); + OFFSET(KVM_MAGIC_SCRATCH2, kvm_vcpu_arch_shared, scratch2); + OFFSET(KVM_MAGIC_SCRATCH3, kvm_vcpu_arch_shared, scratch3); + OFFSET(KVM_MAGIC_INT, kvm_vcpu_arch_shared, int_pending); + OFFSET(KVM_MAGIC_MSR, kvm_vcpu_arch_shared, msr); + OFFSET(KVM_MAGIC_CRITICAL, kvm_vcpu_arch_shared, critical); + OFFSET(KVM_MAGIC_SR, kvm_vcpu_arch_shared, sr); #endif #ifdef CONFIG_44x @@ -719,45 +696,37 @@ int main(void) #endif #ifdef CONFIG_PPC_FSL_BOOK3E DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); - DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0)); - DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1)); - DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2)); - DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3)); - DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7)); + OFFSET(TLBCAM_MAS0, tlbcam, MAS0); + OFFSET(TLBCAM_MAS1, tlbcam, MAS1); + OFFSET(TLBCAM_MAS2, tlbcam, MAS2); + OFFSET(TLBCAM_MAS3, tlbcam, MAS3); + OFFSET(TLBCAM_MAS7, tlbcam, MAS7); #endif #if defined(CONFIG_KVM) && defined(CONFIG_SPE) - DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0])); - DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc)); - DEFINE(VCPU_SPEFSCR, offsetof(struct 
kvm_vcpu, arch.spefscr)); - DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr)); + OFFSET(VCPU_EVR, kvm_vcpu, arch.evr[0]); + OFFSET(VCPU_ACC, kvm_vcpu, arch.acc); + OFFSET(VCPU_SPEFSCR, kvm_vcpu, arch.spefscr); + OFFSET(VCPU_HOST_SPEFSCR, kvm_vcpu, arch.host_spefscr); #endif #ifdef CONFIG_KVM_BOOKE_HV - DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4)); - DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6)); + OFFSET(VCPU_HOST_MAS4, kvm_vcpu, arch.host_mas4); + OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6); #endif #ifdef CONFIG_KVM_EXIT_TIMING - DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, - arch.timing_exit.tv32.tbu)); - DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, - arch.timing_exit.tv32.tbl)); - DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, - arch.timing_last_enter.tv32.tbu)); - DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, - arch.timing_last_enter.tv32.tbl)); + OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu); + OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl); + OFFSET(VCPU_TIMING_LAST_ENTER_TBU, kvm_vcpu, arch.timing_last_enter.tv32.tbu); + OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl); #endif #ifdef CONFIG_PPC_POWERNV - DEFINE(PACA_CORE_IDLE_STATE_PTR, - offsetof(struct paca_struct, core_idle_state_ptr)); - DEFINE(PACA_THREAD_IDLE_STATE, - offsetof(struct paca_struct, thread_idle_state)); - DEFINE(PACA_THREAD_MASK, - offsetof(struct paca_struct, thread_mask)); - DEFINE(PACA_SUBCORE_SIBLING_MASK, - offsetof(struct paca_struct, subcore_sibling_mask)); + OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr); + OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state); + OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask); + OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); -- GitLab From 10d4cf188ab26a4ebeee4c914e82acd5edd9f0ff Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Thu, 2 Jun 2016 14:29:47 +1000 Subject: [PATCH 0265/1084] powerpc/asm: Define STACK_PT_REGS_OFFSET macro in asm-offsets.c There are quite a few entries in asm-offests.c which look like: DEFINE(REG, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, reg)); So define a macro to do it once. 
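As a quick before/after sketch (the macro definition is the one added in the hunk below; only its expansion is spelled out here):

    /* before: the stack-frame prefix is repeated for every pt_regs field */
    DEFINE(GPR0, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, gpr[0]));

    /* after: the helper folds the common prefix into a single definition */
    #define STACK_PT_REGS_OFFSET(sym, val) \
            DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val))

    STACK_PT_REGS_OFFSET(GPR0, gpr[0]); /* expands to the DEFINE() above */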
Signed-off-by: Rashmica Gupta [mpe: Rename to STACK_PT_REGS_OFFSET for excruciating explicitness] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/asm-offsets.c | 61 ++++++++++++++++--------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8b7b2fd7e374..b6709021fee5 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -72,6 +72,9 @@ #include #endif +#define STACK_PT_REGS_OFFSET(sym, val) \ + DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val)) + int main(void) { OFFSET(THREAD, task_struct, thread); @@ -265,38 +268,38 @@ int main(void) DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); #endif /* CONFIG_PPC64 */ - DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); - DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); - DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2])); - DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3])); - DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4])); - DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5])); - DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6])); - DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7])); - DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8])); - DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9])); - DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10])); - DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11])); - DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12])); - DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); + STACK_PT_REGS_OFFSET(GPR0, gpr[0]); + STACK_PT_REGS_OFFSET(GPR1, gpr[1]); + STACK_PT_REGS_OFFSET(GPR2, gpr[2]); + STACK_PT_REGS_OFFSET(GPR3, gpr[3]); + STACK_PT_REGS_OFFSET(GPR4, gpr[4]); + STACK_PT_REGS_OFFSET(GPR5, gpr[5]); + STACK_PT_REGS_OFFSET(GPR6, gpr[6]); + STACK_PT_REGS_OFFSET(GPR7, gpr[7]); + STACK_PT_REGS_OFFSET(GPR8, gpr[8]); + STACK_PT_REGS_OFFSET(GPR9, gpr[9]); + STACK_PT_REGS_OFFSET(GPR10, gpr[10]); + STACK_PT_REGS_OFFSET(GPR11, gpr[11]); + STACK_PT_REGS_OFFSET(GPR12, gpr[12]); + STACK_PT_REGS_OFFSET(GPR13, gpr[13]); #ifndef CONFIG_PPC64 - DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14])); + STACK_PT_REGS_OFFSET(GPR14, gpr[14]); #endif /* CONFIG_PPC64 */ /* * Note: these symbols include _ because they overlap with special * register names */ - DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip)); - DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr)); - DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr)); - DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link)); - DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr)); - DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer)); - DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); - DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); - DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3)); - DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); - DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); + STACK_PT_REGS_OFFSET(_NIP, nip); + STACK_PT_REGS_OFFSET(_MSR, msr); + STACK_PT_REGS_OFFSET(_CTR, ctr); + 
STACK_PT_REGS_OFFSET(_LINK, link); + STACK_PT_REGS_OFFSET(_CCR, ccr); + STACK_PT_REGS_OFFSET(_XER, xer); + STACK_PT_REGS_OFFSET(_DAR, dar); + STACK_PT_REGS_OFFSET(_DSISR, dsisr); + STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3); + STACK_PT_REGS_OFFSET(RESULT, result); + STACK_PT_REGS_OFFSET(_TRAP, trap); #ifndef CONFIG_PPC64 /* * The PowerPC 400-class & Book-E processors have neither the DAR @@ -304,10 +307,10 @@ int main(void) * DEAR and ESR SPRs for such processors. For critical interrupts * we use them to hold SRR0 and SRR1. */ - DEFINE(_DEAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); - DEFINE(_ESR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); + STACK_PT_REGS_OFFSET(_DEAR, dar); + STACK_PT_REGS_OFFSET(_ESR, dsisr); #else /* CONFIG_PPC64 */ - DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe)); + STACK_PT_REGS_OFFSET(SOFTE, softe); /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */ DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)); -- GitLab From f84775c2d5d92581f2df60d53b574b0405c85be3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 16 Feb 2017 11:27:00 +1100 Subject: [PATCH 0266/1084] powerpc/pseries: Fix build break when MEMORY_HOTREMOVE=n We broke the build when CONFIG_MEMORY_HOTREMOVE=n: arch/powerpc/platforms/pseries/hotplug-memory.c:821:8: error: implicit declaration of function 'dlpar_memory_readd_by_index' Add a dummy to fix it. Fixes: e70d59700fc3 ("powerpc/pseries: Introduce memory hotplug READD operation") Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/hotplug-memory.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 3381c20edbc0..b3b92814ce87 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -628,7 +628,10 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) { return -EOPNOTSUPP; } - +static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_MEMORY_HOTREMOVE */ static int dlpar_add_lmb(struct of_drconf_cell *lmb) -- GitLab From a42715830d552d7c0e3be709383ece4832453275 Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Thu, 16 Feb 2017 11:37:15 +1100 Subject: [PATCH 0267/1084] MAINTAINERS: Remove powerpc's "opal" pattern match The block guys have merged a new driver called "sed-opal", which is getting caught by our pattern match on "opal". So drop our "opal" pattern and replace it with matches against the specific paths for our drivers. We should try and remember to add new drivers as they're added in future. 
Suggested-by: Jon Derrick Signed-off-by: Stewart Smith Reviewed-by: Andrew Donnellan [mpe: Fix ic2/i2c typo, write change log] Signed-off-by: Michael Ellerman --- MAINTAINERS | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 26edd832c64e..bd294c22640a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7384,18 +7384,24 @@ L: linuxppc-dev@lists.ozlabs.org Q: http://patchwork.ozlabs.org/project/linuxppc-dev/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git S: Supported +F: Documentation/ABI/stable/sysfs-firmware-opal-* +F: Documentation/devicetree/bindings/powerpc/opal/ +F: Documentation/devicetree/bindings/rtc/rtc-opal.txt +F: Documentation/devicetree/bindings/i2c/i2c-opal.txt F: Documentation/powerpc/ F: arch/powerpc/ F: drivers/char/tpm/tpm_ibmvtpm* F: drivers/crypto/nx/ F: drivers/crypto/vmx/ +F: drivers/i2c/busses/i2c-opal.c F: drivers/net/ethernet/ibm/ibmveth.* F: drivers/net/ethernet/ibm/ibmvnic.* F: drivers/pci/hotplug/pnv_php.c F: drivers/pci/hotplug/rpa* +F: drivers/rtc/rtc-opal.c F: drivers/scsi/ibmvscsi/ +F: drivers/tty/hvc/hvc_opal.c F: tools/testing/selftests/powerpc -N: opal N: /pmac N: powermac N: powernv -- GitLab From e471c393dfafff54c65979cbda7d5a0eb38590f4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 13 Feb 2017 15:26:40 +1100 Subject: [PATCH 0268/1084] powerpc/mm: Convert slb_finish_load[_1T] to local symbols slb_finish_load and slb_finish_load_1T are both only used within slb_low.S, so make them local symbols. This makes the code a little clearer, as it's more obvious neither is intended to be an entry point from arbitrary other code, only the uses in this file. It also prevents them being used with kprobes and other tracing tools, which is good because we're not able to safely take traps at these locations, so making them local symbols avoids us needing to blacklist them. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/mm/slb_low.S | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index e2974fcd20f1..9beed92c1900 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -71,9 +71,9 @@ slb_miss_kernel_load_linear: BEGIN_FTR_SECTION - b slb_finish_load + b .Lslb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) - b slb_finish_load_1T + b .Lslb_finish_load_1T 1: #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -109,9 +109,9 @@ slb_miss_kernel_load_io: addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l BEGIN_FTR_SECTION - b slb_finish_load + b .Lslb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) - b slb_finish_load_1T + b .Lslb_finish_load_1T 0: /* * For userspace addresses, make sure this is region 0. 
@@ -174,9 +174,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) ld r9,PACACONTEXTID(r13) BEGIN_FTR_SECTION cmpldi r10,0x1000 - bge slb_finish_load_1T + bge .Lslb_finish_load_1T END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - b slb_finish_load + b .Lslb_finish_load 8: /* invalid EA - return an error indication */ crset 4*cr0+eq /* indicate failure */ @@ -187,7 +187,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) * * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET */ -slb_finish_load: +.Lslb_finish_load: rldimi r10,r9,ESID_BITS,0 ASM_VSID_SCRAMBLE(r10,r9,256M) /* @@ -256,7 +256,7 @@ slb_compare_rr_to_size: * * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9 */ -slb_finish_load_1T: +.Lslb_finish_load_1T: srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ rldimi r10,r9,ESID_BITS_1T,0 ASM_VSID_SCRAMBLE(r10,r9,1T) -- GitLab From a90e883d8b8fb07ddea4d9618ce58bf72bce7f00 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 13 Feb 2017 15:30:19 +1100 Subject: [PATCH 0269/1084] powerpc/mm: Blacklist SLB symbols from kprobe We can't sensibly take a trap at this point. So, blacklist these symbols. Reported-by: Anton Blanchard Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/mm/slb_low.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 9beed92c1900..a85e06ea6c20 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -272,3 +272,11 @@ slb_compare_rr_to_size: clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */ b 7b + +_ASM_NOKPROBE_SYMBOL(slb_allocate_realmode) +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear) +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io) +_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size) +#ifdef CONFIG_SPARSEMEM_VMEMMAP +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap) +#endif -- GitLab From 333f7b76865bec24c66710cf352f892d69e3ba0a Mon Sep 17 00:00:00 2001 From: Sahil Mehta Date: Wed, 15 Feb 2017 13:45:56 -0500 Subject: [PATCH 0270/1084] powerpc/pseries: Implement indexed-count hotplug memory add Indexed-count add for memory hotplug guarantees that a contiguous block of lmbs beginning at a specified will be assigned, any LMBs in this range that are not already assigned will be DLPAR added. Because of Qemu's per-DIMM memory management, the addition of a contiguous block of memory currently requires a series of individual calls to add each LMB in the block. Indexed-count add reduces this series of calls to a single call for the entire block. 
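A minimal sketch of what such a request looks like once parsed (struct, field and constant names are taken from the hunks below, except PSERIES_HP_ELOG_RESOURCE_MEM, which is assumed); the new "ic" union member carries both the LMB count and the starting DRC index in a single call:

    /* sketch: an indexed-count add request as seen by dlpar_memory() */
    struct pseries_hp_errorlog hp_elog;

    hp_elog.resource        = PSERIES_HP_ELOG_RESOURCE_MEM; /* assumed name */
    hp_elog.action          = PSERIES_HP_ELOG_ACTION_ADD;
    hp_elog.id_type         = PSERIES_HP_ELOG_ID_DRC_IC;
    hp_elog._drc_u.ic.count = cpu_to_be32(lmbs_to_add);  /* e.g. 4 LMBs */
    hp_elog._drc_u.ic.index = cpu_to_be32(drc_index);    /* first LMB's DRC index */

From user space this would typically be driven by writing, for example, "memory add indexed-count 4 0x80000010" to the pseries DLPAR sysfs interface (the sysfs path itself is not part of this patch).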
Signed-off-by: Sahil Mehta Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/rtas.h | 2 + arch/powerpc/platforms/pseries/dlpar.c | 38 +++++- .../platforms/pseries/hotplug-memory.c | 119 ++++++++++++++++-- 3 files changed, 147 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 076b89247ab5..ec9dd79398ee 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -307,6 +307,7 @@ struct pseries_hp_errorlog { union { __be32 drc_index; __be32 drc_count; + struct { __be32 count, index; } ic; char drc_name[1]; } _drc_u; }; @@ -323,6 +324,7 @@ struct pseries_hp_errorlog { #define PSERIES_HP_ELOG_ID_DRC_NAME 1 #define PSERIES_HP_ELOG_ID_DRC_INDEX 2 #define PSERIES_HP_ELOG_ID_DRC_COUNT 3 +#define PSERIES_HP_ELOG_ID_DRC_IC 4 struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, uint16_t section_id); diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index d3a81e746fc4..193e052fa0dd 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -354,11 +354,17 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) switch (hp_elog->id_type) { case PSERIES_HP_ELOG_ID_DRC_COUNT: hp_elog->_drc_u.drc_count = - be32_to_cpu(hp_elog->_drc_u.drc_count); + be32_to_cpu(hp_elog->_drc_u.drc_count); break; case PSERIES_HP_ELOG_ID_DRC_INDEX: hp_elog->_drc_u.drc_index = - be32_to_cpu(hp_elog->_drc_u.drc_index); + be32_to_cpu(hp_elog->_drc_u.drc_index); + break; + case PSERIES_HP_ELOG_ID_DRC_IC: + hp_elog->_drc_u.ic.count = + be32_to_cpu(hp_elog->_drc_u.ic.count); + hp_elog->_drc_u.ic.index = + be32_to_cpu(hp_elog->_drc_u.ic.index); } switch (hp_elog->resource) { @@ -467,7 +473,33 @@ static int dlpar_parse_id_type(char **cmd, struct pseries_hp_errorlog *hp_elog) if (!arg) return -EINVAL; - if (sysfs_streq(arg, "index")) { + if (sysfs_streq(arg, "indexed-count")) { + hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_IC; + arg = strsep(cmd, " "); + if (!arg) { + pr_err("No DRC count specified.\n"); + return -EINVAL; + } + + if (kstrtou32(arg, 0, &count)) { + pr_err("Invalid DRC count specified.\n"); + return -EINVAL; + } + + arg = strsep(cmd, " "); + if (!arg) { + pr_err("No DRC Index specified.\n"); + return -EINVAL; + } + + if (kstrtou32(arg, 0, &index)) { + pr_err("Invalid DRC Index specified.\n"); + return -EINVAL; + } + + hp_elog->_drc_u.ic.count = cpu_to_be32(count); + hp_elog->_drc_u.ic.index = cpu_to_be32(index); + } else if (sysfs_streq(arg, "index")) { hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; arg = strsep(cmd, " "); if (!arg) { diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index b3b92814ce87..ad8b3a606a8e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -779,6 +779,97 @@ static int dlpar_memory_add_by_index(u32 drc_index, struct property *prop) return rc; } +static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index, + struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int i, rc, start_lmb_found; + int lmbs_available = 0, start_index = 0, end_index; + + pr_info("Attempting to hot-add %u LMB(s) at index %x\n", + lmbs_to_add, drc_index); + + if (lmbs_to_add == 0) + return -EINVAL; + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + start_lmb_found = 0; + + /* Navigate to 
drc_index */ + while (start_index < num_lmbs) { + if (lmbs[start_index].drc_index == drc_index) { + start_lmb_found = 1; + break; + } + + start_index++; + } + + if (!start_lmb_found) + return -EINVAL; + + end_index = start_index + lmbs_to_add; + + /* Validate that the LMBs in this range are not reserved */ + for (i = start_index; i < end_index; i++) { + if (lmbs[i].flags & DRCONF_MEM_RESERVED) + break; + + lmbs_available++; + } + + if (lmbs_available < lmbs_to_add) + return -EINVAL; + + for (i = start_index; i < end_index; i++) { + if (lmbs[i].flags & DRCONF_MEM_ASSIGNED) + continue; + + rc = dlpar_acquire_drc(lmbs[i].drc_index); + if (rc) + break; + + rc = dlpar_add_lmb(&lmbs[i]); + if (rc) { + dlpar_release_drc(lmbs[i].drc_index); + break; + } + + lmbs[i].reserved = 1; + } + + if (rc) { + pr_err("Memory indexed-count-add failed, removing any added LMBs\n"); + + for (i = start_index; i < end_index; i++) { + if (!lmbs[i].reserved) + continue; + + rc = dlpar_remove_lmb(&lmbs[i]); + if (rc) + pr_err("Failed to remove LMB, drc index %x\n", + be32_to_cpu(lmbs[i].drc_index)); + else + dlpar_release_drc(lmbs[i].drc_index); + } + rc = -EINVAL; + } else { + for (i = start_index; i < end_index; i++) { + if (!lmbs[i].reserved) + continue; + + pr_info("Memory at %llx (drc index %x) was hot-added\n", + lmbs[i].base_addr, lmbs[i].drc_index); + lmbs[i].reserved = 0; + } + } + + return rc; +} + int dlpar_memory(struct pseries_hp_errorlog *hp_elog) { struct device_node *dn; @@ -786,9 +877,6 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) u32 count, drc_index; int rc; - count = hp_elog->_drc_u.drc_count; - drc_index = hp_elog->_drc_u.drc_index; - lock_device_hotplug(); dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); @@ -805,22 +893,35 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) switch (hp_elog->action) { case PSERIES_HP_ELOG_ACTION_ADD: - if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) { + count = hp_elog->_drc_u.drc_count; rc = dlpar_memory_add_by_count(count, prop); - else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { + drc_index = hp_elog->_drc_u.drc_index; rc = dlpar_memory_add_by_index(drc_index, prop); - else + } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) { + count = hp_elog->_drc_u.ic.count; + drc_index = hp_elog->_drc_u.ic.index; + rc = dlpar_memory_add_by_ic(count, drc_index, prop); + } else { rc = -EINVAL; + } + break; case PSERIES_HP_ELOG_ACTION_REMOVE: - if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) { + count = hp_elog->_drc_u.drc_count; rc = dlpar_memory_remove_by_count(count, prop); - else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { + drc_index = hp_elog->_drc_u.drc_index; rc = dlpar_memory_remove_by_index(drc_index, prop); - else + } else { rc = -EINVAL; + } + break; case PSERIES_HP_ELOG_ACTION_READD: + drc_index = hp_elog->_drc_u.drc_index; rc = dlpar_memory_readd_by_index(drc_index, prop); break; default: -- GitLab From 753843471cbbaeca25a5cab51981ee721ad272f7 Mon Sep 17 00:00:00 2001 From: Sahil Mehta Date: Wed, 15 Feb 2017 13:46:18 -0500 Subject: [PATCH 0271/1084] powerpc/pseries: Implement indexed-count hotplug memory remove Indexed-count remove for memory hotplug guarantees that a contiguous block of lmbs beginning at a specified will be unassigned (NOT that lmbs 
will be removed). Because of Qemu's per-DIMM memory management, the removal of a contiguous block of memory currently requires a series of individual calls. Indexed-count remove reduces this series into a single call. Signed-off-by: Sahil Mehta Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- .../platforms/pseries/hotplug-memory.c | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index ad8b3a606a8e..e28abfa013e5 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -601,6 +601,94 @@ static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop) return rc; } + +static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, + struct property *prop) +{ + struct of_drconf_cell *lmbs; + u32 num_lmbs, *p; + int i, rc, start_lmb_found; + int lmbs_available = 0, start_index = 0, end_index; + + pr_info("Attempting to hot-remove %u LMB(s) at %x\n", + lmbs_to_remove, drc_index); + + if (lmbs_to_remove == 0) + return -EINVAL; + + p = prop->value; + num_lmbs = *p++; + lmbs = (struct of_drconf_cell *)p; + start_lmb_found = 0; + + /* Navigate to drc_index */ + while (start_index < num_lmbs) { + if (lmbs[start_index].drc_index == drc_index) { + start_lmb_found = 1; + break; + } + + start_index++; + } + + if (!start_lmb_found) + return -EINVAL; + + end_index = start_index + lmbs_to_remove; + + /* Validate that there are enough LMBs to satisfy the request */ + for (i = start_index; i < end_index; i++) { + if (lmbs[i].flags & DRCONF_MEM_RESERVED) + break; + + lmbs_available++; + } + + if (lmbs_available < lmbs_to_remove) + return -EINVAL; + + for (i = start_index; i < end_index; i++) { + if (!(lmbs[i].flags & DRCONF_MEM_ASSIGNED)) + continue; + + rc = dlpar_remove_lmb(&lmbs[i]); + if (rc) + break; + + lmbs[i].reserved = 1; + } + + if (rc) { + pr_err("Memory indexed-count-remove failed, adding any removed LMBs\n"); + + for (i = start_index; i < end_index; i++) { + if (!lmbs[i].reserved) + continue; + + rc = dlpar_add_lmb(&lmbs[i]); + if (rc) + pr_err("Failed to add LMB, drc index %x\n", + be32_to_cpu(lmbs[i].drc_index)); + + lmbs[i].reserved = 0; + } + rc = -EINVAL; + } else { + for (i = start_index; i < end_index; i++) { + if (!lmbs[i].reserved) + continue; + + dlpar_release_drc(lmbs[i].drc_index); + pr_info("Memory at %llx (drc index %x) was hot-removed\n", + lmbs[i].base_addr, lmbs[i].drc_index); + + lmbs[i].reserved = 0; + } + } + + return rc; +} + #else static inline int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) @@ -632,6 +720,12 @@ static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop) { return -EOPNOTSUPP; } + +static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, + struct property *prop) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_MEMORY_HOTREMOVE */ static int dlpar_add_lmb(struct of_drconf_cell *lmb) @@ -915,6 +1009,10 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { drc_index = hp_elog->_drc_u.drc_index; rc = dlpar_memory_remove_by_index(drc_index, prop); + } else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_IC) { + count = hp_elog->_drc_u.ic.count; + drc_index = hp_elog->_drc_u.ic.index; + rc = dlpar_memory_remove_by_ic(count, drc_index, prop); } else { rc = -EINVAL; } -- GitLab From efe881afdd9996ccbcd2a09c93b724f4ffc25991 Mon Sep 17 
00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 12 Feb 2017 22:33:10 +0530 Subject: [PATCH 0272/1084] powerpc/perf: Factor out event_alternative function Factor out the power8 event_alternative function to share the code with power9. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/isa207-common.c | 36 +++++++++++++++++++++++++++++++ arch/powerpc/perf/isa207-common.h | 3 +++ arch/powerpc/perf/power8-pmu.c | 35 ++---------------------------- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 50e598cf644b..a86fadee352b 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -338,3 +338,39 @@ void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]) if (pmc <= 3) mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); } + +static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size) +{ + int i, j; + + for (i = 0; i < size; ++i) { + if (event < ev_alt[i][0]) + break; + + for (j = 0; j < MAX_ALT && ev_alt[i][j]; ++j) + if (event == ev_alt[i][j]) + return i; + } + + return -1; +} + +int isa207_get_alternatives(u64 event, u64 alt[], + const unsigned int ev_alt[][MAX_ALT], int size) +{ + int i, j, num_alt = 0; + u64 alt_event; + + alt[num_alt++] = event; + i = find_alternative(event, ev_alt, size); + if (i >= 0) { + /* Filter out the original event, it's already in alt[0] */ + for (j = 0; j < MAX_ALT; ++j) { + alt_event = ev_alt[i][j]; + if (alt_event && alt_event != event) + alt[num_alt++] = alt_event; + } + } + + return num_alt; +} diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 90495f1580c7..3e9150f6690a 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -260,5 +260,8 @@ int isa207_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]); void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]); +int isa207_get_alternatives(u64 event, u64 alt[], + const unsigned int ev_alt[][MAX_ALT], int size); + #endif diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index d07186382f3a..ce15b19a7962 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -48,43 +48,12 @@ static const unsigned int event_alternatives[][MAX_ALT] = { { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, }; -/* - * Scan the alternatives table for a match and return the - * index into the alternatives table if found, else -1. 
- */ -static int find_alternative(u64 event) -{ - int i, j; - - for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { - if (event < event_alternatives[i][0]) - break; - - for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) - if (event == event_alternatives[i][j]) - return i; - } - - return -1; -} - static int power8_get_alternatives(u64 event, unsigned int flags, u64 alt[]) { int i, j, num_alt = 0; - u64 alt_event; - - alt[num_alt++] = event; - - i = find_alternative(event); - if (i >= 0) { - /* Filter out the original event, it's already in alt[0] */ - for (j = 0; j < MAX_ALT; ++j) { - alt_event = event_alternatives[i][j]; - if (alt_event && alt_event != event) - alt[num_alt++] = alt_event; - } - } + num_alt = isa207_get_alternatives(event, alt, event_alternatives, + (int)ARRAY_SIZE(event_alternatives)); if (flags & PPMU_ONLY_COUNT_RUN) { /* * We're only counting in RUN state, so PM_CYC is equivalent to -- GitLab From ac19670eb1798749121647bd9f0b403f52e31e6b Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 12 Feb 2017 22:33:11 +0530 Subject: [PATCH 0273/1084] powerpc/perf: Add PM_INST_DISP event to Power9 event list Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/power9-events-list.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h index 929b56d47ad9..71a6bfee5c02 100644 --- a/arch/powerpc/perf/power9-events-list.h +++ b/arch/powerpc/perf/power9-events-list.h @@ -53,3 +53,6 @@ EVENT(PM_ITLB_MISS, 0x400fc) EVENT(PM_RUN_INST_CMPL, 0x500fa) /* Run_cycles */ EVENT(PM_RUN_CYC, 0x600f4) +/* Instruction Dispatched */ +EVENT(PM_INST_DISP, 0x200f2) +EVENT(PM_INST_DISP_ALT, 0x300f2) -- GitLab From a114aca57a59cd92189bf3795ac082a5e8fac0f1 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 12 Feb 2017 22:33:12 +0530 Subject: [PATCH 0274/1084] powerpc/perf: Add alternative event table and function for power9 Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/power9-pmu.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 7332634e18c9..b38acff8a791 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -106,6 +106,21 @@ enum { /* PowerISA v2.07 format attribute structure*/ extern struct attribute_group isa207_pmu_format_group; +/* Table of alternatives, sorted by column 0 */ +static const unsigned int power9_event_alternatives[][MAX_ALT] = { + { PM_INST_DISP, PM_INST_DISP_ALT }, +}; + +static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +{ + int num_alt = 0; + + num_alt = isa207_get_alternatives(event, alt, power9_event_alternatives, + (int)ARRAY_SIZE(power9_event_alternatives)); + + return num_alt; +} + GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC); GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); @@ -383,6 +398,7 @@ static struct power_pmu power9_isa207_pmu = { .config_bhrb = power9_config_bhrb, .bhrb_filter_map = power9_bhrb_filter_map, .get_constraint = isa207_get_constraint, + .get_alternatives = power9_get_alternatives, .disable_pmc = isa207_disable_pmc, .flags = PPMU_NO_SIAR | PPMU_ARCH_207S, .n_generic = ARRAY_SIZE(power9_generic_events), @@ -401,6 +417,7 @@ static struct power_pmu power9_pmu = { .config_bhrb = power9_config_bhrb, .bhrb_filter_map = power9_bhrb_filter_map, .get_constraint = 
isa207_get_constraint, + .get_alternatives = power9_get_alternatives, .disable_pmc = isa207_disable_pmc, .flags = PPMU_HAS_SIER | PPMU_ARCH_207S, .n_generic = ARRAY_SIZE(power9_generic_events), -- GitLab From 8a088542c8f8d0bb458e4db2c01a2534f1adaf47 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 12 Feb 2017 22:33:13 +0530 Subject: [PATCH 0275/1084] powerpc/perf: Use PM_INST_DISP for generic instructions sample Since PM_INST_CMPL may not provide right counts in all sampling scenarios in power9 DD1, instead use PM_INST_DISP. Patch also update generic instruction sampling with the same. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/power9-pmu.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index b38acff8a791..454e9f70894f 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -228,6 +228,17 @@ static const struct attribute_group *power9_pmu_attr_groups[] = { NULL, }; +static int power9_generic_events_dd1[] = { + [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_DISP, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_CMPL, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN, +}; + static int power9_generic_events[] = { [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC, @@ -401,8 +412,8 @@ static struct power_pmu power9_isa207_pmu = { .get_alternatives = power9_get_alternatives, .disable_pmc = isa207_disable_pmc, .flags = PPMU_NO_SIAR | PPMU_ARCH_207S, - .n_generic = ARRAY_SIZE(power9_generic_events), - .generic_events = power9_generic_events, + .n_generic = ARRAY_SIZE(power9_generic_events_dd1), + .generic_events = power9_generic_events_dd1, .cache_events = &power9_cache_events, .attr_groups = power9_isa207_pmu_attr_groups, .bhrb_nr = 32, @@ -437,6 +448,11 @@ static int __init init_power9_pmu(void) return -ENODEV; if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + /* + * Since PM_INST_CMPL may not provide right counts in all + * sampling scenarios in power9 DD1, instead use PM_INST_DISP. + */ + EVENT_VAR(PM_INST_CMPL, _g).id = PM_INST_DISP; rc = register_power_pmu(&power9_isa207_pmu); } else { rc = register_power_pmu(&power9_pmu); -- GitLab From 356d8ce3d0a4a1d7c8448c4d234121736ad3d471 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 12 Feb 2017 22:33:14 +0530 Subject: [PATCH 0276/1084] powerpc/perf: Use Instruction Counter value Since PM_INST_DISP include speculative instruction, based on the workload the dispatch count could vary considerably. Hence as an alternative, for completed instruction counting, program the PM_INST_DISP event to the MMCR* but use Instruction Counter register value. 
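Condensed, the workaround snapshots the Instruction Counter SPR when the event is scheduled in and reports the difference on each read; a sketch of the logic added below, with the surrounding bookkeeping stripped:

    /* at power_pmu_add() time: remember the starting IC value */
    cpuhw->ic_init = mfspr(SPRN_IC);

    /* at power_pmu_read() time: use the IC delta instead of the PMC value */
    val = mfspr(SPRN_IC);
    if (val > cpuhw->ic_init)
            val = val - cpuhw->ic_init;
    else                            /* IC wrapped since the event was added */
            val = val + (0 - cpuhw->ic_init);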
Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/core-book3s.c | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 270eb9b74e2e..87d17a1f7168 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -57,6 +57,7 @@ struct cpu_hw_events { void *bhrb_context; struct perf_branch_stack bhrb_stack; struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES]; + u64 ic_init; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); @@ -127,6 +128,10 @@ static inline void power_pmu_bhrb_disable(struct perf_event *event) {} static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {} static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } +static bool use_ic(u64 event) +{ + return false; +} #endif /* CONFIG_PPC32 */ static bool regs_use_siar(struct pt_regs *regs) @@ -688,6 +693,15 @@ static void pmao_restore_workaround(bool ebb) mtspr(SPRN_PMC5, pmcs[4]); mtspr(SPRN_PMC6, pmcs[5]); } + +static bool use_ic(u64 event) +{ + if (cpu_has_feature(CPU_FTR_POWER9_DD1) && + (event == 0x200f2 || event == 0x300f2)) + return true; + + return false; +} #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -1007,6 +1021,7 @@ static u64 check_and_compute_delta(u64 prev, u64 val) static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (event->hw.state & PERF_HES_STOPPED) return; @@ -1016,6 +1031,13 @@ static void power_pmu_read(struct perf_event *event) if (is_ebb_event(event)) { val = read_pmc(event->hw.idx); + if (use_ic(event->attr.config)) { + val = mfspr(SPRN_IC); + if (val > cpuhw->ic_init) + val = val - cpuhw->ic_init; + else + val = val + (0 - cpuhw->ic_init); + } local64_set(&event->hw.prev_count, val); return; } @@ -1029,6 +1051,13 @@ static void power_pmu_read(struct perf_event *event) prev = local64_read(&event->hw.prev_count); barrier(); val = read_pmc(event->hw.idx); + if (use_ic(event->attr.config)) { + val = mfspr(SPRN_IC); + if (val > cpuhw->ic_init) + val = val - cpuhw->ic_init; + else + val = val + (0 - cpuhw->ic_init); + } delta = check_and_compute_delta(prev, val); if (!delta) return; @@ -1466,6 +1495,13 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) event->attr.branch_sample_type); } + /* + * Workaround for POWER9 DD1 to use the Instruction Counter + * register value for instruction counting + */ + if (use_ic(event->attr.config)) + cpuhw->ic_init = mfspr(SPRN_IC); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; -- GitLab From 8d911904f3ce412b20874a9c95f82009dcbb007c Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 12 Feb 2017 22:33:15 +0530 Subject: [PATCH 0277/1084] powerpc/perf: Add restrictions to PMC5 in power9 DD1 PMC5 on POWER9 DD1 may not provide right counts in all sampling scenarios, hence use PM_INST_DISP event instead in PMC2 or PMC3 in preference. 
Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/isa207-common.h | 4 ++++ arch/powerpc/perf/power9-pmu.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 3e9150f6690a..cf9bd8990159 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -222,6 +222,10 @@ CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \ CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL +/* + * Lets restrict use of PMC5 for instruction counting. + */ +#define P9_DD1_TEST_ADDER (ISA207_TEST_ADDER | CNST_PMC_VAL(5)) /* Bits in MMCR1 for PowerISA v2.07 */ #define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1))) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 454e9f70894f..5fe9cb1dc3b6 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -423,7 +423,7 @@ static struct power_pmu power9_pmu = { .name = "POWER9", .n_counter = MAX_PMU_COUNTERS, .add_fields = ISA207_ADD_FIELDS, - .test_adder = ISA207_TEST_ADDER, + .test_adder = P9_DD1_TEST_ADDER, .compute_mmcr = isa207_compute_mmcr, .config_bhrb = power9_config_bhrb, .bhrb_filter_map = power9_bhrb_filter_map, -- GitLab From 78a16d9fc1206e1a484b6ac96348756f3846bfea Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 13 Feb 2017 17:02:54 +0530 Subject: [PATCH 0278/1084] powerpc/perf: Avoid FAB_*_MATCH checks for power9 Since power9 does not support FAB_*_MATCH bits in MMCR1, avoid these checks for power9. For this, patch factor out code in isa207_get_constraint() to retain these checks only for power8. Patch also updates the comment in power9-pmu raw event encode layout to remove FAB_*_MATCH. Finally for power9, patch adds additional check for threshold events when adding the thresh mask and value in isa207_get_constraint(). fixes: 7ffd948fae4c ('powerpc/perf: factor out power8 pmu functions') fixes: 18201b204286 ('powerpc/perf: power9 raw event format encoding') Signed-off-by: Ravi Bangoria Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/isa207-common.c | 58 +++++++++++++++++++++---------- arch/powerpc/perf/power9-pmu.c | 8 ++--- 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index a86fadee352b..e79fb5fb817d 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -97,6 +97,28 @@ static unsigned long combine_shift(unsigned long pmc) return MMCR1_COMBINE_SHIFT(pmc); } +static inline bool event_is_threshold(u64 event) +{ + return (event >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK; +} + +static bool is_thresh_cmp_valid(u64 event) +{ + unsigned int cmp, exp; + + /* + * Check the mantissa upper two bits are not zero, unless the + * exponent is also zero. See the THRESH_CMP_MANTISSA doc. + */ + cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; + exp = cmp >> 7; + + if (exp && (cmp & 0x60) == 0) + return false; + + return true; +} + int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) { unsigned int unit, pmc, cache, ebb; @@ -163,28 +185,26 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); } - /* - * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, - * the threshold control bits are used for the match value. 
- */ - if (event_is_fab_match(event)) { - mask |= CNST_FAB_MATCH_MASK; - value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { + mask |= CNST_THRESH_MASK; + value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); + } } else { /* - * Check the mantissa upper two bits are not zero, unless the - * exponent is also zero. See the THRESH_CMP_MANTISSA doc. + * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, + * the threshold control bits are used for the match value. */ - unsigned int cmp, exp; - - cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; - exp = cmp >> 7; - - if (exp && (cmp & 0x60) == 0) - return -1; + if (event_is_fab_match(event)) { + mask |= CNST_FAB_MATCH_MASK; + value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT); + } else { + if (!is_thresh_cmp_valid(event)) + return -1; - mask |= CNST_THRESH_MASK; - value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); + mask |= CNST_THRESH_MASK; + value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); + } } if (!pmc && ebb) @@ -279,7 +299,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev, * PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, * the threshold bits are used for the match value. */ - if (event_is_fab_match(event[i])) { + if (!cpu_has_feature(CPU_FTR_ARCH_300) && event_is_fab_match(event[i])) { mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT; } else { diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 5fe9cb1dc3b6..7f6582708e06 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -22,7 +22,7 @@ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | * | | [ ] [ ] [ thresh_cmp ] [ thresh_ctl ] * | | | | | - * | | *- IFM (Linux) | thresh start/stop OR FAB match -* + * | | *- IFM (Linux) | thresh start/stop -* * | *- BHRB (Linux) *sm * *- EBB (Linux) * @@ -50,11 +50,9 @@ * MMCR1[31] = pmc4combine[1] * * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011 - * # PM_MRK_FAB_RSP_MATCH - * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH) + * MMCR1[20:27] = thresh_ctl * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001 - * # PM_MRK_FAB_RSP_MATCH_CYC - * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH) + * MMCR1[20:27] = thresh_ctl * else * MMCRA[48:55] = thresh_ctl (THRESH START/END) * -- GitLab From a2391b35f1d9d5b51d43a9150c7239253565d5a6 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sat, 24 Dec 2016 11:35:49 +0530 Subject: [PATCH 0279/1084] powerpc/perf: use is_kernel_addr macro in perf_get_misc_flags() Cleanup to use is_kernel_addr macro. 
Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/core-book3s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 87d17a1f7168..595dd718ea87 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -248,7 +248,7 @@ static inline u32 perf_get_misc_flags(struct pt_regs *regs) */ if (ppmu->flags & PPMU_NO_SIPR) { unsigned long siar = mfspr(SPRN_SIAR); - if (siar >= PAGE_OFFSET) + if (is_kernel_addr(siar)) return PERF_RECORD_MISC_KERNEL; return PERF_RECORD_MISC_USER; } -- GitLab From 6ae3f8ad2017079292cb49c8959b527bcbcbefed Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 17 Feb 2017 13:01:35 +1100 Subject: [PATCH 0280/1084] powerpc: Add POWER9 architected mode to cputable PVR value of 0x0F000005 means we are arch v3.00 compliant (i.e. POWER9). Acked-by: Michael Neuling Signed-off-by: Russell Currey [mpe: Don't set num_pmcs, so we keep the PMU fields from the raw entry] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cputable.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 6a82ef039c50..bb7a1890aeb7 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -386,6 +386,23 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* 3.00-compliant processor, i.e. Power9 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000005, + .cpu_name = "POWER9 (architected)", + .cpu_features = CPU_FTRS_POWER9, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .oprofile_type = PPC_OPROFILE_INVALID, + .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .flush_tlb = __flush_tlb_power9, + .platform = "power9", + }, { /* Power7 */ .pvr_mask = 0xffff0000, .pvr_value = 0x003f0000, -- GitLab From 36c7c9da40c408a71e5e6bfe12e57dcf549a296d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 16 Feb 2017 10:22:32 +1100 Subject: [PATCH 0281/1084] pci/hotplug/pnv-php: Remove WARN_ON() in pnv_php_put_slot() The WARN_ON() causes unnecessary backtrace when putting the parent slot, which is likely to be NULL. 
WARNING: CPU: 2 PID: 1071 at drivers/pci/hotplug/pnv_php.c:85 \ pnv_php_release+0xcc/0x150 [pnv_php] : Call Trace: [c0000003bc007c10] [d00000000ad613c4] pnv_php_release+0x144/0x150 [pnv_php] [c0000003bc007c40] [c0000000006641d8] pci_hp_deregister+0x238/0x330 [c0000003bc007cd0] [d00000000ad61440] pnv_php_unregister_one+0x70/0xa0 [pnv_php] [c0000003bc007d10] [d00000000ad614c0] pnv_php_unregister+0x50/0x80 [pnv_php] [c0000003bc007d40] [d00000000ad61e84] pnv_php_exit+0x50/0xcb4 [pnv_php] [c0000003bc007d70] [c00000000019499c] SyS_delete_module+0x1fc/0x2a0 [c0000003bc007e30] [c00000000000b184] system_call+0x38/0xe0 Cc: # v4.8+ Fixes: 66725152fb9f ("PCI/hotplug: PowerPC PowerNV PCI hotplug driver") Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Tested-by: Vaibhav Jain Signed-off-by: Michael Ellerman --- drivers/pci/hotplug/pnv_php.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 63cd9f354b79..da1bbf3eda55 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -76,7 +76,7 @@ static void pnv_php_free_slot(struct kref *kref) static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot) { - if (WARN_ON(!php_slot)) + if (!php_slot) return; kref_put(&php_slot->kref, pnv_php_free_slot); -- GitLab From 303529d6ef1293513c2c73c9ab86489eebb37d08 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 16 Feb 2017 10:22:33 +1100 Subject: [PATCH 0282/1084] pci/hotplug/pnv-php: Disable surprise hotplug capability on conflicts The root port or PCIe switch downstream port might have been associated with driver other than pnv-php. The MSI or MSIx might also have been enabled by that driver (e.g. pcieport_drv). Attempt to enable MSI incurs below backtrace: PowerPC PowerNV PCI Hotplug Driver version: 0.1 ------------[ cut here ]------------ WARNING: CPU: 19 PID: 1004 at drivers/pci/msi.c:1071 \ __pci_enable_msi_range+0x84/0x4e0 NIP [c000000000665c34] __pci_enable_msi_range+0x84/0x4e0 LR [c000000000665c24] __pci_enable_msi_range+0x74/0x4e0 Call Trace: [c000000384d67600] [c000000000665c24] __pci_enable_msi_range+0x74/0x4e0 [c000000384d676e0] [d00000000aa31b04] pnv_php_register+0x564/0x5a0 [pnv_php] [c000000384d677c0] [d00000000aa31658] pnv_php_register+0xb8/0x5a0 [pnv_php] [c000000384d678a0] [d00000000aa31658] pnv_php_register+0xb8/0x5a0 [pnv_php] [c000000384d67980] [d00000000aa31dfc] pnv_php_init+0x60/0x98 [pnv_php] [c000000384d679f0] [c00000000000cfdc] do_one_initcall+0x6c/0x1d0 [c000000384d67ab0] [c000000000b92354] do_init_module+0x94/0x254 [c000000384d67b40] [c00000000019719c] load_module+0x258c/0x2c60 [c000000384d67d30] [c000000000197bb0] SyS_finit_module+0xf0/0x170 [c000000384d67e30] [c00000000000b184] system_call+0x38/0xe0 This fixes the issue by skipping enabling the surprise hotplug capability if the MSI or MSIx on the PCI slot's upstream port has been enabled by other driver. 
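The guard used is the generic PCI helper pci_dev_msi_enabled(), which reports whether MSI or MSI-X is already enabled on the device; when it is, the slot leaves the interrupt alone. A rough kernel-context sketch of the pattern (it mirrors the check added below; the function name is hypothetical and the snippet will not build outside a kernel tree):

#include <linux/pci.h>

/*
 * Hypothetical setup path: decline to claim MSI/MSI-X that another
 * driver (e.g. the PCIe port services driver) has already enabled on
 * this device, rather than enabling it a second time.
 */
static void example_enable_slot_irq(struct pci_dev *pdev)
{
        if (pci_dev_msi_enabled(pdev))
                return;         /* interrupt already owned elsewhere */

        /* ... pci_enable_device(), MSI/MSI-X setup, request_irq() ... */
}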
Cc: # v4.9+ Fixes: 360aebd85a4c ("drivers/pci/hotplug: Support surprise hotplug in powernv driver") Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Tested-by: Vaibhav Jain Signed-off-by: Michael Ellerman --- drivers/pci/hotplug/pnv_php.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index da1bbf3eda55..6d36b7630491 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -826,6 +826,14 @@ static void pnv_php_enable_irq(struct pnv_php_slot *php_slot) struct pci_dev *pdev = php_slot->pdev; int irq, ret; + /* + * The MSI/MSIx interrupt might have been occupied by other + * drivers. Don't populate the surprise hotplug capability + * in that case. + */ + if (pci_dev_msi_enabled(pdev)) + return; + ret = pci_enable_device(pdev); if (ret) { dev_warn(&pdev->dev, "Error %d enabling device\n", ret); -- GitLab From 49f4b08e61547a5ccd2db551d994c4503efe5666 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 16 Feb 2017 10:22:34 +1100 Subject: [PATCH 0283/1084] pci/hotplug/pnv-php: Disable MSI and PCI device properly pnv_php_disable_irq() can be called in two paths: Bailing path in pnv_php_enable_irq() or releasing slot. The MSI (or MSIx) interrupts is disabled unconditionally in pnv_php_disable_irq(). It's wrong because that might be enabled by drivers other than pnv-php. This disables MSI (or MSIx) interrupts and the PCI device only if it was enabled by pnv-php. In the error path of pnv_php_enable_irq(), we rely on the newly added parameter @disable_device. In the path of releasing slot, @pnv_php->irq is checked. Cc: # v4.9+ Fixes: 360aebd85a4c ("drivers/pci/hotplug: Support surprise hotplug in powernv driver") Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- drivers/pci/hotplug/pnv_php.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 6d36b7630491..335a55684cd8 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -35,9 +35,11 @@ static void pnv_php_register(struct device_node *dn); static void pnv_php_unregister_one(struct device_node *dn); static void pnv_php_unregister(struct device_node *dn); -static void pnv_php_disable_irq(struct pnv_php_slot *php_slot) +static void pnv_php_disable_irq(struct pnv_php_slot *php_slot, + bool disable_device) { struct pci_dev *pdev = php_slot->pdev; + int irq = php_slot->irq; u16 ctrl; if (php_slot->irq > 0) { @@ -56,10 +58,14 @@ static void pnv_php_disable_irq(struct pnv_php_slot *php_slot) php_slot->wq = NULL; } - if (pdev->msix_enabled) - pci_disable_msix(pdev); - else if (pdev->msi_enabled) - pci_disable_msi(pdev); + if (disable_device || irq > 0) { + if (pdev->msix_enabled) + pci_disable_msix(pdev); + else if (pdev->msi_enabled) + pci_disable_msi(pdev); + + pci_disable_device(pdev); + } } static void pnv_php_free_slot(struct kref *kref) @@ -68,7 +74,7 @@ static void pnv_php_free_slot(struct kref *kref) struct pnv_php_slot, kref); WARN_ON(!list_empty(&php_slot->children)); - pnv_php_disable_irq(php_slot); + pnv_php_disable_irq(php_slot, false); kfree(php_slot->name); kfree(php_slot); } @@ -777,7 +783,7 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name); if (!php_slot->wq) { dev_warn(&pdev->dev, "Cannot alloc workqueue\n"); - pnv_php_disable_irq(php_slot); + pnv_php_disable_irq(php_slot, 
true); return; } @@ -799,7 +805,7 @@ static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED, php_slot->name, php_slot); if (ret) { - pnv_php_disable_irq(php_slot); + pnv_php_disable_irq(php_slot, true); dev_warn(&pdev->dev, "Error %d enabling IRQ %d\n", ret, irq); return; } -- GitLab From c618f6b188a9170f67e4abd478d250cc72aed1e1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 8 Feb 2017 14:16:50 +1100 Subject: [PATCH 0284/1084] powerpc/mm: Fix typo in set_pte_at() This fixes the typo about the _PAGE_PTE in set_pte_at() by changing "tryint" to "trying to". Signed-off-by: Gavin Shan Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index cb39c8bd2436..a03ff3d99e0c 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -193,9 +193,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, */ VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep)); - /* - * Add the pte bit when tryint set a pte - */ + /* Add the pte bit when trying to set a pte */ pte = __pte(pte_val(pte) | _PAGE_PTE); /* Note: mm->context.id might not yet have been assigned as -- GitLab From 727597d12140b342a3deef10348b5edeb076275d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 8 Feb 2017 14:11:03 +1100 Subject: [PATCH 0285/1084] powerpc/kernel: Remove error message in pcibios_setup_phb_resources() The CAPI driver creates virtual PHB (vPHB) from the CAPI adapter. The vPHB's IO and memory windows aren't built from device-tree node as we do for normal PHBs. A error message is thrown in below path when trying to probe AFUs contained in the adapter. The error message is confusing and unnecessary. cxl_probe() pci_init_afu() cxl_pci_vphb_add() pcibios_scan_phb() pcibios_setup_phb_resources() This removes the error message. We might have the case where the first memory window on real PHB isn't populated properly because of error in "ranges" property in the device-tree node. We can check the device-tree instead for that. This also removes one unnecessary blank line in the function. Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 74bec5498972..05bfdaeaa2f9 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1559,16 +1559,10 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, /* Hookup PHB Memory resources */ for (i = 0; i < 3; ++i) { res = &hose->mem_resources[i]; - if (!res->flags) { - if (i == 0) - printk(KERN_ERR "PCI: Memory resource 0 not set for " - "host bridge %s (domain %d)\n", - hose->dn->full_name, hose->global_number); + if (!res->flags) continue; - } - offset = hose->mem_offset[i]; - + offset = hose->mem_offset[i]; pr_debug("PCI: PHB MEM resource %d = %pR off 0x%08llx\n", i, res, (unsigned long long)offset); -- GitLab From 02983449c87b1dfd9b75af4c8a2a8057f9664c08 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 11 Jan 2017 12:09:05 +1100 Subject: [PATCH 0286/1084] powerpc/powernv: Remove unused variable in pnv_pci_sriov_disable() The local variable @iov isn't used, to remove it. 
Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 8278f43ad4b8..06c91705562e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1468,14 +1468,12 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) struct pnv_phb *phb; struct pnv_ioda_pe *pe; struct pci_dn *pdn; - struct pci_sriov *iov; u16 num_vfs, i; bus = pdev->bus; hose = pci_bus_to_host(bus); phb = hose->private_data; pdn = pci_get_pdn(pdev); - iov = pdev->sriov; num_vfs = pdn->num_vfs; /* Release VF PEs */ -- GitLab From 8bd8c6533369a013297e0eec7898b37290852a73 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 15 Feb 2017 21:31:40 -0300 Subject: [PATCH 0287/1084] tools perf scripting python: clang doesn't have -spec, remove it Gcc has a -spec option to override what options to pass to cc, etc, and in some distros this is used, like in fedora, where we end up getting this passed to gcc that makes clang, that doesn't have this option to stop the build: CC /tmp/build/perf/util/scripting-engines/trace-event-python.o clang-4.0: error: argument unused during compilation: '-specs=/usr/lib/rpm/redhat/redhat-hardened-cc1' [-Werror,-Wunused-command-line-argument] So filter this out when the compiler used is clang, this way we can build the python scripting support in tools/perf/. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2gosxoiouf24pnlknp7w7q4z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 7 +++++++ tools/perf/util/scripting-engines/trace-event-python.c | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 2b941efadb04..27c9fbca7bd9 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -175,6 +175,10 @@ PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) +ifeq ($(CC), clang) + PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) +endif + FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) @@ -601,6 +605,9 @@ else PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) + ifeq ($(CC), clang) + PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) + endif FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) ifneq ($(feature-libpython), 1) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 089438da1f7f..89b532c47cc4 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -368,10 +368,10 @@ static PyObject *python_process_callchain(struct perf_sample *sample, if (node->map) { struct map *map = node->map; const char *dsoname = "[unknown]"; - if (map && map->dso && (map->dso->name || map->dso->long_name)) { + if (map && 
map->dso) { if (symbol_conf.show_kernel_path && map->dso->long_name) dsoname = map->dso->long_name; - else if (map->dso->name) + else dsoname = map->dso->name; } pydict_set_item_string_decref(pyelem, "dso", -- GitLab From 4be92cf018c391c4ecc5fd091fae8c152de3692f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 15 Feb 2017 21:36:48 -0300 Subject: [PATCH 0288/1084] perf python: Filter out -specs=/a/b/c from the python binding cc options The -spec=/path/to/file can be used to change what gcc puts in the cc, ld, etc command lines, but this is not present in clang, filter it out at the setup.py file by changing python2's internal variable where it keeps its initial CFLAGS value. With this all of perf can be built in at least Fedora 25, fixing this problem: GEN /tmp/build/perf/python/perf.so CC /tmp/build/perf/builtin-buildid-list.o clang-4.0: error: argument unused during compilation: '-specs=/usr/lib/rpm/redhat/redhat-hardened-cc1' [-Werror,-Wunused-command-line-argument] clang-4.0: error: argument unused during compilation: '-specs=/usr/lib/rpm/redhat/redhat-hardened-cc1' [-Werror,-Wunused-command-line-argument] error: command 'clang' failed with exit status 1 Now I need to change all the containers where I have clang to build perf with it, so that we can check that in other distros (opensuse, debian, ubuntu, etc) this also works. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-g9lhgr162ao8ao29vvf0hgm1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index c8680984d2d6..af415febbc46 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,8 +1,15 @@ #!/usr/bin/python2 -from distutils.core import setup, Extension from os import getenv +cc = getenv("CC") +if cc == "clang": + from _sysconfigdata import build_time_vars + from re import sub + build_time_vars["CFLAGS"] = sub("-specs=[^ ]+", "", build_time_vars["CFLAGS"]) + +from distutils.core import setup, Extension + from distutils.command.build_ext import build_ext as _build_ext from distutils.command.install_lib import install_lib as _install_lib -- GitLab From 9c72fd0ff092526ea25c5a59d4a6ed94aeb4a66f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 16 Feb 2017 20:13:52 -0500 Subject: [PATCH 0289/1084] tools lib traceevent: It's preempt not prempt Fix the typo of the function name pevent_data_prempt_count() Signed-off-by: Steven Rostedt Fixes: c52d9e4e677b ("tools lib traceevent: Add retrieval of preempt count and latency flags") Link: http://lkml.kernel.org/r/20170216201352.469c99de@grimm.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 4 ++-- tools/lib/traceevent/event-parse.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 14a4f623c1a5..32171310bf82 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5204,13 +5204,13 @@ int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec) } /** - * pevent_data_prempt_count - parse the preempt count from the record + * pevent_data_preempt_count - parse the preempt count from the record * @pevent: a handle to the pevent * @rec: the record to parse * * This returns the preempt count from a record. 
*/ -int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec) +int pevent_data_preempt_count(struct pevent *pevent, struct pevent_record *rec) { return parse_common_pc(pevent, rec->data); } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 7aae746ec2fe..67daa01260a9 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -709,7 +709,7 @@ void pevent_data_lat_fmt(struct pevent *pevent, int pevent_data_type(struct pevent *pevent, struct pevent_record *rec); struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type); int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec); -int pevent_data_prempt_count(struct pevent *pevent, struct pevent_record *rec); +int pevent_data_preempt_count(struct pevent *pevent, struct pevent_record *rec); int pevent_data_flags(struct pevent *pevent, struct pevent_record *rec); const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid); struct cmdline; -- GitLab From 8074bf51fe5af68dcbc12bd7fa72dc4be8f41dde Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 17 Feb 2017 12:27:26 -0300 Subject: [PATCH 0290/1084] perf session: Fix DEBUG=1 build with clang The struct branch_stack->branch_stack.cycles field is a u64 :16 bitfield, and this somehow confuses clang 4.0 when checking the arguments of a printf format, so cast the :16 to unsigned short to help it. Silences this: util/session.c:935:4: error: format specifies type 'unsigned short' but the argument has type 'u64' (aka 'unsigned long') [-Werror,-Wformat] e->flags.cycles, ^~~~~~~~~~~~~~~ 1 error generated. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-eo2t4uhlbne105z72tvyzkp1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 4cdbc8f5f14d..1dd617d116b5 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -932,7 +932,7 @@ static void branch_stack__printf(struct perf_sample *sample) printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", i, e->from, e->to, - e->flags.cycles, + (unsigned short)e->flags.cycles, e->flags.mispred ? "M" : " ", e->flags.predicted ? "P" : " ", e->flags.abort ? "A" : " ", -- GitLab From 92a7e1278005b6bb3459affc50b2b6e2464e7e7c Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Fri, 17 Feb 2017 12:10:24 +0100 Subject: [PATCH 0291/1084] perf cpumap: Add cpu__max_present_cpu() Similar to cpu__max_cpu() (which returns the max possible CPU), returns the max present CPU. 
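For context, "present" refers to the sysfs cpulist in /sys/devices/system/cpu/present, which may be sparse (for example "0-1,6-11,16-27"). A small standalone sketch of finding the highest id in such a list (illustrative only; the perf change below reads the same file through its existing get_max_num() helper):

#include <stdio.h>
#include <stdlib.h>

/*
 * Illustrative only: return the highest CPU id named in a sysfs-style
 * cpulist such as "0-1,6-11,16-27", or -1 if no number is found.
 */
static int cpulist_max(const char *list)
{
        int max = -1;
        const char *p = list;

        while (*p) {
                char *end;
                long v = strtol(p, &end, 10);

                if (end == p)
                        break;                  /* no further numbers */
                if (v > max)
                        max = (int)v;
                p = end;
                while (*p == '-' || *p == ',' || *p == ' ' || *p == '\n')
                        p++;
        }
        return max;
}

int main(void)
{
        printf("%d\n", cpulist_max("0-1,6-11,16-27"));  /* prints 27 */
        return 0;
}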
Signed-off-by: Jan Stancek Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/8ea4601b5cacc49927235b4ebac424bd6eeccb06.1487146877.git.jstancek@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cpumap.c | 22 ++++++++++++++++++++++ tools/perf/util/cpumap.h | 1 + 2 files changed, 23 insertions(+) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 2c0b52264a46..8c7504939113 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -9,6 +9,7 @@ #include "asm/bug.h" static int max_cpu_num; +static int max_present_cpu_num; static int max_node_num; static int *cpunode_map; @@ -442,6 +443,7 @@ static void set_max_cpu_num(void) /* set up default */ max_cpu_num = 4096; + max_present_cpu_num = 4096; mnt = sysfs__mountpoint(); if (!mnt) @@ -455,6 +457,17 @@ static void set_max_cpu_num(void) } ret = get_max_num(path, &max_cpu_num); + if (ret) + goto out; + + /* get the highest present cpu number for a sparse allocation */ + ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt); + if (ret == PATH_MAX) { + pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); + goto out; + } + + ret = get_max_num(path, &max_present_cpu_num); out: if (ret) @@ -505,6 +518,15 @@ int cpu__max_cpu(void) return max_cpu_num; } +int cpu__max_present_cpu(void) +{ + if (unlikely(!max_present_cpu_num)) + set_max_cpu_num(); + + return max_present_cpu_num; +} + + int cpu__get_node(int cpu) { if (unlikely(cpunode_map == NULL)) { diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 06bd689f5989..1a0549af8f5c 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -62,6 +62,7 @@ int cpu__setup_cpunode_map(void); int cpu__max_node(void); int cpu__max_cpu(void); +int cpu__max_present_cpu(void); int cpu__get_node(int cpu); int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, -- GitLab From 43db2843a4a41cc8cdb6ab696639aeee1f4d5062 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Fri, 17 Feb 2017 12:10:25 +0100 Subject: [PATCH 0292/1084] perf header: Make build_cpu_topology skip offline/absent CPUs When build_cpu_topo() encounters offline/absent CPUs, it fails to find any sysfs entries and returns failure. This leads to build_cpu_topology() and write_cpu_topology() failing as well. Because HEADER_CPU_TOPOLOGY has not been written, read leaves cpu_topology_map NULL and we get NULL ptr deref at: ... cmd_test __cmd_test test_and_print run_test test_session_topology check_cpu_topology 36: Session topology : --- start --- test child forked, pid 14902 templ file: /tmp/perf-test-4CKocW failed to write feature HEADER_CPU_TOPOLOGY perf: Segmentation fault Obtained 9 stack frames. ./perf(sighandler_dump_stack+0x41) [0x5095f1] /lib64/libc.so.6(+0x35250) [0x7f4b7c3c9250] ./perf(test_session_topology+0x1db) [0x490ceb] ./perf() [0x475b68] ./perf(cmd_test+0x5b9) [0x4763c9] ./perf() [0x4945a3] ./perf(main+0x69f) [0x427e8f] /lib64/libc.so.6(__libc_start_main+0xf5) [0x7f4b7c3b5b35] ./perf() [0x427fb9] test child interrupted ---- end ---- Session topology: FAILED! This patch makes build_cpu_topology() skip offline/absent CPUs, by checking their presence against cpu_map built from online CPUs. 
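The reason the blind 0..N-1 probe fails is visible from userspace as well: for an offline or absent CPU the per-cpu topology files are typically not present in sysfs at all. A rough standalone sketch that probes for them directly (illustrative only; it is not what the patch does, which checks against an online cpu_map, and the 32-CPU bound is arbitrary):

#include <stdio.h>
#include <unistd.h>

/*
 * Illustrative only: skip CPU ids whose sysfs topology entry is absent,
 * as is typically the case for offline or non-present CPUs.
 */
int main(void)
{
        char path[256];
        int cpu;

        for (cpu = 0; cpu < 32; cpu++) {        /* 32: arbitrary upper bound */
                snprintf(path, sizeof(path),
                         "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
                if (access(path, R_OK))
                        continue;               /* no topology entry for this id */
                printf("cpu%d has topology data\n", cpu);
        }
        return 0;
}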
Signed-off-by: Jan Stancek Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/a271b770175524f4961d4903af33798358a4a518.1487146877.git.jstancek@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 3d12c16e5103..1222f6c5e7b3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -505,24 +505,31 @@ static void free_cpu_topo(struct cpu_topo *tp) static struct cpu_topo *build_cpu_topology(void) { - struct cpu_topo *tp; + struct cpu_topo *tp = NULL; void *addr; u32 nr, i; size_t sz; long ncpus; int ret = -1; + struct cpu_map *map; ncpus = sysconf(_SC_NPROCESSORS_CONF); if (ncpus < 0) return NULL; + /* build online CPU map */ + map = cpu_map__new(NULL); + if (map == NULL) { + pr_debug("failed to get system cpumap\n"); + return NULL; + } + nr = (u32)(ncpus & UINT_MAX); sz = nr * sizeof(char *); - addr = calloc(1, sizeof(*tp) + 2 * sz); if (!addr) - return NULL; + goto out_free; tp = addr; tp->cpu_nr = nr; @@ -532,10 +539,16 @@ static struct cpu_topo *build_cpu_topology(void) tp->thread_siblings = addr; for (i = 0; i < nr; i++) { + if (!cpu_map__has(map, i)) + continue; + ret = build_cpu_topo(tp, i); if (ret < 0) break; } + +out_free: + cpu_map__put(map); if (ret) { free_cpu_topo(tp); tp = NULL; -- GitLab From da8a58b56c661681f9b2fd2fa59c6da3a5bac8d1 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Fri, 17 Feb 2017 12:10:26 +0100 Subject: [PATCH 0293/1084] perf tools: Replace _SC_NPROCESSORS_CONF with max_present_cpu in cpu_topology_map There are 2 problems wrt. cpu_topology_map on systems with sparse CPUs: 1. offline/absent CPUs will have their socket_id and core_id set to -1 which triggers: "socket_id number is too big.You may need to upgrade the perf tool." 2. size of cpu_topology_map (perf_env.cpu[]) is allocated based on _SC_NPROCESSORS_CONF, but can be indexed with CPU ids going above. Users of perf_env.cpu[] are using CPU id as index. This can lead to read beyond what was allocated: ==19991== Invalid read of size 4 ==19991== at 0x490CEB: check_cpu_topology (topology.c:69) ==19991== by 0x490CEB: test_session_topology (topology.c:106) ... For example: _SC_NPROCESSORS_CONF == 16 available: 2 nodes (0-1) node 0 cpus: 0 6 8 10 16 22 24 26 node 0 size: 12004 MB node 0 free: 9470 MB node 1 cpus: 1 7 9 11 23 25 27 node 1 size: 12093 MB node 1 free: 9406 MB node distances: node 0 1 0: 10 20 1: 20 10 This patch changes HEADER_NRCPUS.nr_cpus_available from _SC_NPROCESSORS_CONF to max_present_cpu and updates any user of cpu_topology_map to iterate with nr_cpus_avail. As a consequence HEADER_CPU_TOPOLOGY core_id and socket_id lists get longer, but maintain compatibility with pre-patch state - index to cpu_topology_map is CPU id. 
perf test 36 -v 36: Session topology : --- start --- test child forked, pid 22211 templ file: /tmp/perf-test-gmdX5i CPU 0, core 0, socket 0 CPU 1, core 0, socket 1 CPU 6, core 10, socket 0 CPU 7, core 10, socket 1 CPU 8, core 1, socket 0 CPU 9, core 1, socket 1 CPU 10, core 9, socket 0 CPU 11, core 9, socket 1 CPU 16, core 0, socket 0 CPU 22, core 10, socket 0 CPU 23, core 10, socket 1 CPU 24, core 1, socket 0 CPU 25, core 1, socket 1 CPU 26, core 9, socket 0 CPU 27, core 9, socket 1 test child finished with 0 ---- end ---- Session topology: Ok Signed-off-by: Jan Stancek Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/d7c05c6445fca74a8442c2c73cfffd349c52c44f.1487146877.git.jstancek@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- tools/perf/tests/topology.c | 4 +++- tools/perf/util/env.c | 2 +- tools/perf/util/header.c | 16 +++++----------- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f28719178b51..ca27a8a705ac 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1765,7 +1765,7 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, i cpu = map->map[idx]; - if (cpu >= env->nr_cpus_online) + if (cpu >= env->nr_cpus_avail) return -1; return cpu; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 98fe69ac553c..803f893550d6 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -65,7 +65,9 @@ static int check_cpu_topology(char *path, struct cpu_map *map) session = perf_session__new(&file, false, NULL); TEST_ASSERT_VAL("can't get session", session); - for (i = 0; i < session->header.env.nr_cpus_online; i++) { + for (i = 0; i < session->header.env.nr_cpus_avail; i++) { + if (!cpu_map__has(map, i)) + continue; pr_debug("CPU %d, core %d, socket %d\n", i, session->header.env.cpu[i].core_id, session->header.env.cpu[i].socket_id); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index bb964e86b09d..075fc77286bf 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -66,7 +66,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; if (env->nr_cpus_avail == 0) - env->nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF); + env->nr_cpus_avail = cpu__max_present_cpu(); nr_cpus = env->nr_cpus_avail; if (nr_cpus == -1) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1222f6c5e7b3..05714d548584 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -295,11 +295,7 @@ static int write_nrcpus(int fd, struct perf_header *h __maybe_unused, u32 nrc, nra; int ret; - nr = sysconf(_SC_NPROCESSORS_CONF); - if (nr < 0) - return -1; - - nrc = (u32)(nr & UINT_MAX); + nrc = cpu__max_present_cpu(); nr = sysconf(_SC_NPROCESSORS_ONLN); if (nr < 0) @@ -513,9 +509,7 @@ static struct cpu_topo *build_cpu_topology(void) int ret = -1; struct cpu_map *map; - ncpus = sysconf(_SC_NPROCESSORS_CONF); - if (ncpus < 0) - return NULL; + ncpus = cpu__max_present_cpu(); /* build online CPU map */ map = cpu_map__new(NULL); @@ -1139,7 +1133,7 @@ static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused, { int nr, i; char *str; - int cpu_nr = ph->env.nr_cpus_online; + int cpu_nr = ph->env.nr_cpus_avail; nr = ph->env.nr_sibling_cores; str = ph->env.sibling_cores; @@ -1794,7 +1788,7 @@ static int process_cpu_topology(struct perf_file_section *section, u32 nr, i; char *str; struct strbuf 
sb; - int cpu_nr = ph->env.nr_cpus_online; + int cpu_nr = ph->env.nr_cpus_avail; u64 size = 0; ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); @@ -1875,7 +1869,7 @@ static int process_cpu_topology(struct perf_file_section *section, if (ph->needs_swap) nr = bswap_32(nr); - if (nr > (u32)cpu_nr) { + if (nr != (u32)-1 && nr > (u32)cpu_nr) { pr_debug("socket_id number is too big." "You may need to upgrade the perf tool.\n"); goto free_cpu; -- GitLab From 85e0d509654c2e2e58b29e50a883acd4c4e8807d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 17 Feb 2017 15:00:54 +0100 Subject: [PATCH 0294/1084] perf build: Add special fixdep cleaning rule Ingo reported following build failure: On Sat, Feb 11, 2017 at 12:12:34PM +0100, Ingo Molnar wrote: > > So I had this oldish 32-bit 15.10 Ubuntu installation around (fully updated), and > trying to build perf gave me: > > deimos:~/tip/tools/perf> make > BUILD: Doing 'make -j4' parallel build > make[3]: *** No rule to make target '/usr/include/x86_64-linux-gnu/sys/types.h', needed by 'fixdep.o'. Stop. > Makefile:42: recipe for target 'fixdep-in.o' failed > make[2]: *** [fixdep-in.o] Error 2 > /home/mingo/tip/tools/build/Makefile.include:4: recipe for target 'fixdep' failed > make[1]: *** [fixdep] Error 2 > Makefile:68: recipe for target 'all' failed > make: *** [all] Error 2 > > Now this got a bit better after I did a 'make mrproper' in the kernel tree: > > deimos:~/tip/tools/perf> make > BUILD: Doing 'make -j4' parallel build > HOSTCC fixdep.o > /home/mingo/tip/tools/build/fixdep: 1: /home/mingo/tip/tools/build/fixdep: Syntax error: "(" unexpected > /home/mingo/tip/tools/build/Makefile.build:101: recipe for target 'fixdep.o' failed > make[3]: *** [fixdep.o] Error 2 > Makefile:42: recipe for target 'fixdep-in.o' failed > make[2]: *** [fixdep-in.o] Error 2 > /home/mingo/tip/tools/build/Makefile.include:4: recipe for target 'fixdep' failed > make[1]: *** [fixdep] Error 2 > Makefile:68: recipe for target 'all' failed > make: *** [all] Error 2 > > After some digging it turns out that my 'fixdep' binary was 64-bit: > > deimos:~/tip/tools/perf> file /home/mingo/tip/tools/build/fixdep > /home/mingo/tip/tools/build/fixdep: ELF 64-bit LSB executable, x86-64, version 1 > (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux > 2.6.32, BuildID[sha1]=d527f736b57b5ba47210fbcb562a3b52867d21c1, not stripped > > But it did not get cleaned out by 'make clean'. > > Only after I did a 'make clean' in tools/ itself, did it get built properly. It shows we don't clean up properly the fixdep objects, so adding special rule for that. Signed-off-by: Jiri Olsa Reported-by: Ingo Molnar Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1487340058-10496-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile | 4 ++-- tools/build/Makefile.include | 3 +++ tools/perf/Makefile.perf | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/build/Makefile b/tools/build/Makefile index aaf7ed329a45..477f00eda591 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -35,8 +35,8 @@ all: $(OUTPUT)fixdep clean: $(call QUIET_CLEAN, fixdep) - $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete - $(Q)rm -f fixdep + $(Q)find $(if $(OUTPUT),$(OUTPUT),.) 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)rm -f $(OUTPUT)fixdep $(OUTPUT)fixdep-in.o: FORCE $(Q)$(MAKE) $(build)=fixdep diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include index ad22e4e7bc59..d360f39a445b 100644 --- a/tools/build/Makefile.include +++ b/tools/build/Makefile.include @@ -3,4 +3,7 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj fixdep: $(Q)$(MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= $(OUTPUT)fixdep +fixdep-clean: + $(Q)$(MAKE) -C $(srctree)/tools/build clean + .PHONY: fixdep diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 4da19b6ba94a..79fe31f20a17 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -726,13 +726,13 @@ config-clean: $(call QUIET_CLEAN, config) $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ - $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \ + $(OUTPUT)util/intel-pt-decoder/inat-tables.c \ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean -- GitLab From 67b49b38f7bd6f34319b540ce824d5697241b3a8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 17 Feb 2017 15:00:55 +0100 Subject: [PATCH 0295/1084] perf tools: Move new_term arguments into struct parse_events_term template We need to add yet another parameter to new_term function in following patch, so it's better to move first all the current params into template struct parse_events_term and use it as a single argument. 
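This is the common "parameter object" refactoring in C: instead of growing the constructor's argument list again, callers fill a small template struct with designated initializers and pass a pointer; any field a caller does not mention is implicitly zero, so new fields (such as the 'no_value' flag added to the same template in the following patch) do not force every positional call to be rewritten. A tiny standalone sketch of the idea (the names are made up, not perf's):

#include <stdio.h>

struct term_template {
        int             type;
        int             err_col;
        const char      *config;
};

/* One constructor consuming a template instead of N positional arguments. */
static void new_term_from(struct term_template *out,
                          const struct term_template *tmpl)
{
        *out = *tmpl;           /* copy everything the caller filled in */
}

int main(void)
{
        struct term_template term;
        struct term_template tmpl = {
                .type   = 1,
                .config = "event",
                /* .err_col not mentioned: implicitly 0 */
        };

        new_term_from(&term, &tmpl);
        printf("%d %s %d\n", term.type, term.config, term.err_col);
        return 0;
}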
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1487340058-10496-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 69 +++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 281e44af31e2..984d99a8fdc5 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2318,24 +2318,20 @@ int parse_events__is_hardcoded_term(struct parse_events_term *term) return term->type_term != PARSE_EVENTS__TERM_TYPE_USER; } -static int new_term(struct parse_events_term **_term, int type_val, - int type_term, char *config, - char *str, u64 num, int err_term, int err_val) +static int new_term(struct parse_events_term **_term, + struct parse_events_term *temp, + char *str, u64 num) { struct parse_events_term *term; - term = zalloc(sizeof(*term)); + term = malloc(sizeof(*term)); if (!term) return -ENOMEM; + *term = *temp; INIT_LIST_HEAD(&term->list); - term->type_val = type_val; - term->type_term = type_term; - term->config = config; - term->err_term = err_term; - term->err_val = err_val; - switch (type_val) { + switch (term->type_val) { case PARSE_EVENTS__TERM_TYPE_NUM: term->val.num = num; break; @@ -2358,10 +2354,15 @@ int parse_events_term__num(struct parse_events_term **term, YYLTYPE *loc_term = loc_term_; YYLTYPE *loc_val = loc_val_; - return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term, - config, NULL, num, - loc_term ? loc_term->first_column : 0, - loc_val ? loc_val->first_column : 0); + struct parse_events_term temp = { + .type_val = PARSE_EVENTS__TERM_TYPE_NUM, + .type_term = type_term, + .config = config, + .err_term = loc_term ? loc_term->first_column : 0, + .err_val = loc_val ? loc_val->first_column : 0, + }; + + return new_term(term, &temp, NULL, num); } int parse_events_term__str(struct parse_events_term **term, @@ -2371,37 +2372,45 @@ int parse_events_term__str(struct parse_events_term **term, YYLTYPE *loc_term = loc_term_; YYLTYPE *loc_val = loc_val_; - return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term, - config, str, 0, - loc_term ? loc_term->first_column : 0, - loc_val ? loc_val->first_column : 0); + struct parse_events_term temp = { + .type_val = PARSE_EVENTS__TERM_TYPE_STR, + .type_term = type_term, + .config = config, + .err_term = loc_term ? loc_term->first_column : 0, + .err_val = loc_val ? 
loc_val->first_column : 0, + }; + + return new_term(term, &temp, str, 0); } int parse_events_term__sym_hw(struct parse_events_term **term, char *config, unsigned idx) { struct event_symbol *sym; + struct parse_events_term temp = { + .type_val = PARSE_EVENTS__TERM_TYPE_STR, + .type_term = PARSE_EVENTS__TERM_TYPE_USER, + .config = config ?: (char *) "event", + }; BUG_ON(idx >= PERF_COUNT_HW_MAX); sym = &event_symbols_hw[idx]; - if (config) - return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, - PARSE_EVENTS__TERM_TYPE_USER, config, - (char *) sym->symbol, 0, 0, 0); - else - return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, - PARSE_EVENTS__TERM_TYPE_USER, - (char *) "event", (char *) sym->symbol, - 0, 0, 0); + return new_term(term, &temp, (char *) sym->symbol, 0); } int parse_events_term__clone(struct parse_events_term **new, struct parse_events_term *term) { - return new_term(new, term->type_val, term->type_term, term->config, - term->val.str, term->val.num, - term->err_term, term->err_val); + struct parse_events_term temp = { + .type_val = term->type_val, + .type_term = term->type_term, + .config = term->config, + .err_term = term->err_term, + .err_val = term->err_val, + }; + + return new_term(new, &temp, term->val.str, term->val.num); } void parse_events_terms__purge(struct list_head *terms) -- GitLab From 99e7138eb7897aa0ccc6661173ae2d7e79721e05 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 17 Feb 2017 15:00:56 +0100 Subject: [PATCH 0296/1084] perf tools: Fail on using multiple bits long terms without value Currently we allow not to specify value for numeric terms and we set them to value 1. This was originaly meant just for single bit terms to allow user to type: $ perf record -e 'cpu/cpu-cycles,any' instead of: $ perf record -e 'cpu/cpu-cycles,any=1' However it works also for multi bits terms like: $ perf record -e 'cpu/event/' ls ... $ perf evlist -v ..., config: 0x1, ... After discussion with Peter we decided making such term usage to fail, like: $ perf record -e 'cpu/event/' ls event syntax error: 'cpu/event/' \___ no value assigned for term ... Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1487340058-10496-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 2 ++ tools/perf/util/parse-events.h | 2 ++ tools/perf/util/parse-events.y | 14 +++++++------- tools/perf/util/pmu.c | 13 +++++++++++-- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 984d99a8fdc5..67a8aebc67ab 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2349,6 +2349,7 @@ static int new_term(struct parse_events_term **_term, int parse_events_term__num(struct parse_events_term **term, int type_term, char *config, u64 num, + bool no_value, void *loc_term_, void *loc_val_) { YYLTYPE *loc_term = loc_term_; @@ -2358,6 +2359,7 @@ int parse_events_term__num(struct parse_events_term **term, .type_val = PARSE_EVENTS__TERM_TYPE_NUM, .type_term = type_term, .config = config, + .no_value = no_value, .err_term = loc_term ? loc_term->first_column : 0, .err_val = loc_val ? 
loc_val->first_column : 0, }; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index da246a3ddb69..1af6a267c21b 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -94,6 +94,7 @@ struct parse_events_term { int type_term; struct list_head list; bool used; + bool no_value; /* error string indexes for within parsed string */ int err_term; @@ -122,6 +123,7 @@ void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, int type_term, char *config, u64 num, + bool novalue, void *loc_term, void *loc_val); int parse_events_term__str(struct parse_events_term **term, int type_term, char *config, char *str, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index a14b47ab3879..30f018ea1370 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -252,7 +252,7 @@ PE_KERNEL_PMU_EVENT sep_dc if (!strcasecmp(alias->name, $1)) { ALLOC_LIST(head); ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, 1, &@1, NULL)); + $1, 1, false, &@1, NULL)); list_add_tail(&term->list, head); if (!parse_events_add_pmu(data, list, @@ -282,7 +282,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc ALLOC_LIST(head); ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - &pmu_name, 1, &@1, NULL)); + &pmu_name, 1, false, &@1, NULL)); list_add_tail(&term->list, head); ALLOC_LIST(list); @@ -548,7 +548,7 @@ PE_NAME '=' PE_VALUE struct parse_events_term *term; ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $3, &@1, &@3)); + $1, $3, false, &@1, &@3)); $$ = term; } | @@ -566,7 +566,7 @@ PE_NAME struct parse_events_term *term; ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, 1, &@1, NULL)); + $1, 1, true, &@1, NULL)); $$ = term; } | @@ -591,7 +591,7 @@ PE_TERM '=' PE_VALUE { struct parse_events_term *term; - ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, &@1, &@3)); + ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3)); $$ = term; } | @@ -599,7 +599,7 @@ PE_TERM { struct parse_events_term *term; - ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL)); + ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL)); $$ = term; } | @@ -620,7 +620,7 @@ PE_NAME array '=' PE_VALUE struct parse_events_term *term; ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $4, &@1, &@4)); + $1, $4, false, &@1, &@4)); term->array = $2; $$ = term; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 49bfee0e3d9e..63cb46cb9b0f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -834,9 +834,18 @@ static int pmu_config_term(struct list_head *formats, * Either directly use a numeric term, or try to translate string terms * using event parameters. 
*/ - if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (term->no_value && + bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) { + if (err) { + err->idx = term->err_val; + err->str = strdup("no value assigned for term"); + } + return -EINVAL; + } + val = term->val.num; - else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { + } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { if (strcmp(term->val.str, "?")) { if (verbose) { pr_info("Invalid sysfs entry %s=%s\n", -- GitLab From 0d79f8b93187c771b6971acfaba67f4e2f1e0710 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 17 Feb 2017 18:00:34 +0100 Subject: [PATCH 0297/1084] perf stat: Add -a as default target Boris asked for default -a option in case we monitor only uncore events. While implementing that I thought it might be actually useful to make it overall default. Running 'perf stat' will now collect system wide data. Committer note: Testing it: # perf stat ^C Performance counter stats for 'system wide': 3571.559178 cpu-clock (msec) # 4.000 CPUs utilized 3,346 context-switches # 0.937 K/sec 277 cpu-migrations # 0.078 K/sec 57,271 page-faults # 0.016 M/sec 4,535,633,835 cycles # 1.270 GHz 6,389,736,516 instructions # 1.41 insn per cycle 1,541,293,875 branches # 431.547 M/sec 14,526,396 branch-misses # 0.94% of all branches 0.892950118 seconds time elapsed # Requested-and-Acked-by: Borislav Petkov Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170217170034.GB15389@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 2 +- tools/perf/builtin-stat.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index d96ccd4844df..aecf2a87e7d6 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -63,7 +63,7 @@ report:: -a:: --all-cpus:: - system-wide collection from all CPUs + system-wide collection from all CPUs (default if no target is specified) -c:: --scale:: diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ca27a8a705ac..9989b03c21f2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2445,8 +2445,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } else if (big_num_opt == 0) /* User passed --no-big-num */ big_num = false; + /* Make system wide (-a) the default target. */ if (!argc && target__none(&target)) - usage_with_options(stat_usage, stat_options); + target.system_wide = true; if (run_count < 0) { pr_err("Run count must be a positive number\n"); -- GitLab From 483635a9d0802d5ffbe402ceac5b93ddb2acb138 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 17 Feb 2017 18:00:18 +0100 Subject: [PATCH 0298/1084] perf record: Add -a as default target Running 'perf record' with no target (-a, -p, -t, etc) will now collect system wide data. 
Commiter notes: Testing it: [root@jouet ~]# perf record ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.351 MB perf.data (366 samples) ] # is equivalent to: # perf record -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.411 MB perf.data (978 samples) ] # Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170217170018.GA15389@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 2 +- tools/perf/builtin-record.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 27256bc68eda..b16003ec14a7 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -157,7 +157,7 @@ OPTIONS -a:: --all-cpus:: - System-wide collection from all CPUs. + System-wide collection from all CPUs (default if no target is specified). -p:: --pid=:: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6cd6776052e7..b87bbef73394 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1677,8 +1677,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, record_options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); + + /* Make system wide (-a) the default target. */ if (!argc && target__none(&rec->opts.target)) - usage_with_options(record_usage, record_options); + rec->opts.target.system_wide = true; if (nr_cgroups && !rec->opts.target.system_wide) { usage_with_options_msg(record_usage, record_options, -- GitLab From c3821b3497aae1752cb2be72c32f650ef24c8820 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sun, 5 Feb 2017 22:02:01 +0800 Subject: [PATCH 0299/1084] nfsd/idmap: return nfserr_inval for 0-length names Tigran Mkrtchyan's new pynfs testcases for zero length principals fail: SATT16 st_setattr.testEmptyPrincipal : FAILURE Setting empty owner should return NFS4ERR_INVAL, instead got NFS4ERR_BADOWNER SATT17 st_setattr.testEmptyGroupPrincipal : FAILURE Setting empty owner_group should return NFS4ERR_INVAL, instead got NFS4ERR_BADOWNER This patch checks the principal and returns nfserr_inval directly. It could check after decoding in nfs4xdr.c, but it's simpler to do it in nfsd_map_xxxx. Signed-off-by: Kinglong Mee Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4idmap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5b20577dcdd2..6b9b6cca469f 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -628,6 +628,10 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, { __be32 status; u32 id = -1; + + if (name == NULL || namelen == 0) + return nfserr_inval; + status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id); *uid = make_kuid(&init_user_ns, id); if (!uid_valid(*uid)) @@ -641,6 +645,10 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, { __be32 status; u32 id = -1; + + if (name == NULL || namelen == 0) + return nfserr_inval; + status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id); *gid = make_kgid(&init_user_ns, id); if (!gid_valid(*gid)) -- GitLab From f7d1ddbe7648af7460d23688c8c131342eb43b3a Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sun, 5 Feb 2017 09:57:07 +0800 Subject: [PATCH 0300/1084] nfsd/callback: Cleanup callback cred on shutdown The rpccred gotten from rpc_lookup_machine_cred() should be put when state is shutdown. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 8 ++++++++ fs/nfsd/nfs4state.c | 10 ++++++---- fs/nfsd/state.h | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index eb78109d666c..fb6ca0ace9b1 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -753,6 +753,14 @@ int set_callback_cred(void) return 0; } +void cleanup_callback_cred(void) +{ + if (callback_cred) { + put_rpccred(callback_cred); + callback_cred = NULL; + } +} + static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses) { if (clp->cl_minorversion == 0) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a0dee8ae9f97..d35eb077330f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7012,23 +7012,24 @@ nfs4_state_start(void) ret = set_callback_cred(); if (ret) - return -ENOMEM; + return ret; + laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); if (laundry_wq == NULL) { ret = -ENOMEM; - goto out_recovery; + goto out_cleanup_cred; } ret = nfsd4_create_callback_queue(); if (ret) goto out_free_laundry; set_max_delegations(); - return 0; out_free_laundry: destroy_workqueue(laundry_wq); -out_recovery: +out_cleanup_cred: + cleanup_callback_cred(); return ret; } @@ -7086,6 +7087,7 @@ nfs4_state_shutdown(void) { destroy_workqueue(laundry_wq); nfsd4_destroy_callback_queue(); + cleanup_callback_cred(); } static void diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4516e8b7d776..005c911b34ac 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -615,6 +615,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, extern __be32 nfs4_check_open_reclaim(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn); extern int set_callback_cred(void); +extern void cleanup_callback_cred(void); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); -- GitLab From e86a40bc7331b7b93a88edd74e6b2f738107a6f9 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sun, 5 Feb 2017 09:57:37 +0800 Subject: [PATCH 0301/1084] nfsd/callback: skip the callback tag The callback tag is NULL, and hdr->nops is unused 
too right now, but. But if we were to ever test with a nonzero callback tag, nops will get a bad value. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index fb6ca0ace9b1..ea74149841e7 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -303,6 +303,7 @@ static int decode_cb_compound4res(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, length + 4); if (unlikely(p == NULL)) goto out_overflow; + p += XDR_QUADLEN(length); hdr->nops = be32_to_cpup(p); return 0; out_overflow: -- GitLab From 827433801c3bcf21296915c42c1314fbb944b197 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sun, 5 Feb 2017 09:57:48 +0800 Subject: [PATCH 0302/1084] nfsd/callback: Drop a useless data copy when comparing sessionid Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ea74149841e7..0274db6e65d0 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -397,13 +397,10 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, struct nfsd4_callback *cb) { struct nfsd4_session *session = cb->cb_clp->cl_cb_session; - struct nfs4_sessionid id; - int status; + int status = -ESERVERFAULT; __be32 *p; u32 dummy; - status = -ESERVERFAULT; - /* * If the server returns different values for sessionID, slotID or * sequence number, the server is looney tunes. @@ -411,9 +408,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4); if (unlikely(p == NULL)) goto out_overflow; - memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); - if (memcmp(id.data, session->se_sessionid.data, - NFS4_MAX_SESSIONID_LEN) != 0) { + + if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("NFS: %s Invalid session id\n", __func__); goto out; } -- GitLab From 2282cd2c05e281120dedc665a8a6a24053c44662 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 3 Feb 2017 22:36:00 +0800 Subject: [PATCH 0303/1084] NFSD: Get response size before operation for all RPCs NFSD usess PAGE_SIZE as the reply size estimate for RPCs which don't support op_rsize_bop(), A PAGE_SIZE (4096) is larger than many real response sizes, eg, access (op_encode_hdr_size + 2), seek (op_encode_hdr_size + 3). This patch just adds op_rsize_bop() for all RPCs getting response size. An overestimate is generally safe but the tighter estimates are probably better. Signed-off-by: Kinglong Mee Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 69 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 171f2d7ecfdd..02750a4c8770 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1842,6 +1842,12 @@ static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32); } +static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + /* ac_supported, ac_resp_access */ + return (op_encode_hdr_size + 2)* sizeof(__be32); +} + static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); @@ -1896,6 +1902,11 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, return ret; } +static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE; +} + static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) @@ -1937,6 +1948,11 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o XDR_QUADLEN(rlen)) * sizeof(__be32); } +static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE; +} + static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) @@ -1956,11 +1972,23 @@ static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32); } +static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids) + * sizeof(__be32); +} + static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } +static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR * + (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32); +} + static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * @@ -2015,6 +2043,19 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) } #ifdef CONFIG_NFSD_PNFS +static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + u32 maxcount = 0, rlen = 0; + + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount); + + return (op_encode_hdr_size + + 1 /* gd_layout_type*/ + + XDR_QUADLEN(rlen) + + 2 /* gd_notify_types */) * sizeof(__be32); +} + /* * At this stage we don't really know what layout driver will handle the request, * so we need to define an arbitrary upper bound here. 
@@ -2044,10 +2085,17 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_ } #endif /* CONFIG_NFSD_PNFS */ + +static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + 3) * sizeof(__be32); +} + static struct nfsd4_operation nfsd4_ops[] = { [OP_ACCESS] = { .op_func = (nfsd4op_func)nfsd4_access, .op_name = "OP_ACCESS", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize, }, [OP_CLOSE] = { .op_func = (nfsd4op_func)nfsd4_close, @@ -2085,6 +2133,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_GETFH] = { .op_func = (nfsd4op_func)nfsd4_getfh, .op_name = "OP_GETFH", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize, }, [OP_LINK] = { .op_func = (nfsd4op_func)nfsd4_link, @@ -2103,6 +2152,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_LOCKT] = { .op_func = (nfsd4op_func)nfsd4_lockt, .op_name = "OP_LOCKT", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize, }, [OP_LOCKU] = { .op_func = (nfsd4op_func)nfsd4_locku, @@ -2115,15 +2165,18 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_lookup, .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID, .op_name = "OP_LOOKUP", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_LOOKUPP] = { .op_func = (nfsd4op_func)nfsd4_lookupp, .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID, .op_name = "OP_LOOKUPP", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_NVERIFY] = { .op_func = (nfsd4op_func)nfsd4_nverify, .op_name = "OP_NVERIFY", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_OPEN] = { .op_func = (nfsd4op_func)nfsd4_open, @@ -2181,6 +2234,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_READLINK] = { .op_func = (nfsd4op_func)nfsd4_readlink, .op_name = "OP_READLINK", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize, }, [OP_REMOVE] = { .op_func = (nfsd4op_func)nfsd4_remove, @@ -2219,6 +2273,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_secinfo, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize, }, [OP_SETATTR] = { .op_func = (nfsd4op_func)nfsd4_setattr, @@ -2244,6 +2299,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_VERIFY] = { .op_func = (nfsd4op_func)nfsd4_verify, .op_name = "OP_VERIFY", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_WRITE] = { .op_func = (nfsd4op_func)nfsd4_write, @@ -2318,11 +2374,13 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize, }, [OP_TEST_STATEID] = { .op_func = (nfsd4op_func)nfsd4_test_stateid, .op_flags = ALLOWED_WITHOUT_FH, .op_name = "OP_TEST_STATEID", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize, }, [OP_FREE_STATEID] = { .op_func = (nfsd4op_func)nfsd4_free_stateid, @@ -2336,6 +2394,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_getdeviceinfo, .op_flags = ALLOWED_WITHOUT_FH, .op_name = "OP_GETDEVICEINFO", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize, }, [OP_LAYOUTGET] = { .op_func = (nfsd4op_func)nfsd4_layoutget, @@ -2385,6 +2444,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_SEEK] = { .op_func = (nfsd4op_func)nfsd4_seek, .op_name = "OP_SEEK", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize, }, }; @@ -2429,14 +2489,11 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) int 
nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) { - struct nfsd4_operation *opdesc; - nfsd4op_rsize estimator; - if (op->opnum == OP_ILLEGAL) return op_encode_hdr_size * sizeof(__be32); - opdesc = OPDESC(op); - estimator = opdesc->op_rsize_bop; - return estimator ? estimator(rqstp, op) : PAGE_SIZE; + + BUG_ON(OPDESC(op)->op_rsize_bop == NULL); + return OPDESC(op)->op_rsize_bop(rqstp, op); } void warn_on_nonidempotent_op(struct nfsd4_op *op) -- GitLab From 7323f0d2881bbd426ce6bc0a956ef9e739ffe767 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 3 Feb 2017 22:51:46 +0800 Subject: [PATCH 0304/1084] NFSD: Reserve adequate space for LOCKT operation After tightening the OP_LOCKT reply size estimate, we can get warnings like: [11512.783519] RPC request reserved 124 but used 152 [11512.813624] RPC request reserved 108 but used 136 Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7e4df80456ff..382c1fd05b4c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1941,12 +1941,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } else max_reply += nfsd4_max_reply(argp->rqstp, op); /* - * OP_LOCK may return a conflicting lock. (Special case - * because it will just skip encoding this if it runs - * out of xdr buffer space, and it is the only operation - * that behaves this way.) + * OP_LOCK and OP_LOCKT may return a conflicting lock. + * (Special case because it will just skip encoding this + * if it runs out of xdr buffer space, and it is the only + * operation that behaves this way.) */ - if (op->opnum == OP_LOCK) + if (op->opnum == OP_LOCK || op->opnum == OP_LOCKT) max_reply += NFS4_OPAQUE_LIMIT; if (op->status) { -- GitLab From d11914b21c4c21a294fe8937d66c1a192caa3cad Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 4 Jan 2017 04:58:28 +1000 Subject: [PATCH 0305/1084] powerpc/64: Implement clear_bit_unlock_is_negative_byte() Commit b91e1302ad9b8 ("mm: optimize PageWaiters bit use for unlock_page()") added a special bitop function to speed up unlock_page(). Implement this for 64-bit powerpc. This improves the unlock_page() core code from this: li 9,1 lwsync 1: ldarx 10,0,3,0 andc 10,10,9 stdcx. 10,0,3 bne- 1b ori 2,2,0 ld 9,0(3) andi. 10,9,0x80 beqlr li 4,0 b wake_up_page_bit To this: li 10,1 lwsync 1: ldarx 9,0,3,0 andc 9,9,10 stdcx. 9,0,3 bne- 1b andi. 
10,9,0x80 beqlr li 4,0 b wake_up_page_bit In a test of elapsed time for dd writing into 16GB of already-dirty pagecache on a POWER8 with 4K pages, which has one unlock_page per 4kB this patch reduced overhead by 1.1%: N Min Max Median Avg Stddev x 19 2.578 2.619 2.594 2.595 0.011 + 19 2.552 2.592 2.564 2.565 0.008 Difference at 95.0% confidence -0.030 +/- 0.006 -1.142% +/- 0.243% Signed-off-by: Nicholas Piggin [mpe: Made 64-bit only until I can test it properly on 32-bit] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/bitops.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 59abc620f8e8..73eb794d6163 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -154,6 +154,34 @@ static __inline__ int test_and_change_bit(unsigned long nr, return test_and_change_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0; } +#ifdef CONFIG_PPC64 +static __inline__ unsigned long clear_bit_unlock_return_word(int nr, + volatile unsigned long *addr) +{ + unsigned long old, t; + unsigned long *p = (unsigned long *)addr + BIT_WORD(nr); + unsigned long mask = BIT_MASK(nr); + + __asm__ __volatile__ ( + PPC_RELEASE_BARRIER +"1:" PPC_LLARX(%0,0,%3,0) "\n" + "andc %1,%0,%2\n" + PPC405_ERR77(0,%3) + PPC_STLCX "%1,0,%3\n" + "bne- 1b\n" + : "=&r" (old), "=&r" (t) + : "r" (mask), "r" (p) + : "cc", "memory"); + + return old; +} + +/* This is a special function for mm/filemap.c */ +#define clear_bit_unlock_is_negative_byte(nr, addr) \ + (clear_bit_unlock_return_word(nr, addr) & BIT_MASK(PG_waiters)) + +#endif /* CONFIG_PPC64 */ + #include static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr) -- GitLab From 50fdd36f336a03b9486f1cd4b0d85fcb361baf60 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Tue, 7 Feb 2017 08:56:41 +0800 Subject: [PATCH 0306/1084] thermal: zx2967: add thermal driver for ZTE's zx2967 family This patch adds thermal driver for ZTE's zx2967 family. Signed-off-by: Baoyou Xie Reviewed-by: Mathieu Poirier Signed-off-by: Eduardo Valentin --- drivers/thermal/Kconfig | 8 + drivers/thermal/Makefile | 1 + drivers/thermal/zx2967_thermal.c | 258 +++++++++++++++++++++++++++++++ 3 files changed, 267 insertions(+) create mode 100644 drivers/thermal/zx2967_thermal.c diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 3912d24a07b1..776b34396144 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -445,4 +445,12 @@ depends on (ARCH_QCOM && OF) || COMPILE_TEST source "drivers/thermal/qcom/Kconfig" endmenu +config ZX2967_THERMAL + tristate "Thermal sensors on zx2967 SoC" + depends on ARCH_ZX || COMPILE_TEST + help + Enable the zx2967 thermal sensors driver, which supports + the primitive temperature sensor embedded in zx2967 SoCs. + This sensor generates the real time die temperature. 
+ endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index adcf3cc90859..7adae2029355 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -57,3 +57,4 @@ obj-$(CONFIG_TEGRA_SOCTHERM) += tegra/ obj-$(CONFIG_HISI_THERMAL) += hisi_thermal.o obj-$(CONFIG_MTK_THERMAL) += mtk_thermal.o obj-$(CONFIG_GENERIC_ADC_THERMAL) += thermal-generic-adc.o +obj-$(CONFIG_ZX2967_THERMAL) += zx2967_thermal.o diff --git a/drivers/thermal/zx2967_thermal.c b/drivers/thermal/zx2967_thermal.c new file mode 100644 index 000000000000..a5670ad2cfc8 --- /dev/null +++ b/drivers/thermal/zx2967_thermal.c @@ -0,0 +1,258 @@ +/* + * ZTE's zx2967 family thermal sensor driver + * + * Copyright (C) 2017 ZTE Ltd. + * + * Author: Baoyou Xie + * + * License terms: GNU General Public License (GPL) version 2 + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Power Mode: 0->low 1->high */ +#define ZX2967_THERMAL_POWER_MODE 0 +#define ZX2967_POWER_MODE_LOW 0 +#define ZX2967_POWER_MODE_HIGH 1 + +/* DCF Control Register */ +#define ZX2967_THERMAL_DCF 0x4 +#define ZX2967_DCF_EN BIT(1) +#define ZX2967_DCF_FREEZE BIT(0) + +/* Selection Register */ +#define ZX2967_THERMAL_SEL 0x8 + +/* Control Register */ +#define ZX2967_THERMAL_CTRL 0x10 + +#define ZX2967_THERMAL_READY BIT(12) +#define ZX2967_THERMAL_TEMP_MASK GENMASK(11, 0) +#define ZX2967_THERMAL_ID_MASK 0x18 +#define ZX2967_THERMAL_ID 0x10 + +#define ZX2967_GET_TEMP_TIMEOUT_US (100 * 1024) + +/** + * struct zx2967_thermal_priv - zx2967 thermal sensor private structure + * @tzd: struct thermal_zone_device where the sensor is registered + * @lock: prevents read sensor in parallel + * @clk_topcrm: topcrm clk structure + * @clk_apb: apb clk structure + * @regs: pointer to base address of the thermal sensor + */ + +struct zx2967_thermal_priv { + struct thermal_zone_device *tzd; + struct mutex lock; + struct clk *clk_topcrm; + struct clk *clk_apb; + void __iomem *regs; + struct device *dev; +}; + +static int zx2967_thermal_get_temp(void *data, int *temp) +{ + void __iomem *regs; + struct zx2967_thermal_priv *priv = data; + u32 val; + int ret; + + if (!priv->tzd) + return -EAGAIN; + + regs = priv->regs; + mutex_lock(&priv->lock); + writel_relaxed(ZX2967_POWER_MODE_LOW, + regs + ZX2967_THERMAL_POWER_MODE); + writel_relaxed(ZX2967_DCF_EN, regs + ZX2967_THERMAL_DCF); + + val = readl_relaxed(regs + ZX2967_THERMAL_SEL); + val &= ~ZX2967_THERMAL_ID_MASK; + val |= ZX2967_THERMAL_ID; + writel_relaxed(val, regs + ZX2967_THERMAL_SEL); + + /* + * Must wait for a while, surely it's a bit odd. + * otherwise temperature value we got has a few deviation, even if + * the THERMAL_READY bit is set. + */ + usleep_range(100, 300); + ret = readx_poll_timeout(readl, regs + ZX2967_THERMAL_CTRL, + val, val & ZX2967_THERMAL_READY, 300, + ZX2967_GET_TEMP_TIMEOUT_US); + if (ret) { + dev_err(priv->dev, "Thermal sensor data timeout\n"); + goto unlock; + } + + writel_relaxed(ZX2967_DCF_FREEZE | ZX2967_DCF_EN, + regs + ZX2967_THERMAL_DCF); + val = readl_relaxed(regs + ZX2967_THERMAL_CTRL) + & ZX2967_THERMAL_TEMP_MASK; + writel_relaxed(ZX2967_POWER_MODE_HIGH, + regs + ZX2967_THERMAL_POWER_MODE); + + /* + * Calculate temperature + * In dts, slope is multiplied by 1000. 
+ */ + *temp = DIV_ROUND_CLOSEST(((s32)val + priv->tzd->tzp->offset) * 1000, + priv->tzd->tzp->slope); + +unlock: + mutex_unlock(&priv->lock); + return ret; +} + +static struct thermal_zone_of_device_ops zx2967_of_thermal_ops = { + .get_temp = zx2967_thermal_get_temp, +}; + +static int zx2967_thermal_probe(struct platform_device *pdev) +{ + struct zx2967_thermal_priv *priv; + struct resource *res; + int ret; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->regs)) + return PTR_ERR(priv->regs); + + priv->clk_topcrm = devm_clk_get(&pdev->dev, "topcrm"); + if (IS_ERR(priv->clk_topcrm)) { + ret = PTR_ERR(priv->clk_topcrm); + dev_err(&pdev->dev, "failed to get topcrm clock: %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(priv->clk_topcrm); + if (ret) { + dev_err(&pdev->dev, "failed to enable topcrm clock: %d\n", + ret); + return ret; + } + + priv->clk_apb = devm_clk_get(&pdev->dev, "apb"); + if (IS_ERR(priv->clk_apb)) { + ret = PTR_ERR(priv->clk_apb); + dev_err(&pdev->dev, "failed to get apb clock: %d\n", ret); + goto disable_clk_topcrm; + } + + ret = clk_prepare_enable(priv->clk_apb); + if (ret) { + dev_err(&pdev->dev, "failed to enable apb clock: %d\n", + ret); + goto disable_clk_topcrm; + } + + mutex_init(&priv->lock); + priv->tzd = thermal_zone_of_sensor_register(&pdev->dev, + 0, priv, &zx2967_of_thermal_ops); + + if (IS_ERR(priv->tzd)) { + ret = PTR_ERR(priv->tzd); + dev_err(&pdev->dev, "failed to register sensor: %d\n", ret); + goto disable_clk_all; + } + + if (priv->tzd->tzp->slope == 0) { + thermal_zone_of_sensor_unregister(&pdev->dev, priv->tzd); + dev_err(&pdev->dev, "coefficients of sensor is invalid\n"); + ret = -EINVAL; + goto disable_clk_all; + } + + priv->dev = &pdev->dev; + platform_set_drvdata(pdev, priv); + + return 0; + +disable_clk_all: + clk_disable_unprepare(priv->clk_apb); +disable_clk_topcrm: + clk_disable_unprepare(priv->clk_topcrm); + return ret; +} + +static int zx2967_thermal_exit(struct platform_device *pdev) +{ + struct zx2967_thermal_priv *priv = platform_get_drvdata(pdev); + + thermal_zone_of_sensor_unregister(&pdev->dev, priv->tzd); + clk_disable_unprepare(priv->clk_topcrm); + clk_disable_unprepare(priv->clk_apb); + + return 0; +} + +static const struct of_device_id zx2967_thermal_id_table[] = { + { .compatible = "zte,zx296718-thermal" }, + {} +}; +MODULE_DEVICE_TABLE(of, zx2967_thermal_id_table); + +#ifdef CONFIG_PM_SLEEP +static int zx2967_thermal_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct zx2967_thermal_priv *priv = platform_get_drvdata(pdev); + + if (priv && priv->clk_topcrm) + clk_disable_unprepare(priv->clk_topcrm); + + if (priv && priv->clk_apb) + clk_disable_unprepare(priv->clk_apb); + + return 0; +} + +static int zx2967_thermal_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct zx2967_thermal_priv *priv = platform_get_drvdata(pdev); + int error; + + error = clk_prepare_enable(priv->clk_topcrm); + if (error) + return error; + + error = clk_prepare_enable(priv->clk_apb); + if (error) { + clk_disable_unprepare(priv->clk_topcrm); + return error; + } + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(zx2967_thermal_pm_ops, + zx2967_thermal_suspend, zx2967_thermal_resume); + +static struct platform_driver zx2967_thermal_driver = { + .probe = zx2967_thermal_probe, + .remove 
= zx2967_thermal_exit, + .driver = { + .name = "zx2967_thermal", + .of_match_table = zx2967_thermal_id_table, + .pm = &zx2967_thermal_pm_ops, + }, +}; +module_platform_driver(zx2967_thermal_driver); + +MODULE_AUTHOR("Baoyou Xie "); +MODULE_DESCRIPTION("ZTE zx2967 thermal driver"); +MODULE_LICENSE("GPL v2"); -- GitLab From ef9b36d9101eb2d5827699a4e969c719fb8812c0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Feb 2017 22:12:00 +0200 Subject: [PATCH 0307/1084] thermal: exynos: Remove parsing unused samsung,tmu_cal_mode property The property samsung,tmu_cal_mode is not used and not used. We can safely remove it. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Eduardo Valentin --- drivers/thermal/samsung/exynos_tmu.c | 1 - drivers/thermal/samsung/exynos_tmu.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index ad1186dd6132..7b8ef09d2b3c 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -1168,7 +1168,6 @@ static int exynos_of_sensor_conf(struct device_node *np, pdata->default_temp_offset = (u8)value; of_property_read_u32(np, "samsung,tmu_cal_type", &pdata->cal_type); - of_property_read_u32(np, "samsung,tmu_cal_mode", &pdata->cal_mode); of_node_put(np); return 0; diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h index 440c7140b660..5149c2a3030c 100644 --- a/drivers/thermal/samsung/exynos_tmu.h +++ b/drivers/thermal/samsung/exynos_tmu.h @@ -70,7 +70,6 @@ struct exynos_tmu_platform_data { enum soc_type type; u32 cal_type; - u32 cal_mode; }; #endif /* _EXYNOS_TMU_H */ -- GitLab From 8b051ec37c93b28e1f1cac4c9677ff88e56930ff Mon Sep 17 00:00:00 2001 From: Shailendra Verma Date: Mon, 30 Jan 2017 10:34:58 +0530 Subject: [PATCH 0308/1084] thermal: imx: Fix possible NULL dereference. of_device_get_match_data could return NULL, and so can cause a NULL pointer dereference later. Signed-off-by: Shailendra Verma Signed-off-by: Eduardo Valentin --- drivers/thermal/imx_thermal.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index 06912f0602b7..fb648a45754e 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -489,6 +489,10 @@ static int imx_thermal_probe(struct platform_device *pdev) data->tempmon = map; data->socdata = of_device_get_match_data(&pdev->dev); + if (!data->socdata) { + dev_err(&pdev->dev, "no device match found\n"); + return -ENODEV; + } /* make sure the IRQ flag is clear before enabling irq on i.MX6SX */ if (data->socdata->version == TEMPMON_IMX6SX) { -- GitLab From 173a31271253d6ecb5358b72fa26a47e78db3e14 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Mon, 16 Jan 2017 15:43:02 +0530 Subject: [PATCH 0309/1084] thermal: ti-soc-thermal: Remove CPU_THERMAL Dependency from TI_THERMAL Currently when CPU_THERMAL is not defined the thermal sensors are not even exposed consequently no cooling is possible. CPU_THERMAL eventually depends on CPUFREQ. CPPUFREQ is not the only cooling for CPU. The thermal shutdown for critical temperatures is another cooling solution which will currently not get enabled if CPU_THERMAL is not defined. Remove this dependency so as to have the last level of thermal protection working even without CPUFREQ defined. 
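The "last level of thermal protection" mentioned above is the critical trip point: the thermal core can still force a shutdown when a sensor crosses its critical temperature even if no cpufreq cooling device was ever registered. A rough user-space model of that split between best-effort cooling and unconditional shutdown (hypothetical helper names, not the ti-soc-thermal code) is:

#include <stdbool.h>
#include <stdio.h>

#define CRIT_TEMP_MC    125000          /* critical trip, millidegrees C */

static bool have_cpufreq_cooling;       /* models CPU_THERMAL being built in */

static void apply_passive_cooling(void)
{
        printf("throttling CPU frequency\n");   /* only possible with CPUFREQ */
}

static void emergency_shutdown(void)
{
        /* models orderly_poweroff(): needs no cooling device at all */
        printf("critical temperature reached, shutting down\n");
}

static void handle_temp(int temp_mc)
{
        if (temp_mc >= CRIT_TEMP_MC) {
                emergency_shutdown();           /* last level of protection */
                return;
        }
        if (have_cpufreq_cooling)
                apply_passive_cooling();        /* best effort only */
}

int main(void)
{
        have_cpufreq_cooling = false;   /* the case this patch cares about */
        handle_temp(126000);
        return 0;
}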
Signed-off-by: Keerthy Signed-off-by: Eduardo Valentin --- drivers/thermal/ti-soc-thermal/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/thermal/ti-soc-thermal/Kconfig b/drivers/thermal/ti-soc-thermal/Kconfig index ea8283f08aa6..fe0e877f84d0 100644 --- a/drivers/thermal/ti-soc-thermal/Kconfig +++ b/drivers/thermal/ti-soc-thermal/Kconfig @@ -11,7 +11,6 @@ config TI_SOC_THERMAL config TI_THERMAL bool "Texas Instruments SoCs thermal framework support" depends on TI_SOC_THERMAL - depends on CPU_THERMAL help If you say yes here you want to get support for generic thermal framework for the Texas Instruments on die bandgap temperature sensor. -- GitLab From 96234d44ce3dc598de06b9d7431db9d186dd0d11 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Fri, 6 Jan 2017 11:31:42 +0530 Subject: [PATCH 0310/1084] thermal: arm: dra752: Remove TSHUT configuration Technical Reference Manual [1] mandates that software should not be configuring the thermal shutdown thresholds. Hence removing TSHUT_CONFIG. [1] http://www.ti.com/lit/ug/sprui30b/sprui30b.pdf Signed-off-by: Keerthy Reported-by: Ravikumar Kattekola Signed-off-by: Eduardo Valentin --- drivers/thermal/ti-soc-thermal/dra752-thermal-data.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c index 58b5c6694cd4..0a3f88dd883e 100644 --- a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c +++ b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c @@ -416,8 +416,7 @@ int dra752_adc_to_temp[DRA752_ADC_END_VALUE - DRA752_ADC_START_VALUE + 1] = { /* DRA752 data */ const struct ti_bandgap_data dra752_data = { - .features = TI_BANDGAP_FEATURE_TSHUT_CONFIG | - TI_BANDGAP_FEATURE_FREEZE_BIT | + .features = TI_BANDGAP_FEATURE_FREEZE_BIT | TI_BANDGAP_FEATURE_TALERT | TI_BANDGAP_FEATURE_COUNTER_DELAY | TI_BANDGAP_FEATURE_HISTORY_BUFFER | -- GitLab From 13d00b6439f1cf570ef962cf141bb3e329997265 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Fri, 6 Jan 2017 11:31:43 +0530 Subject: [PATCH 0311/1084] thermal: arm: dra752: Remove all TSHUT related definitions No configuration needs to be done for TSHUT from software. Hence remove all the unnecessary definitions. 
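Dropping TI_BANDGAP_FEATURE_TSHUT_CONFIG works because the bandgap core programs the TSHUT threshold registers only when that feature bit is set in the per-SoC data, so once the flag is gone the register offsets and threshold values become dead definitions. A minimal stand-alone illustration of this feature-flag gating pattern (simplified names, not the actual ti-bandgap code):

#include <stdio.h>

#define FEAT_TSHUT_CONFIG       (1 << 0)
#define FEAT_TALERT             (1 << 1)

struct bandgap_conf {
        unsigned int features;
};

static void configure_sensor(const struct bandgap_conf *conf)
{
        /* thresholds are written only when the SoC data asks for it */
        if (conf->features & FEAT_TSHUT_CONFIG)
                printf("programming TSHUT hot/cold thresholds\n");
        else
                printf("leaving TSHUT thresholds at hardware defaults\n");

        if (conf->features & FEAT_TALERT)
                printf("programming TALERT thresholds\n");
}

int main(void)
{
        /* dra752 after these patches: TALERT only, no TSHUT_CONFIG */
        struct bandgap_conf dra752 = { .features = FEAT_TALERT };

        configure_sensor(&dra752);
        return 0;
}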
Signed-off-by: Keerthy Signed-off-by: Eduardo Valentin --- .../thermal/ti-soc-thermal/dra752-bandgap.h | 19 -------------- .../ti-soc-thermal/dra752-thermal-data.c | 25 ------------------- 2 files changed, 44 deletions(-) diff --git a/drivers/thermal/ti-soc-thermal/dra752-bandgap.h b/drivers/thermal/ti-soc-thermal/dra752-bandgap.h index 6b0f2b1160f7..a31e4b5e82cd 100644 --- a/drivers/thermal/ti-soc-thermal/dra752-bandgap.h +++ b/drivers/thermal/ti-soc-thermal/dra752-bandgap.h @@ -54,7 +54,6 @@ #define DRA752_STD_FUSE_OPP_BGAP_CORE_OFFSET 0x8 #define DRA752_TEMP_SENSOR_CORE_OFFSET 0x154 #define DRA752_BANDGAP_THRESHOLD_CORE_OFFSET 0x1ac -#define DRA752_BANDGAP_TSHUT_CORE_OFFSET 0x1b8 #define DRA752_BANDGAP_CUMUL_DTEMP_CORE_OFFSET 0x1c4 #define DRA752_DTEMP_CORE_0_OFFSET 0x208 #define DRA752_DTEMP_CORE_1_OFFSET 0x20c @@ -66,7 +65,6 @@ #define DRA752_STD_FUSE_OPP_BGAP_IVA_OFFSET 0x388 #define DRA752_TEMP_SENSOR_IVA_OFFSET 0x398 #define DRA752_BANDGAP_THRESHOLD_IVA_OFFSET 0x3a4 -#define DRA752_BANDGAP_TSHUT_IVA_OFFSET 0x3ac #define DRA752_BANDGAP_CUMUL_DTEMP_IVA_OFFSET 0x3b4 #define DRA752_DTEMP_IVA_0_OFFSET 0x3d0 #define DRA752_DTEMP_IVA_1_OFFSET 0x3d4 @@ -78,7 +76,6 @@ #define DRA752_STD_FUSE_OPP_BGAP_MPU_OFFSET 0x4 #define DRA752_TEMP_SENSOR_MPU_OFFSET 0x14c #define DRA752_BANDGAP_THRESHOLD_MPU_OFFSET 0x1a4 -#define DRA752_BANDGAP_TSHUT_MPU_OFFSET 0x1b0 #define DRA752_BANDGAP_CUMUL_DTEMP_MPU_OFFSET 0x1bc #define DRA752_DTEMP_MPU_0_OFFSET 0x1e0 #define DRA752_DTEMP_MPU_1_OFFSET 0x1e4 @@ -90,7 +87,6 @@ #define DRA752_STD_FUSE_OPP_BGAP_DSPEVE_OFFSET 0x384 #define DRA752_TEMP_SENSOR_DSPEVE_OFFSET 0x394 #define DRA752_BANDGAP_THRESHOLD_DSPEVE_OFFSET 0x3a0 -#define DRA752_BANDGAP_TSHUT_DSPEVE_OFFSET 0x3a8 #define DRA752_BANDGAP_CUMUL_DTEMP_DSPEVE_OFFSET 0x3b0 #define DRA752_DTEMP_DSPEVE_0_OFFSET 0x3bc #define DRA752_DTEMP_DSPEVE_1_OFFSET 0x3c0 @@ -102,7 +98,6 @@ #define DRA752_STD_FUSE_OPP_BGAP_GPU_OFFSET 0x0 #define DRA752_TEMP_SENSOR_GPU_OFFSET 0x150 #define DRA752_BANDGAP_THRESHOLD_GPU_OFFSET 0x1a8 -#define DRA752_BANDGAP_TSHUT_GPU_OFFSET 0x1b4 #define DRA752_BANDGAP_CUMUL_DTEMP_GPU_OFFSET 0x1c0 #define DRA752_DTEMP_GPU_0_OFFSET 0x1f4 #define DRA752_DTEMP_GPU_1_OFFSET 0x1f8 @@ -173,10 +168,6 @@ #define DRA752_BANDGAP_THRESHOLD_HOT_MASK (0x3ff << 16) #define DRA752_BANDGAP_THRESHOLD_COLD_MASK (0x3ff << 0) -/* DRA752.TSHUT_THRESHOLD */ -#define DRA752_TSHUT_THRESHOLD_MUXCTRL_MASK BIT(31) -#define DRA752_TSHUT_THRESHOLD_HOT_MASK (0x3ff << 16) -#define DRA752_TSHUT_THRESHOLD_COLD_MASK (0x3ff << 0) /* DRA752.BANDGAP_CUMUL_DTEMP_CORE */ #define DRA752_BANDGAP_CUMUL_DTEMP_CORE_MASK (0xffffffff << 0) @@ -216,8 +207,6 @@ #define DRA752_GPU_MAX_TEMP 125000 #define DRA752_GPU_HYST_VAL 5000 /* interrupts thresholds */ -#define DRA752_GPU_TSHUT_HOT 915 -#define DRA752_GPU_TSHUT_COLD 900 #define DRA752_GPU_T_HOT 800 #define DRA752_GPU_T_COLD 795 @@ -230,8 +219,6 @@ #define DRA752_MPU_MAX_TEMP 125000 #define DRA752_MPU_HYST_VAL 5000 /* interrupts thresholds */ -#define DRA752_MPU_TSHUT_HOT 915 -#define DRA752_MPU_TSHUT_COLD 900 #define DRA752_MPU_T_HOT 800 #define DRA752_MPU_T_COLD 795 @@ -244,8 +231,6 @@ #define DRA752_CORE_MAX_TEMP 125000 #define DRA752_CORE_HYST_VAL 5000 /* interrupts thresholds */ -#define DRA752_CORE_TSHUT_HOT 915 -#define DRA752_CORE_TSHUT_COLD 900 #define DRA752_CORE_T_HOT 800 #define DRA752_CORE_T_COLD 795 @@ -258,8 +243,6 @@ #define DRA752_DSPEVE_MAX_TEMP 125000 #define DRA752_DSPEVE_HYST_VAL 5000 /* interrupts thresholds */ -#define DRA752_DSPEVE_TSHUT_HOT 915 -#define 
DRA752_DSPEVE_TSHUT_COLD 900 #define DRA752_DSPEVE_T_HOT 800 #define DRA752_DSPEVE_T_COLD 795 @@ -272,8 +255,6 @@ #define DRA752_IVA_MAX_TEMP 125000 #define DRA752_IVA_HYST_VAL 5000 /* interrupts thresholds */ -#define DRA752_IVA_TSHUT_HOT 915 -#define DRA752_IVA_TSHUT_COLD 900 #define DRA752_IVA_T_HOT 800 #define DRA752_IVA_T_COLD 795 diff --git a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c index 0a3f88dd883e..118d7d847715 100644 --- a/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c +++ b/drivers/thermal/ti-soc-thermal/dra752-thermal-data.c @@ -49,9 +49,6 @@ dra752_core_temp_sensor_registers = { .bgap_threshold = DRA752_BANDGAP_THRESHOLD_CORE_OFFSET, .threshold_thot_mask = DRA752_BANDGAP_THRESHOLD_HOT_MASK, .threshold_tcold_mask = DRA752_BANDGAP_THRESHOLD_COLD_MASK, - .tshut_threshold = DRA752_BANDGAP_TSHUT_CORE_OFFSET, - .tshut_hot_mask = DRA752_TSHUT_THRESHOLD_HOT_MASK, - .tshut_cold_mask = DRA752_TSHUT_THRESHOLD_COLD_MASK, .bgap_status = DRA752_BANDGAP_STATUS_1_OFFSET, .status_bgap_alert_mask = DRA752_BANDGAP_STATUS_1_ALERT_MASK, .status_hot_mask = DRA752_BANDGAP_STATUS_1_HOT_CORE_MASK, @@ -85,9 +82,6 @@ dra752_iva_temp_sensor_registers = { .bgap_threshold = DRA752_BANDGAP_THRESHOLD_IVA_OFFSET, .threshold_thot_mask = DRA752_BANDGAP_THRESHOLD_HOT_MASK, .threshold_tcold_mask = DRA752_BANDGAP_THRESHOLD_COLD_MASK, - .tshut_threshold = DRA752_BANDGAP_TSHUT_IVA_OFFSET, - .tshut_hot_mask = DRA752_TSHUT_THRESHOLD_HOT_MASK, - .tshut_cold_mask = DRA752_TSHUT_THRESHOLD_COLD_MASK, .bgap_status = DRA752_BANDGAP_STATUS_2_OFFSET, .status_bgap_alert_mask = DRA752_BANDGAP_STATUS_1_ALERT_MASK, .status_hot_mask = DRA752_BANDGAP_STATUS_2_HOT_IVA_MASK, @@ -121,9 +115,6 @@ dra752_mpu_temp_sensor_registers = { .bgap_threshold = DRA752_BANDGAP_THRESHOLD_MPU_OFFSET, .threshold_thot_mask = DRA752_BANDGAP_THRESHOLD_HOT_MASK, .threshold_tcold_mask = DRA752_BANDGAP_THRESHOLD_COLD_MASK, - .tshut_threshold = DRA752_BANDGAP_TSHUT_MPU_OFFSET, - .tshut_hot_mask = DRA752_TSHUT_THRESHOLD_HOT_MASK, - .tshut_cold_mask = DRA752_TSHUT_THRESHOLD_COLD_MASK, .bgap_status = DRA752_BANDGAP_STATUS_1_OFFSET, .status_bgap_alert_mask = DRA752_BANDGAP_STATUS_1_ALERT_MASK, .status_hot_mask = DRA752_BANDGAP_STATUS_1_HOT_MPU_MASK, @@ -157,9 +148,6 @@ dra752_dspeve_temp_sensor_registers = { .bgap_threshold = DRA752_BANDGAP_THRESHOLD_DSPEVE_OFFSET, .threshold_thot_mask = DRA752_BANDGAP_THRESHOLD_HOT_MASK, .threshold_tcold_mask = DRA752_BANDGAP_THRESHOLD_COLD_MASK, - .tshut_threshold = DRA752_BANDGAP_TSHUT_DSPEVE_OFFSET, - .tshut_hot_mask = DRA752_TSHUT_THRESHOLD_HOT_MASK, - .tshut_cold_mask = DRA752_TSHUT_THRESHOLD_COLD_MASK, .bgap_status = DRA752_BANDGAP_STATUS_2_OFFSET, .status_bgap_alert_mask = DRA752_BANDGAP_STATUS_1_ALERT_MASK, .status_hot_mask = DRA752_BANDGAP_STATUS_2_HOT_DSPEVE_MASK, @@ -193,9 +181,6 @@ dra752_gpu_temp_sensor_registers = { .bgap_threshold = DRA752_BANDGAP_THRESHOLD_GPU_OFFSET, .threshold_thot_mask = DRA752_BANDGAP_THRESHOLD_HOT_MASK, .threshold_tcold_mask = DRA752_BANDGAP_THRESHOLD_COLD_MASK, - .tshut_threshold = DRA752_BANDGAP_TSHUT_GPU_OFFSET, - .tshut_hot_mask = DRA752_TSHUT_THRESHOLD_HOT_MASK, - .tshut_cold_mask = DRA752_TSHUT_THRESHOLD_COLD_MASK, .bgap_status = DRA752_BANDGAP_STATUS_1_OFFSET, .status_bgap_alert_mask = DRA752_BANDGAP_STATUS_1_ALERT_MASK, .status_hot_mask = DRA752_BANDGAP_STATUS_1_HOT_GPU_MASK, @@ -211,8 +196,6 @@ dra752_gpu_temp_sensor_registers = { /* Thresholds and limits for DRA752 MPU temperature sensor */ static struct 
temp_sensor_data dra752_mpu_temp_sensor_data = { - .tshut_hot = DRA752_MPU_TSHUT_HOT, - .tshut_cold = DRA752_MPU_TSHUT_COLD, .t_hot = DRA752_MPU_T_HOT, .t_cold = DRA752_MPU_T_COLD, .min_freq = DRA752_MPU_MIN_FREQ, @@ -226,8 +209,6 @@ static struct temp_sensor_data dra752_mpu_temp_sensor_data = { /* Thresholds and limits for DRA752 GPU temperature sensor */ static struct temp_sensor_data dra752_gpu_temp_sensor_data = { - .tshut_hot = DRA752_GPU_TSHUT_HOT, - .tshut_cold = DRA752_GPU_TSHUT_COLD, .t_hot = DRA752_GPU_T_HOT, .t_cold = DRA752_GPU_T_COLD, .min_freq = DRA752_GPU_MIN_FREQ, @@ -241,8 +222,6 @@ static struct temp_sensor_data dra752_gpu_temp_sensor_data = { /* Thresholds and limits for DRA752 CORE temperature sensor */ static struct temp_sensor_data dra752_core_temp_sensor_data = { - .tshut_hot = DRA752_CORE_TSHUT_HOT, - .tshut_cold = DRA752_CORE_TSHUT_COLD, .t_hot = DRA752_CORE_T_HOT, .t_cold = DRA752_CORE_T_COLD, .min_freq = DRA752_CORE_MIN_FREQ, @@ -256,8 +235,6 @@ static struct temp_sensor_data dra752_core_temp_sensor_data = { /* Thresholds and limits for DRA752 DSPEVE temperature sensor */ static struct temp_sensor_data dra752_dspeve_temp_sensor_data = { - .tshut_hot = DRA752_DSPEVE_TSHUT_HOT, - .tshut_cold = DRA752_DSPEVE_TSHUT_COLD, .t_hot = DRA752_DSPEVE_T_HOT, .t_cold = DRA752_DSPEVE_T_COLD, .min_freq = DRA752_DSPEVE_MIN_FREQ, @@ -271,8 +248,6 @@ static struct temp_sensor_data dra752_dspeve_temp_sensor_data = { /* Thresholds and limits for DRA752 IVA temperature sensor */ static struct temp_sensor_data dra752_iva_temp_sensor_data = { - .tshut_hot = DRA752_IVA_TSHUT_HOT, - .tshut_cold = DRA752_IVA_TSHUT_COLD, .t_hot = DRA752_IVA_T_HOT, .t_cold = DRA752_IVA_T_COLD, .min_freq = DRA752_IVA_MIN_FREQ, -- GitLab From 992edf395b8a8411c506f4345bc04451eba95976 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Wed, 28 Dec 2016 14:16:45 +0530 Subject: [PATCH 0312/1084] thermal: mtk_thermal: Staticise a number of data variables Sparse throws following warnings: drivers/thermal/mtk_thermal.c:186:11: warning: symbol 'mt8173_bank_data' was not declared. Should it be static? drivers/thermal/mtk_thermal.c:193:11: warning: symbol 'mt8173_msr' was not declared. Should it be static? drivers/thermal/mtk_thermal.c:197:11: warning: symbol 'mt8173_adcpnp' was not declared. Should it be static? drivers/thermal/mtk_thermal.c:201:11: warning: symbol 'mt8173_mux_values' was not declared. Should it be static? drivers/thermal/mtk_thermal.c:204:11: warning: symbol 'mt2701_bank_data' was not declared. Should it be static? drivers/thermal/mtk_thermal.c:208:11: warning: symbol 'mt2701_msr' was not declared. Should it be static? drivers/thermal/mtk_thermal.c:212:11: warning: symbol 'mt2701_adcpnp' was not declared. Should it be static? drivers/thermal/mtk_thermal.c:216:11: warning: symbol 'mt2701_mux_values' was not declared. Should it be static? Make these variables as static to fix these warnings. 
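sparse flags these because any symbol with external linkage is expected to have a declaration in some header; for lookup tables used by a single file the right fix is internal linkage, which is all the patch changes. A minimal example of the warning and its fix (generic names, not the mtk_thermal tables):

#include <stdio.h>

/*
 * Without "static", bank_data has external linkage and sparse warns
 * "symbol 'bank_data' was not declared. Should it be static?".
 * With "static" the table is private to this translation unit, the
 * warning goes away and the generated code is unchanged.
 */
static const int bank_data[3] = { 1, 2, 16 };

int main(void)
{
        printf("sensor mux value: %d\n", bank_data[2]);
        return 0;
}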
Signed-off-by: Vivek Gautam Signed-off-by: Eduardo Valentin --- drivers/thermal/mtk_thermal.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c index 34169c32d495..1aff7fde54b1 100644 --- a/drivers/thermal/mtk_thermal.c +++ b/drivers/thermal/mtk_thermal.c @@ -183,37 +183,37 @@ struct mtk_thermal { }; /* MT8173 thermal sensor data */ -const int mt8173_bank_data[MT8173_NUM_ZONES][3] = { +static const int mt8173_bank_data[MT8173_NUM_ZONES][3] = { { MT8173_TS2, MT8173_TS3 }, { MT8173_TS2, MT8173_TS4 }, { MT8173_TS1, MT8173_TS2, MT8173_TSABB }, { MT8173_TS2 }, }; -const int mt8173_msr[MT8173_NUM_SENSORS_PER_ZONE] = { +static const int mt8173_msr[MT8173_NUM_SENSORS_PER_ZONE] = { TEMP_MSR0, TEMP_MSR1, TEMP_MSR2, TEMP_MSR2 }; -const int mt8173_adcpnp[MT8173_NUM_SENSORS_PER_ZONE] = { +static const int mt8173_adcpnp[MT8173_NUM_SENSORS_PER_ZONE] = { TEMP_ADCPNP0, TEMP_ADCPNP1, TEMP_ADCPNP2, TEMP_ADCPNP3 }; -const int mt8173_mux_values[MT8173_NUM_SENSORS] = { 0, 1, 2, 3, 16 }; +static const int mt8173_mux_values[MT8173_NUM_SENSORS] = { 0, 1, 2, 3, 16 }; /* MT2701 thermal sensor data */ -const int mt2701_bank_data[MT2701_NUM_SENSORS] = { +static const int mt2701_bank_data[MT2701_NUM_SENSORS] = { MT2701_TS1, MT2701_TS2, MT2701_TSABB }; -const int mt2701_msr[MT2701_NUM_SENSORS_PER_ZONE] = { +static const int mt2701_msr[MT2701_NUM_SENSORS_PER_ZONE] = { TEMP_MSR0, TEMP_MSR1, TEMP_MSR2 }; -const int mt2701_adcpnp[MT2701_NUM_SENSORS_PER_ZONE] = { +static const int mt2701_adcpnp[MT2701_NUM_SENSORS_PER_ZONE] = { TEMP_ADCPNP0, TEMP_ADCPNP1, TEMP_ADCPNP2 }; -const int mt2701_mux_values[MT2701_NUM_SENSORS] = { 0, 1, 16 }; +static const int mt2701_mux_values[MT2701_NUM_SENSORS] = { 0, 1, 16 }; /** * The MT8173 thermal controller has four banks. 
Each bank can read up to -- GitLab From b8826e506ee58873725ec3a25a2a27fefd762574 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Feb 2017 22:07:24 -0500 Subject: [PATCH 0313/1084] selftest for default_file_splice_read() infoleak bug fixed in commit b9dc6f65bc5e ("fix a fencepost error in pipe_advance()") Signed-off-by: Al Viro --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/splice/Makefile | 8 ++++++++ tools/testing/selftests/splice/default_file_splice_read.c | 8 ++++++++ .../testing/selftests/splice/default_file_splice_read.sh | 7 +++++++ 4 files changed, 24 insertions(+) create mode 100644 tools/testing/selftests/splice/Makefile create mode 100644 tools/testing/selftests/splice/default_file_splice_read.c create mode 100755 tools/testing/selftests/splice/default_file_splice_read.sh diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 71b05891a6a1..0e72f1d03c9e 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -24,6 +24,7 @@ TARGETS += ptrace TARGETS += seccomp TARGETS += sigaltstack TARGETS += size +TARGETS += splice TARGETS += static_keys TARGETS += sync TARGETS += sysctl diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile new file mode 100644 index 000000000000..de51f439d4a6 --- /dev/null +++ b/tools/testing/selftests/splice/Makefile @@ -0,0 +1,8 @@ +TEST_PROGS := default_file_splice_read.sh +EXTRA := default_file_splice_read +all: $(TEST_PROGS) $(EXTRA) + +include ../lib.mk + +clean: + rm -fr $(TEST_PROGS) $(EXTRA) diff --git a/tools/testing/selftests/splice/default_file_splice_read.c b/tools/testing/selftests/splice/default_file_splice_read.c new file mode 100644 index 000000000000..01dd6091554c --- /dev/null +++ b/tools/testing/selftests/splice/default_file_splice_read.c @@ -0,0 +1,8 @@ +#define _GNU_SOURCE +#include + +int main(int argc, char **argv) +{ + splice(0, 0, 1, 0, 1<<30, 0); + return 0; +} diff --git a/tools/testing/selftests/splice/default_file_splice_read.sh b/tools/testing/selftests/splice/default_file_splice_read.sh new file mode 100755 index 000000000000..1ea2adeabc94 --- /dev/null +++ b/tools/testing/selftests/splice/default_file_splice_read.sh @@ -0,0 +1,7 @@ +#!/bin/sh +n=`./default_file_splice_read Date: Wed, 18 Jan 2017 14:13:20 +0300 Subject: [PATCH 0314/1084] hfs: fix hfs_readdir() I was looking through static analysis warnings and there is a bug here that goes all the way back to the start of git. Basically we're copying the pointer and nearby garbage instead of the data the fd.key pointer is pointing to. Signed-off-by: Dan Carpenter Reviewed-by: Vyacheslav Dubeyko Signed-off-by: Al Viro --- fs/hfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 5de5c48b418d..75b254280ff6 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -169,7 +169,7 @@ static int hfs_readdir(struct file *file, struct dir_context *ctx) * Can be done after the list insertion; exclusion with * hfs_delete_cat() is provided by directory lock. */ - memcpy(&rd->key, &fd.key, sizeof(struct hfs_cat_key)); + memcpy(&rd->key, &fd.key->cat, sizeof(struct hfs_cat_key)); out: hfs_find_exit(&fd); return err; -- GitLab From 9a584bf9bf0a1c608f5ed5f5e63b074bbc81a322 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 13 Jan 2017 20:53:21 +0530 Subject: [PATCH 0315/1084] target/iscsi: split iscsit_check_dataout_hdr() Split iscsit_check_dataout_hdr() into two functions 1. 
__iscsit_check_dataout_hdr() - This function validates data out hdr. 2. iscsit_check_dataout_hdr() - This function finds iSCSI cmd using iscsit_find_cmd_from_itt_or_dump(), then it calls __iscsit_check_dataout_hdr() to validate iSCSI hdr. This split is required to support Chelsio T6 iSCSI DDP completion feature. T6 adapters reduce number of completions to host by generating single completion for all directly placed(DDP) iSCSI pdus in a sequence, DDP completion contains iSCSI hdr of the last pdu in a sequence. On receiving DDP completion cxgbit driver will first find iSCSI cmd using iscsit_find_cmd_from_itt_or_dump() then updates cmd->write_data_done, cmd->next_burst_len, cmd->data_sn and calls __iscsit_check_dataout_hdr() to validate iSCSI hdr. (Move XRDSL check ahead of itt lookup / dump - nab) Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 65 +++++++++++++++--------- drivers/target/iscsi/iscsi_target_util.c | 1 + include/target/iscsi/iscsi_transport.h | 11 +++- 3 files changed, 50 insertions(+), 27 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index b4f1d1cbe521..2285988c209b 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1431,36 +1431,17 @@ static void iscsit_do_crypto_hash_buf( } int -iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, - struct iscsi_cmd **out_cmd) +__iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf, + struct iscsi_cmd *cmd, u32 payload_length, + bool *success) { - struct iscsi_data *hdr = (struct iscsi_data *)buf; - struct iscsi_cmd *cmd = NULL; + struct iscsi_data *hdr = buf; struct se_cmd *se_cmd; - u32 payload_length = ntoh24(hdr->dlength); int rc; - if (!payload_length) { - pr_warn("DataOUT payload is ZERO, ignoring.\n"); - return 0; - } - /* iSCSI write */ atomic_long_add(payload_length, &conn->sess->rx_data_octets); - if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { - pr_err("DataSegmentLength: %u is greater than" - " MaxXmitDataSegmentLength: %u\n", payload_length, - conn->conn_ops->MaxXmitDataSegmentLength); - return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, - buf); - } - - cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, - payload_length); - if (!cmd) - return 0; - pr_debug("Got DataOut ITT: 0x%08x, TTT: 0x%08x," " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n", hdr->itt, hdr->ttt, hdr->datasn, ntohl(hdr->offset), @@ -1553,10 +1534,44 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, return 0; else if (rc == DATAOUT_CANNOT_RECOVER) return -1; - - *out_cmd = cmd; + *success = true; return 0; } +EXPORT_SYMBOL(__iscsit_check_dataout_hdr); + +int +iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf, + struct iscsi_cmd **out_cmd) +{ + struct iscsi_data *hdr = buf; + struct iscsi_cmd *cmd; + u32 payload_length = ntoh24(hdr->dlength); + int rc; + bool success = false; + + if (!payload_length) { + pr_warn_ratelimited("DataOUT payload is ZERO, ignoring.\n"); + return 0; + } + + if (payload_length > conn->conn_ops->MaxXmitDataSegmentLength) { + pr_err_ratelimited("DataSegmentLength: %u is greater than" + " MaxXmitDataSegmentLength: %u\n", payload_length, + conn->conn_ops->MaxXmitDataSegmentLength); + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, buf); + } + + cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, payload_length); + if (!cmd) + return 0; + + rc = 
__iscsit_check_dataout_hdr(conn, buf, cmd, payload_length, &success); + + if (success) + *out_cmd = cmd; + + return rc; +} EXPORT_SYMBOL(iscsit_check_dataout_hdr); static int diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b5a1b4ccba12..f46eadffec07 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -417,6 +417,7 @@ struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump( return NULL; } +EXPORT_SYMBOL(iscsit_find_cmd_from_itt_or_dump); struct iscsi_cmd *iscsit_find_cmd_from_ttt( struct iscsi_conn *conn, diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index 1277e9ba0318..ff1a4f4cd66d 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -55,8 +55,12 @@ extern int iscsit_setup_scsi_cmd(struct iscsi_conn *, struct iscsi_cmd *, extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *); extern int iscsit_process_scsi_cmd(struct iscsi_conn *, struct iscsi_cmd *, struct iscsi_scsi_req *); -extern int iscsit_check_dataout_hdr(struct iscsi_conn *, unsigned char *, - struct iscsi_cmd **); +extern int +__iscsit_check_dataout_hdr(struct iscsi_conn *, void *, + struct iscsi_cmd *, u32, bool *); +extern int +iscsit_check_dataout_hdr(struct iscsi_conn *conn, void *buf, + struct iscsi_cmd **out_cmd); extern int iscsit_check_dataout_payload(struct iscsi_cmd *, struct iscsi_data *, bool); extern int iscsit_setup_nop_out(struct iscsi_conn *, struct iscsi_cmd *, @@ -125,6 +129,9 @@ extern void iscsit_release_cmd(struct iscsi_cmd *); extern void iscsit_free_cmd(struct iscsi_cmd *, bool); extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); +extern struct iscsi_cmd * +iscsit_find_cmd_from_itt_or_dump(struct iscsi_conn *conn, + itt_t init_task_tag, u32 length); /* * From iscsi_target_nego.c -- GitLab From 3da1110fd1ac3e11208f12c966c6de7a2c306b9d Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 13 Jan 2017 20:53:22 +0530 Subject: [PATCH 0316/1084] target/cxgbit: use cxgb4_tp_smt_idx() to get smt idx cxgb4_tp_smt_idx() returns smt idx for T4,T5,T6 adapters. 
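The expression being removed, (viid & 0x7F) << 1, is the T4/T5 mapping from a virtual interface id to a source MAC table (SMT) index; T6 arranges the SMT differently, so the driver now asks the cxgb4 helper, which knows the adapter revision. The sketch below only illustrates the idea of keeping that chip-specific mapping in one place; the T6 branch is a placeholder, not the real formula:

#include <stdio.h>

enum chip_type { CHIP_T4, CHIP_T5, CHIP_T6 };

/* modelled after cxgb4_tp_smt_idx(): one place that knows the SMT layout */
static unsigned int tp_smt_idx(enum chip_type chip, unsigned int viid)
{
        if (chip <= CHIP_T5)
                return (viid & 0x7f) << 1;      /* the old open-coded formula */

        return viid & 0x7f;     /* placeholder only, real T6 layout differs */
}

int main(void)
{
        printf("viid 0x85: T5 smac_idx=%u, T6 smac_idx=%u\n",
               tp_smt_idx(CHIP_T5, 0x85), tp_smt_idx(CHIP_T6, 0x85));
        return 0;
}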
Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 35aaa36e9af5..845a7de39b9c 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -872,7 +872,8 @@ cxgbit_offload_init(struct cxgbit_sock *csk, int iptype, __u8 *peer_ip, goto out; csk->mtu = ndev->mtu; csk->tx_chan = cxgb4_port_chan(ndev); - csk->smac_idx = (cxgb4_port_viid(ndev) & 0x7F) << 1; + csk->smac_idx = cxgb4_tp_smt_idx(cdev->lldi.adapter_type, + cxgb4_port_viid(ndev)); step = cdev->lldi.ntxq / cdev->lldi.nchan; csk->txq_idx = cxgb4_port_idx(ndev) * step; @@ -907,7 +908,8 @@ cxgbit_offload_init(struct cxgbit_sock *csk, int iptype, __u8 *peer_ip, port_id = cxgb4_port_idx(ndev); csk->mtu = dst_mtu(dst); csk->tx_chan = cxgb4_port_chan(ndev); - csk->smac_idx = (cxgb4_port_viid(ndev) & 0x7F) << 1; + csk->smac_idx = cxgb4_tp_smt_idx(cdev->lldi.adapter_type, + cxgb4_port_viid(ndev)); step = cdev->lldi.ntxq / cdev->lldi.nports; csk->txq_idx = (port_id * step) + -- GitLab From d88b504e413e7d1471562ff9c3311dbf983e0291 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 13 Jan 2017 20:53:23 +0530 Subject: [PATCH 0317/1084] target/cxgbit: Use T6 specific macros to get ETH/IP hdr len Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 845a7de39b9c..b26c85a65b80 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1068,6 +1068,7 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) struct sk_buff *skb; const struct tcphdr *tcph; struct cpl_t5_pass_accept_rpl *rpl5; + struct cxgb4_lld_info *lldi = &csk->com.cdev->lldi; unsigned int len = roundup(sizeof(*rpl5), 16); unsigned int mtu_idx; u64 opt0; @@ -1121,8 +1122,13 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) opt2 |= WND_SCALE_EN_F; hlen = ntohl(req->hdr_len); - tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + - IP_HDR_LEN_G(hlen); + + if (is_t5(lldi->adapter_type)) + tcph = (struct tcphdr *)((u8 *)(req + 1) + + ETH_HDR_LEN_G(hlen) + IP_HDR_LEN_G(hlen)); + else + tcph = (struct tcphdr *)((u8 *)(req + 1) + + T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen)); if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN_V(1); -- GitLab From 5248788ec300f7ee738502bfc466dbb9b625247e Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 13 Jan 2017 20:53:24 +0530 Subject: [PATCH 0318/1084] target/cxgbit: Enable DDP for T6 only if data sequence and pdu are in order Enable DDP for T6 only if DataSequenceInOrder=YES and DataPDUInOrder=YES to ensure inorder delivery of iSCSI pdus. 
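The restriction exists because a single T6 completion stands in for every directly placed PDU of a sequence, which is only trustworthy if both the data sequences and the PDUs within a sequence arrive in order; T5 still completes per PDU, so it keeps DDP either way. A condensed, stand-alone view of the decision encoded by the cxgbit_set_params() changes in the diff below (simplified, the goto labels replaced by plain flags):

#include <stdbool.h>
#include <stdio.h>

static void setup_conn(bool is_t5, bool seq_pdu_in_order)
{
        bool use_iso = false, use_ddp = false;

        if (seq_pdu_in_order) {
                use_iso = true;         /* iSCSI segmentation offload */
                use_ddp = true;         /* direct data placement */
        } else if (is_t5) {
                use_ddp = true;         /* per-PDU completions, still safe */
        }
        /* out of order on T6: neither, only digest setup remains */

        printf("%s in-order=%d -> iso=%d ddp=%d\n",
               is_t5 ? "T5" : "T6", seq_pdu_in_order, use_iso, use_ddp);
}

int main(void)
{
        setup_conn(true, false);        /* T5 keeps DDP */
        setup_conn(false, false);       /* T6 skips DDP */
        setup_conn(false, true);        /* T6 in order: ISO and DDP */
        return 0;
}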
Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_target.c | 86 +++++++++++++++------ 1 file changed, 61 insertions(+), 25 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 8ae399dfe84f..4780fae2a1d1 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -653,26 +653,6 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) u32 max_npdu, max_iso_npdu; if (conn->login->leading_connection) { - param = iscsi_find_param_from_key(DATASEQUENCEINORDER, - conn->param_list); - if (!param) { - pr_err("param not found key %s\n", DATASEQUENCEINORDER); - return -1; - } - - if (strcmp(param->value, YES)) - return 0; - - param = iscsi_find_param_from_key(DATAPDUINORDER, - conn->param_list); - if (!param) { - pr_err("param not found key %s\n", DATAPDUINORDER); - return -1; - } - - if (strcmp(param->value, YES)) - return 0; - param = iscsi_find_param_from_key(MAXBURSTLENGTH, conn->param_list); if (!param) { @@ -683,11 +663,6 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) if (kstrtou32(param->value, 0, &mbl) < 0) return -1; } else { - if (!conn->sess->sess_ops->DataSequenceInOrder) - return 0; - if (!conn->sess->sess_ops->DataPDUInOrder) - return 0; - mbl = conn->sess->sess_ops->MaxBurstLength; } @@ -706,6 +681,53 @@ static int cxgbit_set_iso_npdu(struct cxgbit_sock *csk) return 0; } +/* + * cxgbit_seq_pdu_inorder() + * @csk: pointer to cxgbit socket structure + * + * This function checks whether data sequence and data + * pdu are in order. + * + * Return: returns -1 on error, 0 if data sequence and + * data pdu are in order, 1 if data sequence or data pdu + * is not in order. 
+ */ +static int cxgbit_seq_pdu_inorder(struct cxgbit_sock *csk) +{ + struct iscsi_conn *conn = csk->conn; + struct iscsi_param *param; + + if (conn->login->leading_connection) { + param = iscsi_find_param_from_key(DATASEQUENCEINORDER, + conn->param_list); + if (!param) { + pr_err("param not found key %s\n", DATASEQUENCEINORDER); + return -1; + } + + if (strcmp(param->value, YES)) + return 1; + + param = iscsi_find_param_from_key(DATAPDUINORDER, + conn->param_list); + if (!param) { + pr_err("param not found key %s\n", DATAPDUINORDER); + return -1; + } + + if (strcmp(param->value, YES)) + return 1; + + } else { + if (!conn->sess->sess_ops->DataSequenceInOrder) + return 1; + if (!conn->sess->sess_ops->DataPDUInOrder) + return 1; + } + + return 0; +} + static int cxgbit_set_params(struct iscsi_conn *conn) { struct cxgbit_sock *csk = conn->context; @@ -732,11 +754,24 @@ static int cxgbit_set_params(struct iscsi_conn *conn) } if (!erl) { + int ret; + + ret = cxgbit_seq_pdu_inorder(csk); + if (ret < 0) { + return -1; + } else if (ret > 0) { + if (is_t5(cdev->lldi.adapter_type)) + goto enable_ddp; + else + goto enable_digest; + } + if (test_bit(CDEV_ISO_ENABLE, &cdev->flags)) { if (cxgbit_set_iso_npdu(csk)) return -1; } +enable_ddp: if (test_bit(CDEV_DDP_ENABLE, &cdev->flags)) { if (cxgbit_setup_conn_pgidx(csk, ppm->tformat.pgsz_idx_dflt)) @@ -745,6 +780,7 @@ static int cxgbit_set_params(struct iscsi_conn *conn) } } +enable_digest: if (cxgbit_set_digest(csk)) return -1; -- GitLab From 79e57cfe00f40d509e2d007a5662db26cdbc74db Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 13 Jan 2017 20:53:25 +0530 Subject: [PATCH 0319/1084] target/cxgbit: add T6 iSCSI DDP completion feature Chelsio T6 adapters reduce number of completion to host by generating single completion for all directly placed(DDP) iSCSI pdus in a sequence, completion contains iSCSI hdr of the last pdu in a sequence. On receiving DDP completion cxgbit driver finds iSCSI cmd using iscsit_find_cmd_from_itt_or_dump(), then updates cmd->write_data_done, cmd->next_burst_len, cmd->data_sn and calls __iscsit_check_dataout_hdr() to validate iSCSI hdr. 
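Put together, the data-out path for one of these completions does bookkeeping first and validation second: the completion carries only the header of the last PDU in the sequence, so the bytes covered by all the skipped per-PDU completions have to be reconstructed from its offset field. The stand-alone sketch below mirrors the cxgbit_handle_iscsi_dataout() change at the end of this patch, with stand-in types and host byte order for readability:

#include <stdio.h>

/* stand-ins for the iscsi_cmd fields the patch updates */
struct cmd_state {
        unsigned int write_data_done;
        unsigned int next_burst_len;
        unsigned int data_sn;
};

/* one T6 DDP completion: header fields of the last PDU in the sequence */
struct ddp_completion {
        unsigned int offset;
        unsigned int datasn;
};

static void handle_ddp_completion(struct cmd_state *cmd,
                                  const struct ddp_completion *c)
{
        /* bytes covered by all the per-PDU completions that never arrived */
        unsigned int ddp_len = c->offset - cmd->write_data_done;

        cmd->write_data_done = c->offset;
        cmd->next_burst_len = ddp_len;
        cmd->data_sn = c->datasn;

        /* only now is the final header validated, in the real code via
         * __iscsit_check_dataout_hdr() against this updated state */
        printf("placed %u bytes, write_data_done=%u, data_sn=%u\n",
               ddp_len, cmd->write_data_done, cmd->data_sn);
}

int main(void)
{
        struct cmd_state cmd = { .write_data_done = 4096 };
        struct ddp_completion c = { .offset = 65536, .datasn = 15 };

        handle_ddp_completion(&cmd, &c);
        return 0;
}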
(Update __iscsit_check_dataout_hdr parameter usage - nab) Signed-off-by: Varun Prakash Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 3 + drivers/target/iscsi/cxgbit/cxgbit_lro.h | 5 +- drivers/target/iscsi/cxgbit/cxgbit_main.c | 67 ++++++++++++----- drivers/target/iscsi/cxgbit/cxgbit_target.c | 81 +++++++++++++++------ 4 files changed, 113 insertions(+), 43 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index b26c85a65b80..37a05185dcbe 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1114,6 +1114,9 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) opt2 = RX_CHANNEL_V(0) | RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid); + if (!is_t5(lldi->adapter_type)) + opt2 |= RX_FC_DISABLE_F; + if (req->tcpopt.tstamp) opt2 |= TSTAMPS_EN_F; if (req->tcpopt.sack) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_lro.h b/drivers/target/iscsi/cxgbit/cxgbit_lro.h index 28c11bd1b930..dcaed3a1d23f 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_lro.h +++ b/drivers/target/iscsi/cxgbit/cxgbit_lro.h @@ -31,8 +31,9 @@ enum cxgbit_pducb_flags { PDUCBF_RX_DATA = (1 << 1), /* received pdu payload */ PDUCBF_RX_STATUS = (1 << 2), /* received ddp status */ PDUCBF_RX_DATA_DDPD = (1 << 3), /* pdu payload ddp'd */ - PDUCBF_RX_HCRC_ERR = (1 << 4), /* header digest error */ - PDUCBF_RX_DCRC_ERR = (1 << 5), /* data digest error */ + PDUCBF_RX_DDP_CMP = (1 << 4), /* ddp completion */ + PDUCBF_RX_HCRC_ERR = (1 << 5), /* header digest error */ + PDUCBF_RX_DCRC_ERR = (1 << 6), /* data digest error */ }; struct cxgbit_lro_pdu_cb { diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c index 6531aaac2b96..4fd775ace541 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_main.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c @@ -165,29 +165,24 @@ static int cxgbit_uld_state_change(void *handle, enum cxgb4_state state) } static void -cxgbit_proc_ddp_status(unsigned int tid, struct cpl_rx_data_ddp *cpl, - struct cxgbit_lro_pdu_cb *pdu_cb) +cxgbit_process_ddpvld(struct cxgbit_sock *csk, struct cxgbit_lro_pdu_cb *pdu_cb, + u32 ddpvld) { - unsigned int status = ntohl(cpl->ddpvld); - pdu_cb->flags |= PDUCBF_RX_STATUS; - pdu_cb->ddigest = ntohl(cpl->ulp_crc); - pdu_cb->pdulen = ntohs(cpl->len); - - if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_HCRC_SHIFT)) { - pr_info("tid 0x%x, status 0x%x, hcrc bad.\n", tid, status); + if (ddpvld & (1 << CPL_RX_ISCSI_DDP_STATUS_HCRC_SHIFT)) { + pr_info("tid 0x%x, status 0x%x, hcrc bad.\n", csk->tid, ddpvld); pdu_cb->flags |= PDUCBF_RX_HCRC_ERR; } - if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_DCRC_SHIFT)) { - pr_info("tid 0x%x, status 0x%x, dcrc bad.\n", tid, status); + if (ddpvld & (1 << CPL_RX_ISCSI_DDP_STATUS_DCRC_SHIFT)) { + pr_info("tid 0x%x, status 0x%x, dcrc bad.\n", csk->tid, ddpvld); pdu_cb->flags |= PDUCBF_RX_DCRC_ERR; } - if (status & (1 << CPL_RX_ISCSI_DDP_STATUS_PAD_SHIFT)) - pr_info("tid 0x%x, status 0x%x, pad bad.\n", tid, status); + if (ddpvld & (1 << CPL_RX_ISCSI_DDP_STATUS_PAD_SHIFT)) + pr_info("tid 0x%x, status 0x%x, pad bad.\n", csk->tid, ddpvld); - if ((status & (1 << CPL_RX_ISCSI_DDP_STATUS_DDP_SHIFT)) && + if ((ddpvld & (1 << CPL_RX_ISCSI_DDP_STATUS_DDP_SHIFT)) && (!(pdu_cb->flags & PDUCBF_RX_DATA))) { pdu_cb->flags |= PDUCBF_RX_DATA_DDPD; } @@ -201,13 +196,17 @@ cxgbit_lro_add_packet_rsp(struct sk_buff *skb, u8 op, const __be64 *rsp) 
lro_cb->pdu_idx); struct cpl_rx_iscsi_ddp *cpl = (struct cpl_rx_iscsi_ddp *)(rsp + 1); - cxgbit_proc_ddp_status(lro_cb->csk->tid, cpl, pdu_cb); + cxgbit_process_ddpvld(lro_cb->csk, pdu_cb, be32_to_cpu(cpl->ddpvld)); + + pdu_cb->flags |= PDUCBF_RX_STATUS; + pdu_cb->ddigest = ntohl(cpl->ulp_crc); + pdu_cb->pdulen = ntohs(cpl->len); if (pdu_cb->flags & PDUCBF_RX_HDR) pdu_cb->complete = true; - lro_cb->complete = true; lro_cb->pdu_totallen += pdu_cb->pdulen; + lro_cb->complete = true; lro_cb->pdu_idx++; } @@ -257,7 +256,7 @@ cxgbit_lro_add_packet_gl(struct sk_buff *skb, u8 op, const struct pkt_gl *gl) cxgbit_skcb_flags(skb) = 0; lro_cb->complete = false; - } else { + } else if (op == CPL_ISCSI_DATA) { struct cpl_iscsi_data *cpl = (struct cpl_iscsi_data *)gl->va; offset = sizeof(struct cpl_iscsi_data); @@ -267,6 +266,36 @@ cxgbit_lro_add_packet_gl(struct sk_buff *skb, u8 op, const struct pkt_gl *gl) pdu_cb->doffset = lro_cb->offset; pdu_cb->nr_dfrags = gl->nfrags; pdu_cb->dfrag_idx = skb_shinfo(skb)->nr_frags; + lro_cb->complete = false; + } else { + struct cpl_rx_iscsi_cmp *cpl; + + cpl = (struct cpl_rx_iscsi_cmp *)gl->va; + offset = sizeof(struct cpl_rx_iscsi_cmp); + pdu_cb->flags |= (PDUCBF_RX_HDR | PDUCBF_RX_STATUS); + len = be16_to_cpu(cpl->len); + pdu_cb->hdr = gl->va + offset; + pdu_cb->hlen = len; + pdu_cb->hfrag_idx = skb_shinfo(skb)->nr_frags; + pdu_cb->ddigest = be32_to_cpu(cpl->ulp_crc); + pdu_cb->pdulen = ntohs(cpl->len); + + if (unlikely(gl->nfrags > 1)) + cxgbit_skcb_flags(skb) = 0; + + cxgbit_process_ddpvld(lro_cb->csk, pdu_cb, + be32_to_cpu(cpl->ddpvld)); + + if (pdu_cb->flags & PDUCBF_RX_DATA_DDPD) { + pdu_cb->flags |= PDUCBF_RX_DDP_CMP; + pdu_cb->complete = true; + } else if (pdu_cb->flags & PDUCBF_RX_DATA) { + pdu_cb->complete = true; + } + + lro_cb->pdu_totallen += pdu_cb->hlen + pdu_cb->dlen; + lro_cb->complete = true; + lro_cb->pdu_idx++; } cxgbit_copy_frags(skb, gl, offset); @@ -413,6 +442,7 @@ cxgbit_uld_lro_rx_handler(void *hndl, const __be64 *rsp, switch (op) { case CPL_ISCSI_HDR: case CPL_ISCSI_DATA: + case CPL_RX_ISCSI_CMP: case CPL_RX_ISCSI_DDP: case CPL_FW4_ACK: lro_flush = false; @@ -459,7 +489,8 @@ cxgbit_uld_lro_rx_handler(void *hndl, const __be64 *rsp, return 0; } - if (op == CPL_ISCSI_HDR || op == CPL_ISCSI_DATA) { + if ((op == CPL_ISCSI_HDR) || (op == CPL_ISCSI_DATA) || + (op == CPL_RX_ISCSI_CMP)) { if (!cxgbit_lro_receive(csk, op, rsp, gl, lro_mgr, napi)) return 0; diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 4780fae2a1d1..2714e5901d18 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -1021,11 +1021,36 @@ static int cxgbit_handle_iscsi_dataout(struct cxgbit_sock *csk) int rc, sg_nents, sg_off; bool dcrc_err = false; - rc = iscsit_check_dataout_hdr(conn, (unsigned char *)hdr, &cmd); - if (rc < 0) - return rc; - else if (!cmd) - return 0; + if (pdu_cb->flags & PDUCBF_RX_DDP_CMP) { + u32 offset = be32_to_cpu(hdr->offset); + u32 ddp_data_len; + u32 payload_length = ntoh24(hdr->dlength); + bool success = false; + + cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, 0); + if (!cmd) + return 0; + + ddp_data_len = offset - cmd->write_data_done; + atomic_long_add(ddp_data_len, &conn->sess->rx_data_octets); + + cmd->write_data_done = offset; + cmd->next_burst_len = ddp_data_len; + cmd->data_sn = be32_to_cpu(hdr->datasn); + + rc = __iscsit_check_dataout_hdr(conn, (unsigned char *)hdr, + cmd, payload_length, &success); + if (rc < 0) + return 
rc; + else if (!success) + return 0; + } else { + rc = iscsit_check_dataout_hdr(conn, (unsigned char *)hdr, &cmd); + if (rc < 0) + return rc; + else if (!cmd) + return 0; + } if (pdu_cb->flags & PDUCBF_RX_DCRC_ERR) { pr_err("ITT: 0x%08x, Offset: %u, Length: %u," @@ -1389,6 +1414,9 @@ static void cxgbit_lro_hskb_reset(struct cxgbit_sock *csk) for (i = 0; i < ssi->nr_frags; i++) put_page(skb_frag_page(&ssi->frags[i])); ssi->nr_frags = 0; + skb->data_len = 0; + skb->truesize -= skb->len; + skb->len = 0; } static void @@ -1402,39 +1430,42 @@ cxgbit_lro_skb_merge(struct cxgbit_sock *csk, struct sk_buff *skb, u8 pdu_idx) unsigned int len = 0; if (pdu_cb->flags & PDUCBF_RX_HDR) { - hpdu_cb->flags = pdu_cb->flags; + u8 hfrag_idx = hssi->nr_frags; + + hpdu_cb->flags |= pdu_cb->flags; hpdu_cb->seq = pdu_cb->seq; hpdu_cb->hdr = pdu_cb->hdr; hpdu_cb->hlen = pdu_cb->hlen; - memcpy(&hssi->frags[0], &ssi->frags[pdu_cb->hfrag_idx], + memcpy(&hssi->frags[hfrag_idx], &ssi->frags[pdu_cb->hfrag_idx], sizeof(skb_frag_t)); - get_page(skb_frag_page(&hssi->frags[0])); - hssi->nr_frags = 1; - hpdu_cb->frags = 1; - hpdu_cb->hfrag_idx = 0; + get_page(skb_frag_page(&hssi->frags[hfrag_idx])); + hssi->nr_frags++; + hpdu_cb->frags++; + hpdu_cb->hfrag_idx = hfrag_idx; - len = hssi->frags[0].size; - hskb->len = len; - hskb->data_len = len; - hskb->truesize = len; + len = hssi->frags[hfrag_idx].size; + hskb->len += len; + hskb->data_len += len; + hskb->truesize += len; } if (pdu_cb->flags & PDUCBF_RX_DATA) { - u8 hfrag_idx = 1, i; + u8 dfrag_idx = hssi->nr_frags, i; hpdu_cb->flags |= pdu_cb->flags; + hpdu_cb->dfrag_idx = dfrag_idx; len = 0; - for (i = 0; i < pdu_cb->nr_dfrags; hfrag_idx++, i++) { - memcpy(&hssi->frags[hfrag_idx], + for (i = 0; i < pdu_cb->nr_dfrags; dfrag_idx++, i++) { + memcpy(&hssi->frags[dfrag_idx], &ssi->frags[pdu_cb->dfrag_idx + i], sizeof(skb_frag_t)); - get_page(skb_frag_page(&hssi->frags[hfrag_idx])); + get_page(skb_frag_page(&hssi->frags[dfrag_idx])); - len += hssi->frags[hfrag_idx].size; + len += hssi->frags[dfrag_idx].size; hssi->nr_frags++; hpdu_cb->frags++; @@ -1443,7 +1474,6 @@ cxgbit_lro_skb_merge(struct cxgbit_sock *csk, struct sk_buff *skb, u8 pdu_idx) hpdu_cb->dlen = pdu_cb->dlen; hpdu_cb->doffset = hpdu_cb->hlen; hpdu_cb->nr_dfrags = pdu_cb->nr_dfrags; - hpdu_cb->dfrag_idx = 1; hskb->len += len; hskb->data_len += len; hskb->truesize += len; @@ -1528,10 +1558,15 @@ static int cxgbit_rx_lro_skb(struct cxgbit_sock *csk, struct sk_buff *skb) static int cxgbit_rx_skb(struct cxgbit_sock *csk, struct sk_buff *skb) { + struct cxgb4_lld_info *lldi = &csk->com.cdev->lldi; int ret = -1; - if (likely(cxgbit_skcb_flags(skb) & SKCBF_RX_LRO)) - ret = cxgbit_rx_lro_skb(csk, skb); + if (likely(cxgbit_skcb_flags(skb) & SKCBF_RX_LRO)) { + if (is_t5(lldi->adapter_type)) + ret = cxgbit_rx_lro_skb(csk, skb); + else + ret = cxgbit_process_lro_skb(csk, skb); + } __kfree_skb(skb); return ret; -- GitLab From 4d65491c269729a1e3b375c45e73213f49103d33 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Fri, 20 Jan 2017 16:44:33 +0530 Subject: [PATCH 0320/1084] target/iscsi: Fix unsolicited data seq_end_offset calculation In case of unsolicited data for the first sequence seq_end_offset must be set to minimum of total data length and FirstBurstLength, so do not add cmd->write_data_done to the min of total data length and FirstBurstLength. 
This patch avoids that with ImmediateData=Yes, InitialR2T=No, MaxXmitDataSegmentLength < FirstBurstLength that a WRITE command with IO size above FirstBurstLength triggers sequence error messages, for example Set following parameters on target (linux-4.8.12) ImmediateData = Yes InitialR2T = No MaxXmitDataSegmentLength = 8k FirstBurstLength = 64k Log in from Open iSCSI initiator and execute dd if=/dev/zero of=/dev/sdb bs=128k count=1 oflag=direct Error messages on target Command ITT: 0x00000035 with Offset: 65536, Length: 8192 outside of Sequence 73728:131072 while DataSequenceInOrder=Yes. Command ITT: 0x00000035, received DataSN: 0x00000001 higher than expected 0x00000000. Unable to perform within-command recovery while ERL=0. Signed-off-by: Varun Prakash [ bvanassche: Use min() instead of open-coding it / edited patch description ] Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_erl0.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index b54e72c7ab0f..a8bcbc43b047 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -44,10 +44,8 @@ void iscsit_set_dataout_sequence_values( */ if (cmd->unsolicited_data) { cmd->seq_start_offset = cmd->write_data_done; - cmd->seq_end_offset = (cmd->write_data_done + - ((cmd->se_cmd.data_length > - conn->sess->sess_ops->FirstBurstLength) ? - conn->sess->sess_ops->FirstBurstLength : cmd->se_cmd.data_length)); + cmd->seq_end_offset = min(cmd->se_cmd.data_length, + conn->sess->sess_ops->FirstBurstLength); return; } -- GitLab From 6cb3216a7863fd13fb125f0867fce55c2bbd8c8e Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Mon, 13 Feb 2017 12:18:29 -0800 Subject: [PATCH 0321/1084] qla2xxx: Fix a warning reported by the "smatch" static checker Fix the following warning reported by the "smatch" static checker: drivers/scsi/qla2xxx/qla_init.c:3910 qla2x00_alloc_fcport() warn: use 'flags' here instead of GFP_XXX? Reported-by: Dan Carpenter Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Cc: Dan Carpenter Signed-off-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index f65431480833..ac579fc5f202 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3909,7 +3909,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) fcport->ct_desc.ct_sns = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &fcport->ct_desc.ct_sns_dma, - GFP_ATOMIC); + flags); fcport->disc_state = DSC_DELETED; fcport->fw_login_state = DSC_LS_PORT_UNAVAIL; fcport->deleted = QLA_SESS_DELETED; -- GitLab From 0ab8ac6f504d61b59856bd34e62bbfd60edd2bb0 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 13 Jan 2017 04:47:48 -0600 Subject: [PATCH 0322/1084] target: export protocol identifier I think the transport statistics device file was supposed to show scsiTransportProtocolType. It instead shows the fabric name which is normally closer to the driver name. I was thinking I cannot change from fabric name to protocol type name incase people are expecting the driver name, so this patch adds another file proto_id that exports the SCSI protocol identifier ID. 
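Not part of the patch, but as a usage illustration: once proto_id exists, user space can read it like any other statistics attribute. The configfs mount point and the target/LUN path below are examples only and depend on the local configuration.

/*
 * Sketch: read a LUN's SCSI protocol identifier from the new
 * statistics attribute.  The path is illustrative; substitute the
 * fabric, target name, TPG and LUN configured on the system.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/config/target/iscsi/"
			   "iqn.2003-01.org.example:tgt1/tpgt_1/lun/lun_0/"
			   "statistics/scsi_transport/proto_id";
	char buf[16];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("SCSI protocol identifier: %s", buf);
	fclose(f);
	return 0;
}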
Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_stat.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index 1a39033d2bff..be380e4acfcd 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -795,16 +795,34 @@ static ssize_t target_stat_transport_dev_name_show(struct config_item *item, return ret; } +static ssize_t target_stat_transport_proto_id_show(struct config_item *item, + char *page) +{ + struct se_lun *lun = to_transport_stat(item); + struct se_device *dev; + struct se_portal_group *tpg = lun->lun_tpg; + ssize_t ret = -ENODEV; + + rcu_read_lock(); + dev = rcu_dereference(lun->lun_se_dev); + if (dev) + ret = snprintf(page, PAGE_SIZE, "%u\n", tpg->proto_id); + rcu_read_unlock(); + return ret; +} + CONFIGFS_ATTR_RO(target_stat_transport_, inst); CONFIGFS_ATTR_RO(target_stat_transport_, device); CONFIGFS_ATTR_RO(target_stat_transport_, indx); CONFIGFS_ATTR_RO(target_stat_transport_, dev_name); +CONFIGFS_ATTR_RO(target_stat_transport_, proto_id); static struct configfs_attribute *target_stat_scsi_transport_attrs[] = { &target_stat_transport_attr_inst, &target_stat_transport_attr_device, &target_stat_transport_attr_indx, &target_stat_transport_attr_dev_name, + &target_stat_transport_attr_proto_id, NULL, }; -- GitLab From 762b6f00a995863afa274d6b5ffa3880dac1714b Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 15 Feb 2017 23:04:54 +0300 Subject: [PATCH 0323/1084] uapi: fix linux/target_core_user.h userspace compilation errors Consistently use types from linux/types.h to fix the following linux/target_core_user.h userspace compilation errors: /usr/include/linux/target_core_user.h:108:4: error: unknown type name 'uint32_t' uint32_t iov_cnt; /usr/include/linux/target_core_user.h:109:4: error: unknown type name 'uint32_t' uint32_t iov_bidi_cnt; /usr/include/linux/target_core_user.h:110:4: error: unknown type name 'uint32_t' uint32_t iov_dif_cnt; /usr/include/linux/target_core_user.h:111:4: error: unknown type name 'uint64_t' uint64_t cdb_off; /usr/include/linux/target_core_user.h:112:4: error: unknown type name 'uint64_t' uint64_t __pad1; /usr/include/linux/target_core_user.h:113:4: error: unknown type name 'uint64_t' uint64_t __pad2; /usr/include/linux/target_core_user.h:117:4: error: unknown type name 'uint8_t' uint8_t scsi_status; /usr/include/linux/target_core_user.h:118:4: error: unknown type name 'uint8_t' uint8_t __pad1; /usr/include/linux/target_core_user.h:119:4: error: unknown type name 'uint16_t' uint16_t __pad2; /usr/include/linux/target_core_user.h:120:4: error: unknown type name 'uint32_t' uint32_t __pad3; Signed-off-by: Dmitry V. 
Levin Signed-off-by: Nicholas Bellinger --- include/uapi/linux/target_core_user.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index c506cddb8165..af17b4154ef6 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -105,26 +105,26 @@ struct tcmu_cmd_entry { union { struct { - uint32_t iov_cnt; - uint32_t iov_bidi_cnt; - uint32_t iov_dif_cnt; - uint64_t cdb_off; - uint64_t __pad1; - uint64_t __pad2; + __u32 iov_cnt; + __u32 iov_bidi_cnt; + __u32 iov_dif_cnt; + __u64 cdb_off; + __u64 __pad1; + __u64 __pad2; struct iovec iov[0]; } req; struct { - uint8_t scsi_status; - uint8_t __pad1; - uint16_t __pad2; - uint32_t __pad3; + __u8 scsi_status; + __u8 __pad1; + __u16 __pad2; + __u32 __pad3; char sense_buffer[TCMU_SENSE_BUFFERSIZE]; } rsp; }; } __packed; -#define TCMU_OP_ALIGN_SIZE sizeof(uint64_t) +#define TCMU_OP_ALIGN_SIZE sizeof(__u64) enum tcmu_genl_cmd { TCMU_CMD_UNSPEC, -- GitLab From a311e738b6d81febae21f0447c5b31752f0d1634 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 17 Feb 2017 17:34:13 +1100 Subject: [PATCH 0324/1084] powerpc/powernv: Make PCI non-optional Bare metal systems without PCI don't exist, so there's no real point in making PCI optional, it just breaks the build from time to time. In fact the build is broken now if you turn off PCI_MSI but enable KVM. Using select for PCI is OK because we (powerpc) define config PCI, and it has no dependencies. Selecting PCI_MSI is slightly fishy, because it's in drivers/pci and it is user-visible, but its only dependency is PCI, so selecting it can't actually lead to breakage. Signed-off-by: Michael Ellerman Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 604190cab522..3a07e4dcf97c 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -5,7 +5,8 @@ config PPC_POWERNV select PPC_XICS select PPC_ICP_NATIVE select PPC_P7_NAP - select PPC_PCI_CHOICE if EMBEDDED + select PCI + select PCI_MSI select EPAPR_BOOT select PPC_INDIRECT_PIO select PPC_UDBG_16550 -- GitLab From 9f0c18aec620bc9d82268b3cb937568dd07b43ff Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 15 Feb 2017 12:21:17 -0600 Subject: [PATCH 0325/1084] objtool: Fix CONFIG_STACK_VALIDATION=y warning for out-of-tree modules When building a CONFIG_STACK_VALIDATION enabled kernel without the libelf devel package installed, the Makefile prints a warning: "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel" But when building an out-of-tree module, the warning doesn't show. Instead it tries to use objtool, and the build fails with: /bin/sh: ./tools/objtool/objtool: No such file or directory Make sure the warning and the disabling of objtool occur in all cases, by moving the CONFIG_STACK_VALIDATION checks outside the 'ifeq ($(KBUILD_EXTMOD),)' block in the Makefile. 
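For reference, the probe that the try-run check builds is just an empty program linked against libelf; a sketch of the equivalent standalone test:

/*
 * Equivalent of the Makefile's try-run probe: if this trivial program
 * links with "$(HOSTCC) -xc -o /dev/null -lelf", libelf is present and
 * objtool can be built; otherwise SKIP_STACK_VALIDATION is exported
 * and stack validation is disabled, for in-tree and out-of-tree
 * builds alike.
 */
int main(void)
{
	return 0;
}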
Tested-By: Marc MERLIN Suggested-by: Jessica Yu Reported-by: Marc MERLIN Signed-off-by: Josh Poimboeuf Reviewed-by: Jessica Yu Cc: Linus Torvalds Cc: Michal Marek Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 3b27a0c85d70 ("objtool: Detect and warn if libelf is missing and don't break the build") Link: http://lkml.kernel.org/r/b3088ae4a8698143d4851965793c61fec2135b1f.1487182864.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- Makefile | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index f1e6a02a0c19..32c84577aa93 100644 --- a/Makefile +++ b/Makefile @@ -908,6 +908,18 @@ mod_sign_cmd = true endif export mod_sign_cmd +ifdef CONFIG_STACK_VALIDATION + has_libelf := $(call try-run,\ + echo "int main() {}" | $(HOSTCC) -xc -o /dev/null -lelf -,1,0) + ifeq ($(has_libelf),1) + objtool_target := tools/objtool FORCE + else + $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel") + SKIP_STACK_VALIDATION := 1 + export SKIP_STACK_VALIDATION + endif +endif + ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ @@ -1035,18 +1047,6 @@ prepare0: archprepare gcc-plugins # All the preparing.. prepare: prepare0 prepare-objtool -ifdef CONFIG_STACK_VALIDATION - has_libelf := $(call try-run,\ - echo "int main() {}" | $(HOSTCC) -xc -o /dev/null -lelf -,1,0) - ifeq ($(has_libelf),1) - objtool_target := tools/objtool FORCE - else - $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel") - SKIP_STACK_VALIDATION := 1 - export SKIP_STACK_VALIDATION - endif -endif - PHONY += prepare-objtool prepare-objtool: $(objtool_target) -- GitLab From 30c71233a1d4b5c23ad6652847285bf6b57086e2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Dec 2016 08:37:56 -0500 Subject: [PATCH 0326/1084] ceph: clean up unsafe d_parent access in __choose_mds __choose_mds exists to pick an MDS to use when issuing a call. Doing that typically involves picking an inode and using the authoritative MDS for it. In most cases, that's pretty straightforward, as we are using an inode to which we hold a reference (usually represented by r_dentry or r_inode in the request). In the case of a snapshotted directory however, we need to fetch the non-snapped parent, which involves walking back up the parents in the tree. The dentries in the snapshot dir are effectively frozen but the overall parent is _not_, and could vanish if a concurrent rename were to occur. Clean this code up and take special care to ensure the validity of the entries we're working with. First, try to use the inode in r_locked_dir if one exists. If not and all we have is r_dentry, then we have to walk back up the tree. Use the rcu_read_lock for this so we can ensure that any d_parent we find won't go away, and take extra care to deal with the possibility that the dentries could go negative. Change get_nonsnap_parent to return an inode, and take a reference to that inode before returning (if any). Change all of the other places where we set "inode" in __choose_mds to also take a reference, and then call iput on that inode before exiting the function. 
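Condensed to its core, the pattern the patch adopts is: walk d_parent only under the RCU read lock, and pin whatever inode is finally chosen with igrab() before using it. A simplified sketch (the helper name is illustrative; in the patch itself the RCU read lock is taken by the caller of get_nonsnap_parent()):

/*
 * Sketch: find the nearest non-snapshot ancestor inode and return it
 * with a reference held, without ever dereferencing d_parent outside
 * rcu_read_lock().
 */
static struct inode *pin_nonsnap_parent(struct dentry *dentry)
{
	struct inode *inode = NULL;

	rcu_read_lock();
	while (dentry && !IS_ROOT(dentry)) {
		inode = d_inode_rcu(dentry);
		if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
			break;
		dentry = dentry->d_parent;
	}
	if (inode)
		inode = igrab(inode);	/* may return NULL if being freed */
	rcu_read_unlock();

	return inode;	/* caller must iput() when done */
}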
Link: http://tracker.ceph.com/issues/18148 Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 64 +++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c9d2e553a6c4..377ac34ddbb3 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -667,6 +667,27 @@ static void __unregister_request(struct ceph_mds_client *mdsc, ceph_mdsc_put_request(req); } +/* + * Walk back up the dentry tree until we hit a dentry representing a + * non-snapshot inode. We do this using the rcu_read_lock (which must be held + * when calling this) to ensure that the objects won't disappear while we're + * working with them. Once we hit a candidate dentry, we attempt to take a + * reference to it, and return that as the result. + */ +static struct inode *get_nonsnap_parent(struct dentry *dentry) { struct inode + *inode = NULL; + + while (dentry && !IS_ROOT(dentry)) { + inode = d_inode_rcu(dentry); + if (!inode || ceph_snap(inode) == CEPH_NOSNAP) + break; + dentry = dentry->d_parent; + } + if (inode) + inode = igrab(inode); + return inode; +} + /* * Choose mds to send request to next. If there is a hint set in the * request (e.g., due to a prior forward hint from the mds), use that. @@ -675,19 +696,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc, * * Called under mdsc->mutex. */ -static struct dentry *get_nonsnap_parent(struct dentry *dentry) -{ - /* - * we don't need to worry about protecting the d_parent access - * here because we never renaming inside the snapped namespace - * except to resplice to another snapdir, and either the old or new - * result is a valid result. - */ - while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) - dentry = dentry->d_parent; - return dentry; -} - static int __choose_mds(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { @@ -717,30 +725,39 @@ static int __choose_mds(struct ceph_mds_client *mdsc, inode = NULL; if (req->r_inode) { inode = req->r_inode; + ihold(inode); } else if (req->r_dentry) { /* ignore race with rename; old or new d_parent is okay */ - struct dentry *parent = req->r_dentry->d_parent; - struct inode *dir = d_inode(parent); + struct dentry *parent; + struct inode *dir; + + rcu_read_lock(); + parent = req->r_dentry->d_parent; + dir = req->r_locked_dir ? : d_inode_rcu(parent); - if (dir->i_sb != mdsc->fsc->sb) { - /* not this fs! 
*/ + if (!dir || dir->i_sb != mdsc->fsc->sb) { + /* not this fs or parent went negative */ inode = d_inode(req->r_dentry); + if (inode) + ihold(inode); } else if (ceph_snap(dir) != CEPH_NOSNAP) { /* direct snapped/virtual snapdir requests * based on parent dir inode */ - struct dentry *dn = get_nonsnap_parent(parent); - inode = d_inode(dn); + inode = get_nonsnap_parent(parent); dout("__choose_mds using nonsnap parent %p\n", inode); } else { /* dentry target */ inode = d_inode(req->r_dentry); if (!inode || mode == USE_AUTH_MDS) { /* dir + name */ - inode = dir; + inode = igrab(dir); hash = ceph_dentry_hash(dir, req->r_dentry); is_hash = true; + } else { + ihold(inode); } } + rcu_read_unlock(); } dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, @@ -769,7 +786,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, (int)r, frag.ndist); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE) - return mds; + goto out; } /* since this file/dir wasn't known to be @@ -784,7 +801,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, inode, ceph_vinop(inode), frag.frag, mds); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= CEPH_MDS_STATE_ACTIVE) - return mds; + goto out; } } } @@ -797,6 +814,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); if (!cap) { spin_unlock(&ci->i_ceph_lock); + iput(inode); goto random; } mds = cap->session->s_mds; @@ -804,6 +822,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, inode, ceph_vinop(inode), mds, cap == ci->i_auth_cap ? "auth " : "", cap); spin_unlock(&ci->i_ceph_lock); +out: + iput(inode); return mds; random: -- GitLab From c6b0b656ca24ede6657abb4a2cd910fa9c1879ba Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Dec 2016 08:37:57 -0500 Subject: [PATCH 0327/1084] ceph: clean up unsafe d_parent accesses in build_dentry_path While we hold a reference to the dentry when build_dentry_path is called, we could end up racing with a rename that changes d_parent. Handle that situation correctly, by using the rcu_read_lock to ensure that the parent dentry and inode stick around long enough to safely check ceph_snap and ceph_ino. Link: http://tracker.ceph.com/issues/18148 Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 377ac34ddbb3..7c2eb28c24f6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1800,13 +1800,18 @@ static int build_dentry_path(struct dentry *dentry, int *pfreepath) { char *path; + struct inode *dir; - if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) { - *pino = ceph_ino(d_inode(dentry->d_parent)); + rcu_read_lock(); + dir = d_inode_rcu(dentry->d_parent); + if (dir && ceph_snap(dir) == CEPH_NOSNAP) { + *pino = ceph_ino(dir); + rcu_read_unlock(); *ppath = dentry->d_name.name; *ppathlen = dentry->d_name.len; return 0; } + rcu_read_unlock(); path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); if (IS_ERR(path)) return PTR_ERR(path); -- GitLab From fd36a71762f3b0fcb9741ed24021afabec7e0c45 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Dec 2016 08:37:58 -0500 Subject: [PATCH 0328/1084] ceph: pass parent dir ino info to build_dentry_path In the event that we have a parent inode reference in the request, we can use that instead of mucking about in the dcache. 
Pass any parent inode info we have down to build_dentry_path so it can make use of it. Link: http://tracker.ceph.com/issues/18148 Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 7c2eb28c24f6..c41c6aab3232 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1795,15 +1795,15 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, return path; } -static int build_dentry_path(struct dentry *dentry, +static int build_dentry_path(struct dentry *dentry, struct inode *dir, const char **ppath, int *ppathlen, u64 *pino, int *pfreepath) { char *path; - struct inode *dir; rcu_read_lock(); - dir = d_inode_rcu(dentry->d_parent); + if (!dir) + dir = d_inode_rcu(dentry->d_parent); if (dir && ceph_snap(dir) == CEPH_NOSNAP) { *pino = ceph_ino(dir); rcu_read_unlock(); @@ -1847,8 +1847,8 @@ static int build_inode_path(struct inode *inode, * an explicit ino+path. */ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, - const char *rpath, u64 rino, - const char **ppath, int *pathlen, + struct inode *rdiri, const char *rpath, + u64 rino, const char **ppath, int *pathlen, u64 *ino, int *freepath) { int r = 0; @@ -1858,7 +1858,8 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode), ceph_snap(rinode)); } else if (rdentry) { - r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath); + r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, + freepath); dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); } else if (rpath || rino) { @@ -1891,7 +1892,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, int ret; ret = set_request_path_attr(req->r_inode, req->r_dentry, - req->r_path1, req->r_ino1.ino, + req->r_locked_dir, req->r_path1, req->r_ino1.ino, &path1, &pathlen1, &ino1, &freepath1); if (ret < 0) { msg = ERR_PTR(ret); @@ -1899,6 +1900,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, } ret = set_request_path_attr(NULL, req->r_old_dentry, + req->r_old_dentry_dir, req->r_path2, req->r_ino2.ino, &path2, &pathlen2, &ino2, &freepath2); if (ret < 0) { -- GitLab From adf0d68701c7f3e50f21308c76f41e60956a6832 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Dec 2016 08:37:58 -0500 Subject: [PATCH 0329/1084] ceph: fix unsafe dcache access in ceph_encode_dentry_release Accessing d_parent requires some sort of locking or it could vanish out from under us. Since we take the d_lock anyway, use that to fetch d_parent and take a reference to it, and then use that reference to call ceph_encode_inode_release. 
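Unlike the RCU walk used in __choose_mds, here the child's d_lock is what keeps d_parent stable long enough to take a proper dentry reference; reduced to a sketch (the helper name is illustrative, not from the patch):

/*
 * Sketch: d_parent can change under a concurrent rename, so fetch it
 * and take a reference while dentry->d_lock is held, then drop the
 * reference with dput() after the parent inode has been used.
 */
static struct dentry *pin_parent(struct dentry *dentry)
{
	struct dentry *parent;

	spin_lock(&dentry->d_lock);
	parent = dget(dentry->d_parent);	/* stable under d_lock */
	spin_unlock(&dentry->d_lock);

	return parent;	/* caller uses d_inode(parent), then dput(parent) */
}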
Link: http://tracker.ceph.com/issues/18148 Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 94fd76d04683..d1b4c543cab1 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3926,7 +3926,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, int ceph_encode_dentry_release(void **p, struct dentry *dentry, int mds, int drop, int unless) { - struct inode *dir = d_inode(dentry->d_parent); + struct dentry *parent; struct ceph_mds_request_release *rel = *p; struct ceph_dentry_info *di = ceph_dentry(dentry); int force = 0; @@ -3941,9 +3941,12 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, spin_lock(&dentry->d_lock); if (di->lease_session && di->lease_session->s_mds == mds) force = 1; + parent = dget(dentry->d_parent); spin_unlock(&dentry->d_lock); - ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); + ret = ceph_encode_inode_release(p, d_inode(parent), mds, drop, + unless, force); + dput(parent); spin_lock(&dentry->d_lock); if (ret && di->lease_session && di->lease_session->s_mds == mds) { -- GitLab From ca6c8ae0f7930dad7e10664e3b5bc657dd75be60 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Dec 2016 08:37:59 -0500 Subject: [PATCH 0330/1084] ceph: pass parent inode info to ceph_encode_dentry_release if we have it If we have a parent inode reference already, then we don't need to go back up the directory tree to find one. Link: http://tracker.ceph.com/issues/18148 Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 11 +++++++---- fs/ceph/mds_client.c | 7 +++++-- fs/ceph/super.h | 1 + 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d1b4c543cab1..4951ab96ffc8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3924,9 +3924,10 @@ int ceph_encode_inode_release(void **p, struct inode *inode, } int ceph_encode_dentry_release(void **p, struct dentry *dentry, + struct inode *dir, int mds, int drop, int unless) { - struct dentry *parent; + struct dentry *parent = NULL; struct ceph_mds_request_release *rel = *p; struct ceph_dentry_info *di = ceph_dentry(dentry); int force = 0; @@ -3941,11 +3942,13 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry, spin_lock(&dentry->d_lock); if (di->lease_session && di->lease_session->s_mds == mds) force = 1; - parent = dget(dentry->d_parent); + if (!dir) { + parent = dget(dentry->d_parent); + dir = d_inode(parent); + } spin_unlock(&dentry->d_lock); - ret = ceph_encode_inode_release(p, d_inode(parent), mds, drop, - unless, force); + ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force); dput(parent); spin_lock(&dentry->d_lock); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c41c6aab3232..1b78457c992b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1954,10 +1954,13 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, mds, req->r_inode_drop, req->r_inode_unless, 0); if (req->r_dentry_drop) releases += ceph_encode_dentry_release(&p, req->r_dentry, - mds, req->r_dentry_drop, req->r_dentry_unless); + req->r_locked_dir, mds, req->r_dentry_drop, + req->r_dentry_unless); if (req->r_old_dentry_drop) releases += ceph_encode_dentry_release(&p, req->r_old_dentry, - mds, req->r_old_dentry_drop, req->r_old_dentry_unless); + req->r_old_dentry_dir, mds, + req->r_old_dentry_drop, + 
req->r_old_dentry_unless); if (req->r_old_inode_drop) releases += ceph_encode_inode_release(&p, d_inode(req->r_old_dentry), diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 3373b61faefd..a80a915ca247 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -904,6 +904,7 @@ extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc); extern int ceph_encode_inode_release(void **p, struct inode *inode, int mds, int drop, int unless, int force); extern int ceph_encode_dentry_release(void **p, struct dentry *dn, + struct inode *dir, int mds, int drop, int unless); extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, -- GitLab From 52953d55917e45fdf62d5e6cb37c61c32dc8d373 Mon Sep 17 00:00:00 2001 From: Seraphime Kirkovski Date: Mon, 26 Dec 2016 10:26:34 +0100 Subject: [PATCH 0331/1084] ceph: cleanup ACCESS_ONCE -> READ_ONCE This removes the uses of ACCESS_ONCE in favor of READ_ONCE Signed-off-by: Seraphime Kirkovski Signed-off-by: Yan, Zheng --- fs/ceph/addr.c | 4 ++-- fs/ceph/caps.c | 2 +- fs/ceph/dir.c | 2 +- fs/ceph/inode.c | 2 +- fs/ceph/mds_client.c | 10 +++++----- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e4b066cd912a..39852567d66e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -771,7 +771,7 @@ static int ceph_writepages_start(struct address_space *mapping, wbc->sync_mode == WB_SYNC_NONE ? "NONE" : (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); - if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (ci->i_wrbuffer_ref > 0) { pr_warn_ratelimited( "writepage_start %p %lld forced umount\n", @@ -1194,7 +1194,7 @@ static int ceph_update_writeable_page(struct file *file, int r; struct ceph_snap_context *snapc, *oldest; - if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { dout(" page %p forced umount\n", page); unlock_page(page); return -EIO; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 4951ab96ffc8..e3a2a3f32568 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2477,7 +2477,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) { int mds_wanted; - if (ACCESS_ONCE(mdsc->fsc->mount_state) == + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { dout("get_cap_refs %p forced umount\n", inode); *err = -EIO; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 8ab1fdf0bd49..d4385563b70a 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1194,7 +1194,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) struct inode *dir; if (flags & LOOKUP_RCU) { - parent = ACCESS_ONCE(dentry->d_parent); + parent = READ_ONCE(dentry->d_parent); dir = d_inode_rcu(parent); if (!dir) return -ECHILD; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5e659d054b40..4926265f4223 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1720,7 +1720,7 @@ static void ceph_invalidate_work(struct work_struct *work) mutex_lock(&ci->i_truncate_mutex); - if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n", inode, ceph_ino(inode)); mapping_set_error(inode->i_mapping, -EIO); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1b78457c992b..176512960b14 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1166,7 +1166,7 @@ static int 
remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, ci->i_ceph_flags |= CEPH_I_CAP_DROPPED; if (ci->i_wrbuffer_ref > 0 && - ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) + READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) invalidate = true; while (!list_empty(&ci->i_cap_flush_list)) { @@ -2126,12 +2126,12 @@ static int __do_request(struct ceph_mds_client *mdsc, err = -EIO; goto finish; } - if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { dout("do_request forced umount\n"); err = -EIO; goto finish; } - if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { if (mdsc->mdsmap_err) { err = mdsc->mdsmap_err; dout("do_request mdsmap err %d\n", err); @@ -3586,7 +3586,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) { u64 want_tid, want_flush; - if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) return; dout("sync\n"); @@ -3617,7 +3617,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) */ static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped) { - if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) + if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) return true; return atomic_read(&mdsc->num_sessions) <= skipped; } -- GitLab From 0fbc5360bf433ffeccbb94ef806d637049899741 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 29 Dec 2016 20:19:32 +0000 Subject: [PATCH 0332/1084] ceph: fix spelling mistake: "enabing" -> "enabling" trivial fix to spelling mistake in debug message Signed-off-by: Colin Ian King Signed-off-by: Yan, Zheng --- fs/ceph/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 5bc5d37b1217..4e7421caf380 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -234,7 +234,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp) fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable, inode); if (fscache_cookie_enabled(ci->fscache)) { - dout("fscache_file_set_cookie %p %p enabing cache\n", + dout("fscache_file_set_cookie %p %p enabling cache\n", inode, filp); } } -- GitLab From 7c94ba27903c3c46c5829e96efb7c2a8ab7510e9 Mon Sep 17 00:00:00 2001 From: Andreas Gerstmayr Date: Tue, 10 Jan 2017 14:17:56 +0100 Subject: [PATCH 0333/1084] ceph: set io_pages bdi hint This patch sets the io_pages bdi hint based on the rsize mount option. Without this patch large buffered reads (request size > max readahead) are processed sequentially in chunks of the readahead size (i.e. read requests are sent out up to the readahead size, then the do_generic_file_read() function waits until the first page is received). With this patch read requests are sent out at once up to the size specified in the rsize mount option (default: 64 MB). Signed-off-by: Andreas Gerstmayr Acked-by: Jeff Layton Signed-off-by: Yan, Zheng --- Documentation/filesystems/ceph.txt | 5 ++--- fs/ceph/super.c | 8 ++++++++ fs/ceph/super.h | 4 ++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt index f5306ee40ea9..0b302a11718a 100644 --- a/Documentation/filesystems/ceph.txt +++ b/Documentation/filesystems/ceph.txt @@ -98,11 +98,10 @@ Mount Options size. rsize=X - Specify the maximum read size in bytes. By default there is no - maximum. 
+ Specify the maximum read size in bytes. Default: 64 MB. rasize=X - Specify the maximum readahead. + Specify the maximum readahead. Default: 8 MB. mount_timeout=X Specify the timeout value for mount (in seconds), in the case diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 6bd20d707bfd..a0a0b6d02f89 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -952,6 +952,14 @@ static int ceph_register_bdi(struct super_block *sb, fsc->backing_dev_info.ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + if (fsc->mount_options->rsize > fsc->mount_options->rasize && + fsc->mount_options->rsize >= PAGE_SIZE) + fsc->backing_dev_info.io_pages = + (fsc->mount_options->rsize + PAGE_SIZE - 1) + >> PAGE_SHIFT; + else if (fsc->mount_options->rsize == 0) + fsc->backing_dev_info.io_pages = ULONG_MAX; + err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld", atomic_long_inc_return(&bdi_seq)); if (!err) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a80a915ca247..f3f9215abf8e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -45,8 +45,8 @@ #define ceph_test_mount_opt(fsc, opt) \ (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) -#define CEPH_RSIZE_DEFAULT 0 /* max read size */ -#define CEPH_RASIZE_DEFAULT (8192*1024) /* readahead */ +#define CEPH_RSIZE_DEFAULT (64*1024*1024) /* max read size */ +#define CEPH_RASIZE_DEFAULT (8192*1024) /* max readahead */ #define CEPH_MAX_READDIR_DEFAULT 1024 #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) #define CEPH_SNAPDIRNAME_DEFAULT ".snap" -- GitLab From eb65b919b914e65485fd8134912066f4fafc4f1e Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Jan 2017 17:18:00 +0800 Subject: [PATCH 0334/1084] ceph: avoid updating mds_wanted too frequently user space may open/close single file frequently. It's not good to send a clientcaps message to mds for each open/close syscall. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e3a2a3f32568..d941c48e8bff 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1184,6 +1184,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, delayed = 1; } ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH); + if (want & ~cap->mds_wanted) { + /* user space may open/close single file frequently. + * This avoids droping mds_wanted immediately after + * requesting new mds_wanted. 
+ */ + __cap_set_timeouts(mdsc, ci); + } cap->issued &= retain; /* drop bits we don't want */ if (cap->implemented & ~cap->issued) { @@ -2485,15 +2492,14 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, goto out_unlock; } mds_wanted = __ceph_caps_mds_wanted(ci); - if ((mds_wanted & need) != need) { + if (need & ~(mds_wanted & need)) { dout("get_cap_refs %p caps were dropped" " (session killed?)\n", inode); *err = -ESTALE; ret = 1; goto out_unlock; } - if ((mds_wanted & file_wanted) == - (file_wanted & (CEPH_CAP_FILE_RD|CEPH_CAP_FILE_WR))) + if (!(file_wanted & ~mds_wanted)) ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED; } -- GitLab From d24cdcd3e40a6825135498e11c20c7976b9bf545 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Jan 2017 12:06:09 +0100 Subject: [PATCH 0335/1084] libceph: use BUG() instead of BUG_ON(1) I ran into this compile warning, which is the result of BUG_ON(1) not always leading to the compiler treating the code path as unreachable: include/linux/ceph/osdmap.h: In function 'ceph_can_shift_osds': include/linux/ceph/osdmap.h:62:1: error: control reaches end of non-void function [-Werror=return-type] Using BUG() here avoids the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 9a9041784dcf..412906609954 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -57,7 +57,7 @@ static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) case CEPH_POOL_TYPE_EC: return false; default: - BUG_ON(1); + BUG(); } } -- GitLab From 7fea24c6d4a553c59937ae4fef95c730a88125cb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 16 Jan 2017 14:35:17 +0100 Subject: [PATCH 0336/1084] libceph: include linux/sched.h into crypto.c directly Currently crypto.c gets linux/sched.h indirectly through linux/slab.h from linux/kasan.h. Include it directly for memalloc_noio_*() inlines. Signed-off-by: Ilya Dryomov --- net/ceph/crypto.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 292e33bd916e..85747b7f91a9 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include -- GitLab From 24c149ad6914d349d8b64749f20f3f8ea5031fe0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 12 Jan 2017 14:42:40 -0500 Subject: [PATCH 0337/1084] ceph: fix bogus endianness change in ceph_ioctl_set_layout sparse says: fs/ceph/ioctl.c:100:28: warning: cast to restricted __le64 preferred_osd is a __s64 so we don't need to do any conversion. Also, just remove the cast in ceph_ioctl_get_layout as it's not needed. 
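To spell out what sparse is pointing at: the byte-order helpers are only meaningful on wire-format __le/__be values, while preferred_osd is a native __s64 that is simply set to -1. An illustration with invented types (kernel-internal s64/__le64 assumed):

/*
 * Illustration only: le64_to_cpu() converts a little-endian on-wire
 * value to CPU byte order.  Applying it to a value that is already in
 * CPU order byte-swaps it on big-endian hosts; the old
 * le64_to_cpu(-1) only looked harmless because the all-ones pattern
 * is the same either way, which is why only sparse noticed.
 */
struct wire_example {
	__le64	on_wire;	/* needs le64_to_cpu() before use */
	s64	native;		/* already CPU order, use directly */
};

static s64 wire_example_sum(const struct wire_example *ex)
{
	return le64_to_cpu(ex->on_wire) + ex->native;
}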
Signed-off-by: Jeff Layton Reviewed-by: Sage Weil Signed-off-by: Ilya Dryomov --- fs/ceph/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 7d752d53353a..4c9c72f26eb9 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -25,7 +25,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg) l.stripe_count = ci->i_layout.stripe_count; l.object_size = ci->i_layout.object_size; l.data_pool = ci->i_layout.pool_id; - l.preferred_osd = (s32)-1; + l.preferred_osd = -1; if (copy_to_user(arg, &l, sizeof(l))) return -EFAULT; } @@ -97,7 +97,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) nl.data_pool = ci->i_layout.pool_id; /* this is obsolete, and always -1 */ - nl.preferred_osd = le64_to_cpu(-1); + nl.preferred_osd = -1; err = __validate_layout(mdsc, &nl); if (err) -- GitLab From d641df819db8b80198fd85d9de91137e8a823b07 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 19 Jan 2017 11:21:29 +0800 Subject: [PATCH 0338/1084] ceph: update readpages osd request according to size of pages add_to_page_cache_lru() can fails, so the actual pages to read can be smaller than the initial size of osd request. We need to update osd request size in that case. Signed-off-by: Yan, Zheng Reviewed-by: Jeff Layton --- fs/ceph/addr.c | 1 + net/ceph/osd_client.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 39852567d66e..4547bbf80e4f 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -391,6 +391,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) nr_pages = i; if (nr_pages > 0) { len = nr_pages << PAGE_SHIFT; + osd_req_op_extent_update(req, 0, len); break; } goto out_pages; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 842f049abb86..3a2417bb6ff0 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -672,7 +672,8 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req, BUG_ON(length > previous); op->extent.length = length; - op->indata_len -= previous - length; + if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL) + op->indata_len -= previous - length; } EXPORT_SYMBOL(osd_req_op_extent_update); -- GitLab From 00f06cba53f53f3f7be8ac4f9ba2c2f6a94bca6f Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 24 Jan 2017 10:02:32 +0800 Subject: [PATCH 0339/1084] ceph: make sure flushing inode in proper session's cap_flushing list when flushing inode's auth cap changes, we need to move it into the new auth cap session's cap_flushing list Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index d941c48e8bff..ed8c7addce91 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3410,6 +3410,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, tcap->implemented |= issued; if (cap == ci->i_auth_cap) ci->i_auth_cap = tcap; + if (!list_empty(&ci->i_cap_flush_list) && ci->i_auth_cap == tcap) { spin_lock(&mdsc->cap_dirty_lock); @@ -3423,9 +3424,18 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, } else if (tsession) { /* add placeholder for the export tagert */ int flag = (cap == ci->i_auth_cap) ? 
CEPH_CAP_FLAG_AUTH : 0; + tcap = new_cap; ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, t_seq - 1, t_mseq, (u64)-1, flag, &new_cap); + if (!list_empty(&ci->i_cap_flush_list) && + ci->i_auth_cap == tcap) { + spin_lock(&mdsc->cap_dirty_lock); + list_move_tail(&ci->i_flushing_item, + &tcap->session->s_cap_flushing); + spin_unlock(&mdsc->cap_dirty_lock); + } + __ceph_remove_cap(cap, false); goto out_unlock; } -- GitLab From c1944fedd8c492ce1c1a99ca9064dcc7bafa80e9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 29 Jan 2017 22:15:47 +0800 Subject: [PATCH 0340/1084] ceph: avoid calling ceph_renew_caps() infinitely __ceph_caps_mds_wanted() ignores caps from stale session. So the return value of __ceph_caps_mds_wanted() can keep the same across ceph_renew_caps(). This causes try_get_cap_refs() to keep calling ceph_renew_caps(). The fix is ignore the session valid check for the try_get_cap_refs() case. If session is stale, just let the caps requester sleep. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 6 +++--- fs/ceph/file.c | 2 +- fs/ceph/super.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ed8c7addce91..3c2dfd72e5b2 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -867,7 +867,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) /* * Return caps we have registered with the MDS(s) as 'wanted'. */ -int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) +int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check) { struct ceph_cap *cap; struct rb_node *p; @@ -875,7 +875,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { cap = rb_entry(p, struct ceph_cap, ci_node); - if (!__cap_is_valid(cap)) + if (check && !__cap_is_valid(cap)) continue; if (cap == ci->i_auth_cap) mds_wanted |= cap->mds_wanted; @@ -2491,7 +2491,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ret = 1; goto out_unlock; } - mds_wanted = __ceph_caps_mds_wanted(ci); + mds_wanted = __ceph_caps_mds_wanted(ci, false); if (need & ~(mds_wanted & need)) { dout("get_cap_refs %p caps were dropped" " (session killed?)\n", inode); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 045d30d26624..beb24f8bb917 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -283,7 +283,7 @@ int ceph_open(struct inode *inode, struct file *file) spin_lock(&ci->i_ceph_lock); if (__ceph_is_any_real_caps(ci) && (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { - int mds_wanted = __ceph_caps_mds_wanted(ci); + int mds_wanted = __ceph_caps_mds_wanted(ci, true); int issued = __ceph_caps_issued(ci, NULL); dout("open %p fmode %d want %s issued %s using existing\n", diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f3f9215abf8e..6477264bfc7e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -602,7 +602,7 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci) } /* what the mds thinks we want */ -extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); +extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check); extern void ceph_caps_init(struct ceph_mds_client *mdsc); extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); -- GitLab From 6fffaef954c432f6ddb42ec1064087e8ed704ca8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 31 Jan 2017 10:55:38 -0500 Subject: [PATCH 0341/1084] ceph: remove "Debugging hook" from ceph_fill_trace Keeping around commented out code is just asking for it to bitrot and makes viewing the code under 
cscope more confusing. If we really need this, then we can revert this patch and put it under a Kconfig option. Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 4926265f4223..38a54ebf647d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1120,40 +1120,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, dout("fill_trace %p is_dentry %d is_target %d\n", req, rinfo->head->is_dentry, rinfo->head->is_target); -#if 0 - /* - * Debugging hook: - * - * If we resend completed ops to a recovering mds, we get no - * trace. Since that is very rare, pretend this is the case - * to ensure the 'no trace' handlers in the callers behave. - * - * Fill in inodes unconditionally to avoid breaking cap - * invariants. - */ - if (rinfo->head->op & CEPH_MDS_OP_WRITE) { - pr_info("fill_trace faking empty trace on %lld %s\n", - req->r_tid, ceph_mds_op_name(rinfo->head->op)); - if (rinfo->head->is_dentry) { - rinfo->head->is_dentry = 0; - err = fill_inode(req->r_locked_dir, - &rinfo->diri, rinfo->dirfrag, - session, req->r_request_started, -1); - } - if (rinfo->head->is_target) { - rinfo->head->is_target = 0; - ininfo = rinfo->targeti.in; - vino.ino = le64_to_cpu(ininfo->ino); - vino.snap = le64_to_cpu(ininfo->snapid); - in = ceph_get_inode(sb, vino); - err = fill_inode(in, &rinfo->targeti, NULL, - session, req->r_request_started, - req->r_fmode); - iput(in); - } - } -#endif - if (!rinfo->head->is_target && !rinfo->head->is_dentry) { dout("fill_trace reply is empty!\n"); if (rinfo->head->result == 0 && req->r_locked_dir) -- GitLab From f5a03b080450784e671998921feb62fd3846c953 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 31 Jan 2017 11:06:13 -0500 Subject: [PATCH 0342/1084] ceph: drop session argument to ceph_fill_trace Just get it from r_session since that's what's always passed in. Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 4 ++-- fs/ceph/mds_client.c | 2 +- fs/ceph/super.h | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 38a54ebf647d..b18462c64cdd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1108,9 +1108,9 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) * * Called with snap_rwsem (read). 
*/ -int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, - struct ceph_mds_session *session) +int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) { + struct ceph_mds_session *session = req->r_session; struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct inode *in = NULL; struct ceph_vino vino; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 176512960b14..4a68067a76b5 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2516,7 +2516,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* insert trace into our cache */ mutex_lock(&req->r_fill_mutex); current->journal_info = req; - err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); + err = ceph_fill_trace(mdsc->fsc->sb, req); if (err == 0) { if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || req->r_op == CEPH_MDS_OP_LSSNAP)) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6477264bfc7e..950170136be9 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -764,8 +764,7 @@ extern void ceph_fill_file_time(struct inode *inode, int issued, u64 time_warp_seq, struct timespec *ctime, struct timespec *mtime, struct timespec *atime); extern int ceph_fill_trace(struct super_block *sb, - struct ceph_mds_request *req, - struct ceph_mds_session *session); + struct ceph_mds_request *req); extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, struct ceph_mds_session *session); -- GitLab From bc2de10dc4da5036ada3381775bd966f0c21c603 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 1 Feb 2017 13:49:09 -0500 Subject: [PATCH 0343/1084] ceph: convert bools in ceph_mds_request to a new r_req_flags field Currently, we have a bunch of bool flags in struct ceph_mds_request. We need more flags though, but each bool takes (at least) a byte. Those add up over time. Merge all of the existing bools in this struct into a single unsigned long, and use the set/test/clear_bit macros to manipulate them. These are atomic operations, but that is required here to prevent load/modify/store races. The existing flags are protected by different locks, so we can't rely on them for that purpose. 
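As a rough userspace sketch of the pattern (the helper names and bit numbers below are invented for the example; the kernel itself uses set_bit()/test_bit()/clear_bit() on r_req_flags), several bools collapse into a single flags word and each flag is manipulated with an atomic read-modify-write:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* invented stand-ins for the CEPH_MDS_R_* bit numbers */
#define R_ABORTED     2
#define R_GOT_UNSAFE  3
#define R_GOT_SAFE    4

struct request {
	/* one word replaces bool got_unsafe, got_safe, aborted, ... */
	atomic_ulong flags;
};

/* set a flag atomically, analogous to set_bit() */
static void req_set_flag(struct request *req, int bit)
{
	atomic_fetch_or(&req->flags, 1UL << bit);
}

/* test a flag, analogous to test_bit() */
static bool req_test_flag(struct request *req, int bit)
{
	return atomic_load(&req->flags) & (1UL << bit);
}

int main(void)
{
	struct request req = { .flags = 0 };

	req_set_flag(&req, R_GOT_UNSAFE);
	printf("unsafe=%d safe=%d\n",
	       req_test_flag(&req, R_GOT_UNSAFE),
	       req_test_flag(&req, R_GOT_SAFE));
	return 0;
}

Because the read-modify-write on the flags word is atomic, flags owned by different code paths can share the word without a common lock, which is exactly why plain bools were not enough here.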
Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/debugfs.c | 2 +- fs/ceph/dir.c | 4 ++-- fs/ceph/inode.c | 18 ++++++++--------- fs/ceph/mds_client.c | 47 +++++++++++++++++++++++--------------------- fs/ceph/mds_client.h | 12 +++++++---- 5 files changed, 45 insertions(+), 38 deletions(-) diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 39ff678e567f..f2ae393e2c31 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -70,7 +70,7 @@ static int mdsc_show(struct seq_file *s, void *p) seq_printf(s, "%s", ceph_mds_op_name(req->r_op)); - if (req->r_got_unsafe) + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) seq_puts(s, "\t(unsafe)"); else seq_puts(s, "\t"); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d4385563b70a..5fb86d71220e 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -371,7 +371,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) /* hints to request -> mds selection code */ req->r_direct_mode = USE_AUTH_MDS; req->r_direct_hash = ceph_frag_value(frag); - req->r_direct_is_hash = true; + __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); if (fi->last_name) { req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL); if (!req->r_path2) { @@ -417,7 +417,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) fi->frag = frag; fi->last_readdir = req; - if (req->r_did_prepopulate) { + if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) { fi->readdir_cache_idx = req->r_readdir_cache_idx; if (fi->readdir_cache_idx < 0) { /* preclude from marking dir ordered */ diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index b18462c64cdd..ebfb156aba89 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1199,8 +1199,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, session, req->r_request_started, - (!req->r_aborted && rinfo->head->result == 0) ? - req->r_fmode : -1, + (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && + rinfo->head->result == 0) ? 
req->r_fmode : -1, &req->r_caps_reservation); if (err < 0) { pr_err("fill_inode badness %p %llx.%llx\n", @@ -1213,8 +1213,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) * ignore null lease/binding on snapdir ENOENT, or else we * will have trouble splicing in the virtual snapdir later */ - if (rinfo->head->is_dentry && !req->r_aborted && - req->r_locked_dir && + if (rinfo->head->is_dentry && req->r_locked_dir && + !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, fsc->mount_options->snapdir_name, req->r_dentry->d_name.len))) { @@ -1317,9 +1317,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) update_dentry_lease(dn, rinfo->dlease, session, req->r_request_started); dout(" final dn %p\n", dn); - } else if (!req->r_aborted && - (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || - req->r_op == CEPH_MDS_OP_MKSNAP)) { + } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || + req->r_op == CEPH_MDS_OP_MKSNAP) && + !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { struct dentry *dn = req->r_dentry; struct inode *dir = req->r_locked_dir; @@ -1444,7 +1444,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, u32 fpos_offset; struct ceph_readdir_cache_control cache_ctl = {}; - if (req->r_aborted) + if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) return readdir_prepopulate_inodes_only(req, session); if (rinfo->hash_order && req->r_path2) { @@ -1598,7 +1598,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, } out: if (err == 0 && skipped == 0) { - req->r_did_prepopulate = true; + set_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags); req->r_readdir_cache_idx = cache_ctl.index; } ceph_readdir_cache_release(&cache_ctl); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4a68067a76b5..ccf75a3260e8 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -644,13 +644,15 @@ static void __unregister_request(struct ceph_mds_client *mdsc, erase_request(&mdsc->request_tree, req); - if (req->r_unsafe_dir && req->r_got_unsafe) { + if (req->r_unsafe_dir && + test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); spin_lock(&ci->i_unsafe_lock); list_del_init(&req->r_unsafe_dir_item); spin_unlock(&ci->i_unsafe_lock); } - if (req->r_target_inode && req->r_got_unsafe) { + if (req->r_target_inode && + test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); spin_lock(&ci->i_unsafe_lock); list_del_init(&req->r_unsafe_target_item); @@ -705,7 +707,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, int mode = req->r_direct_mode; int mds = -1; u32 hash = req->r_direct_hash; - bool is_hash = req->r_direct_is_hash; + bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); /* * is there a specific mds we should try? ignore hint if we have @@ -2042,7 +2044,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); - if (req->r_got_unsafe) { + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { void *p; /* * Replay. 
Do not regenerate message (and rebuild @@ -2091,7 +2093,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, rhead = msg->front.iov_base; rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc)); - if (req->r_got_unsafe) + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) flags |= CEPH_MDS_FLAG_REPLAY; if (req->r_locked_dir) flags |= CEPH_MDS_FLAG_WANT_DENTRY; @@ -2114,8 +2116,8 @@ static int __do_request(struct ceph_mds_client *mdsc, int mds = -1; int err = 0; - if (req->r_err || req->r_got_result) { - if (req->r_aborted) + if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { + if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) __unregister_request(mdsc, req); goto out; } @@ -2245,7 +2247,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) while (p) { req = rb_entry(p, struct ceph_mds_request, r_node); p = rb_next(p); - if (req->r_got_unsafe) + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) continue; if (req->r_attempts > 0) continue; /* only new requests */ @@ -2319,7 +2321,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, mutex_lock(&mdsc->mutex); /* only abort if we didn't race with a real reply */ - if (req->r_got_result) { + if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { err = le32_to_cpu(req->r_reply_info.head->result); } else if (err < 0) { dout("aborted request %lld with %d\n", req->r_tid, err); @@ -2331,7 +2333,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, */ mutex_lock(&req->r_fill_mutex); req->r_err = err; - req->r_aborted = true; + set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); mutex_unlock(&req->r_fill_mutex); if (req->r_locked_dir && @@ -2409,14 +2411,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } /* dup? */ - if ((req->r_got_unsafe && !head->safe) || - (req->r_got_safe && head->safe)) { + if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) || + (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) { pr_warn("got a dup %s reply on %llu from mds%d\n", head->safe ? "safe" : "unsafe", tid, mds); mutex_unlock(&mdsc->mutex); goto out; } - if (req->r_got_safe) { + if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) { pr_warn("got unsafe after safe on %llu from mds%d\n", tid, mds); mutex_unlock(&mdsc->mutex); @@ -2455,10 +2457,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) if (head->safe) { - req->r_got_safe = true; + set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags); __unregister_request(mdsc, req); - if (req->r_got_unsafe) { + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { /* * We already handled the unsafe response, now do the * cleanup. 
No need to examine the response; the MDS @@ -2476,7 +2478,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) goto out; } } else { - req->r_got_unsafe = true; + set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags); list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); if (req->r_unsafe_dir) { struct ceph_inode_info *ci = @@ -2530,7 +2532,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) if (realm) ceph_put_snap_realm(mdsc, realm); - if (err == 0 && req->r_got_unsafe && req->r_target_inode) { + if (err == 0 && req->r_target_inode && + test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); spin_lock(&ci->i_unsafe_lock); list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops); @@ -2538,12 +2541,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) } out_err: mutex_lock(&mdsc->mutex); - if (!req->r_aborted) { + if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { if (err) { req->r_err = err; } else { req->r_reply = ceph_msg_get(msg); - req->r_got_result = true; + set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags); } } else { dout("reply arrived after request %lld was aborted\n", tid); @@ -2587,7 +2590,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, goto out; /* dup reply? */ } - if (req->r_aborted) { + if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { dout("forward tid %llu aborted, unregistering\n", tid); __unregister_request(mdsc, req); } else if (fwd_seq <= req->r_num_fwd) { @@ -2597,7 +2600,7 @@ static void handle_forward(struct ceph_mds_client *mdsc, /* resend. forward race not possible; mds would drop */ dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); BUG_ON(req->r_err); - BUG_ON(req->r_got_result); + BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)); req->r_attempts = 0; req->r_num_fwd = fwd_seq; req->r_resend_mds = next_mds; @@ -2762,7 +2765,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, while (p) { req = rb_entry(p, struct ceph_mds_request, r_node); p = rb_next(p); - if (req->r_got_unsafe) + if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) continue; if (req->r_attempts == 0) continue; /* only old requests */ diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 3c6f77b7bb02..409b0e3c3b7a 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -205,6 +205,14 @@ struct ceph_mds_request { struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ struct inode *r_target_inode; /* resulting inode */ +#define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */ +#define CEPH_MDS_R_ABORTED (2) /* call was aborted */ +#define CEPH_MDS_R_GOT_UNSAFE (3) /* got an unsafe reply */ +#define CEPH_MDS_R_GOT_SAFE (4) /* got a safe reply */ +#define CEPH_MDS_R_GOT_RESULT (5) /* got a result */ +#define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */ + unsigned long r_req_flags; + struct mutex r_fill_mutex; union ceph_mds_request_args r_args; @@ -216,7 +224,6 @@ struct ceph_mds_request { /* for choosing which mds to send this request to */ int r_direct_mode; u32 r_direct_hash; /* choose dir frag based on this dentry hash */ - bool r_direct_is_hash; /* true if r_direct_hash is valid */ /* data payload is used for xattr ops */ struct ceph_pagelist *r_pagelist; @@ -234,7 +241,6 @@ struct ceph_mds_request { struct ceph_mds_reply_info_parsed r_reply_info; struct page *r_locked_page; int r_err; - bool r_aborted; 
unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */ unsigned long r_started; /* start time to measure timeout against */ @@ -262,9 +268,7 @@ struct ceph_mds_request { ceph_mds_request_callback_t r_callback; ceph_mds_request_wait_callback_t r_wait_for_completion; struct list_head r_unsafe_item; /* per-session unsafe list item */ - bool r_got_unsafe, r_got_safe, r_got_result; - bool r_did_prepopulate; long long r_dir_release_cnt; long long r_dir_ordered_cnt; int r_readdir_cache_idx; -- GitLab From 3dd69aabcef3d835446a9a1e11d2eab0e6e35e95 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 31 Jan 2017 10:28:26 -0500 Subject: [PATCH 0344/1084] ceph: add a new flag to indicate whether parent is locked struct ceph_mds_request has an r_locked_dir pointer, which is set to indicate the parent inode and that its i_rwsem is locked. In some critical places, we need to be able to indicate the parent inode to the request handling code, even when its i_rwsem may not be locked. Most of the code that operates on r_locked_dir doesn't require that the i_rwsem be locked. We only really need it to handle manipulation of the dcache. The rest (filling of the inode, updating dentry leases, etc.) already has its own locking. Add a new r_req_flags bit that indicates whether the parent is locked when doing the request, and rename the pointer to "r_parent". For now, all the places that set r_parent also set this flag, but that will change in a later patch. Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 21 ++++++++++++++------- fs/ceph/export.c | 3 ++- fs/ceph/file.c | 3 ++- fs/ceph/inode.c | 13 +++++++------ fs/ceph/mds_client.c | 24 ++++++++++++------------ fs/ceph/mds_client.h | 3 ++- 6 files changed, 39 insertions(+), 28 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 5fb86d71220e..c8562b8a8c7e 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -752,7 +752,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, mask |= CEPH_CAP_XATTR_SHARED; req->r_args.getattr.mask = cpu_to_le32(mask); - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); err = ceph_mdsc_do_request(mdsc, NULL, req); err = ceph_handle_snapdir(req, dentry, err); dentry = ceph_finish_lookup(req, dentry, err); @@ -813,7 +814,8 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, } req->r_dentry = dget(dentry); req->r_num_caps = 2; - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mknod.mode = cpu_to_le32(mode); req->r_args.mknod.rdev = cpu_to_le32(rdev); req->r_dentry_drop = CEPH_CAP_FILE_SHARED; @@ -864,7 +866,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, ceph_mdsc_put_request(req); goto out; } - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry = dget(dentry); req->r_num_caps = 2; req->r_dentry_drop = CEPH_CAP_FILE_SHARED; @@ -913,7 +916,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) req->r_dentry = dget(dentry); req->r_num_caps = 2; - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_args.mkdir.mode = cpu_to_le32(mode); req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; @@ -957,7 +961,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, req->r_dentry = dget(dentry); 
req->r_num_caps = 2; req->r_old_dentry = dget(old_dentry); - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; /* release LINK_SHARED on source inode (mds will lock it) */ @@ -1023,7 +1028,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) } req->r_dentry = dget(dentry); req->r_num_caps = 2; - req->r_locked_dir = dir; + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_inode_drop = drop_caps_for_unlink(inode); @@ -1066,7 +1072,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, req->r_num_caps = 2; req->r_old_dentry = dget(old_dentry); req->r_old_dentry_dir = old_dir; - req->r_locked_dir = new_dir; + req->r_parent = new_dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; req->r_dentry_drop = CEPH_CAP_FILE_SHARED; diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 180bbef760f2..e8f11fa565c5 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -207,7 +207,8 @@ static int ceph_get_name(struct dentry *parent, char *name, req->r_inode = d_inode(child); ihold(d_inode(child)); req->r_ino2 = ceph_vino(d_inode(parent)); - req->r_locked_dir = d_inode(parent); + req->r_parent = d_inode(parent); + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); req->r_num_caps = 2; err = ceph_mdsc_do_request(mdsc, NULL, req); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index beb24f8bb917..baad3b688ada 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -379,7 +379,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, mask |= CEPH_CAP_XATTR_SHARED; req->r_args.open.mask = cpu_to_le32(mask); - req->r_locked_dir = dir; /* caller holds dir->i_mutex */ + req->r_parent = dir; + set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); err = ceph_mdsc_do_request(mdsc, (flags & (O_CREAT|O_TRUNC)) ? 
dir : NULL, req); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ebfb156aba89..35a8c453bea6 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1122,13 +1122,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) if (!rinfo->head->is_target && !rinfo->head->is_dentry) { dout("fill_trace reply is empty!\n"); - if (rinfo->head->result == 0 && req->r_locked_dir) + if (rinfo->head->result == 0 && req->r_parent) ceph_invalidate_dir_request(req); return 0; } if (rinfo->head->is_dentry) { - struct inode *dir = req->r_locked_dir; + struct inode *dir = req->r_parent; if (dir) { err = fill_inode(dir, NULL, @@ -1213,8 +1213,9 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) * ignore null lease/binding on snapdir ENOENT, or else we * will have trouble splicing in the virtual snapdir later */ - if (rinfo->head->is_dentry && req->r_locked_dir && - !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && + if (rinfo->head->is_dentry && + !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && + test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, fsc->mount_options->snapdir_name, req->r_dentry->d_name.len))) { @@ -1223,7 +1224,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) * mknod symlink mkdir : null -> new inode * unlink : linked -> null */ - struct inode *dir = req->r_locked_dir; + struct inode *dir = req->r_parent; struct dentry *dn = req->r_dentry; bool have_dir_cap, have_lease; @@ -1321,7 +1322,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) req->r_op == CEPH_MDS_OP_MKSNAP) && !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { struct dentry *dn = req->r_dentry; - struct inode *dir = req->r_locked_dir; + struct inode *dir = req->r_parent; /* fill out a snapdir LOOKUPSNAP dentry */ BUG_ON(!dn); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index ccf75a3260e8..52521f339745 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -547,8 +547,8 @@ void ceph_mdsc_release_request(struct kref *kref) ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); iput(req->r_inode); } - if (req->r_locked_dir) - ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); + if (req->r_parent) + ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); iput(req->r_target_inode); if (req->r_dentry) dput(req->r_dentry); @@ -735,7 +735,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, rcu_read_lock(); parent = req->r_dentry->d_parent; - dir = req->r_locked_dir ? : d_inode_rcu(parent); + dir = req->r_parent ? 
: d_inode_rcu(parent); if (!dir || dir->i_sb != mdsc->fsc->sb) { /* not this fs or parent went negative */ @@ -1894,7 +1894,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, int ret; ret = set_request_path_attr(req->r_inode, req->r_dentry, - req->r_locked_dir, req->r_path1, req->r_ino1.ino, + req->r_parent, req->r_path1, req->r_ino1.ino, &path1, &pathlen1, &ino1, &freepath1); if (ret < 0) { msg = ERR_PTR(ret); @@ -1956,7 +1956,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, mds, req->r_inode_drop, req->r_inode_unless, 0); if (req->r_dentry_drop) releases += ceph_encode_dentry_release(&p, req->r_dentry, - req->r_locked_dir, mds, req->r_dentry_drop, + req->r_parent, mds, req->r_dentry_drop, req->r_dentry_unless); if (req->r_old_dentry_drop) releases += ceph_encode_dentry_release(&p, req->r_old_dentry, @@ -2095,14 +2095,14 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc)); if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) flags |= CEPH_MDS_FLAG_REPLAY; - if (req->r_locked_dir) + if (req->r_parent) flags |= CEPH_MDS_FLAG_WANT_DENTRY; rhead->flags = cpu_to_le32(flags); rhead->num_fwd = req->r_num_fwd; rhead->num_retry = req->r_attempts - 1; rhead->ino = 0; - dout(" r_locked_dir = %p\n", req->r_locked_dir); + dout(" r_parent = %p\n", req->r_parent); return 0; } @@ -2282,11 +2282,11 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, dout("do_request on %p\n", req); - /* take CAP_PIN refs for r_inode, r_locked_dir, r_old_dentry */ + /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ if (req->r_inode) ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); - if (req->r_locked_dir) - ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); + if (req->r_parent) + ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); if (req->r_old_dentry_dir) ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), CEPH_CAP_PIN); @@ -2336,7 +2336,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); mutex_unlock(&req->r_fill_mutex); - if (req->r_locked_dir && + if (req->r_parent && (req->r_op & CEPH_MDS_OP_WRITE)) ceph_invalidate_dir_request(req); } else { @@ -2355,7 +2355,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, */ void ceph_invalidate_dir_request(struct ceph_mds_request *req) { - struct inode *inode = req->r_locked_dir; + struct inode *inode = req->r_parent; dout("invalidate_dir_request %p (complete, lease(s))\n", inode); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 409b0e3c3b7a..ac0475a2daa7 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -202,7 +202,7 @@ struct ceph_mds_request { char *r_path1, *r_path2; struct ceph_vino r_ino1, r_ino2; - struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */ + struct inode *r_parent; /* parent dir inode */ struct inode *r_target_inode; /* resulting inode */ #define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */ @@ -211,6 +211,7 @@ struct ceph_mds_request { #define CEPH_MDS_R_GOT_SAFE (4) /* got a safe reply */ #define CEPH_MDS_R_GOT_RESULT (5) /* got a result */ #define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */ +#define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? 
*/ unsigned long r_req_flags; struct mutex r_fill_mutex; -- GitLab From 80d025ffede88969f6adf7266fbdedfd5641148a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 26 Jan 2017 16:14:18 -0500 Subject: [PATCH 0345/1084] ceph: don't update_dentry_lease unless we actually got one This if block updates the dentry lease even in the case where the MDS didn't grant one. Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 35a8c453bea6..542e3c6b0f1b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1286,8 +1286,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) ceph_dir_clear_ordered(dir); dout("d_delete %p\n", dn); d_delete(dn); - } else { - if (have_lease && d_unhashed(dn)) + } else if (have_lease) { + if (d_unhashed(dn)) d_add(dn, NULL); update_dentry_lease(dn, rinfo->dlease, session, -- GitLab From f5d55f03973f79a008bf8635452849f8ef589c23 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 27 Jan 2017 09:13:57 -0500 Subject: [PATCH 0346/1084] ceph: vet the target and parent inodes before updating dentry lease In a later patch, we're going to need to allow ceph_fill_trace to update the dentry's lease when the parent is not locked. This is potentially racy though -- by the time we get around to processing the trace, the parent may have already changed. Change update_dentry_lease to take a ceph_vino pointer and use that to ensure that the dentry's parent still matches it before updating the lease. Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 72 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 542e3c6b0f1b..f5111df561e4 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1016,7 +1016,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page, static void update_dentry_lease(struct dentry *dentry, struct ceph_mds_reply_lease *lease, struct ceph_mds_session *session, - unsigned long from_time) + unsigned long from_time, + struct ceph_vino *tgt_vino, + struct ceph_vino *dir_vino) { struct ceph_dentry_info *di = ceph_dentry(dentry); long unsigned duration = le32_to_cpu(lease->duration_ms); @@ -1024,13 +1026,27 @@ static void update_dentry_lease(struct dentry *dentry, long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; struct inode *dir; + /* + * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that + * we expect a negative dentry. 
+ */ + if (!tgt_vino && d_really_is_positive(dentry)) + return; + + if (tgt_vino && (d_really_is_negative(dentry) || + !ceph_ino_compare(d_inode(dentry), tgt_vino))) + return; + spin_lock(&dentry->d_lock); dout("update_dentry_lease %p duration %lu ms ttl %lu\n", dentry, duration, ttl); - /* make lease_rdcache_gen match directory */ dir = d_inode(dentry->d_parent); + /* make sure parent matches dir_vino */ + if (!ceph_ino_compare(dir, dir_vino)) + goto out_unlock; + /* only track leases on regular dentries */ if (ceph_snap(dir) != CEPH_NOSNAP) goto out_unlock; @@ -1113,7 +1129,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) struct ceph_mds_session *session = req->r_session; struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct inode *in = NULL; - struct ceph_vino vino; + struct ceph_vino tvino, dvino; struct ceph_fs_client *fsc = ceph_sb_to_client(sb); int err = 0; @@ -1154,8 +1170,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) dname.name = rinfo->dname; dname.len = rinfo->dname_len; dname.hash = full_name_hash(parent, dname.name, dname.len); - vino.ino = le64_to_cpu(rinfo->targeti.in->ino); - vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); retry_lookup: dn = d_lookup(parent, &dname); dout("d_lookup on parent=%p name=%.*s got %p\n", @@ -1172,8 +1188,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) } err = 0; } else if (d_really_is_positive(dn) && - (ceph_ino(d_inode(dn)) != vino.ino || - ceph_snap(d_inode(dn)) != vino.snap)) { + (ceph_ino(d_inode(dn)) != tvino.ino || + ceph_snap(d_inode(dn)) != tvino.snap)) { dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); d_delete(dn); @@ -1187,10 +1203,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) } if (rinfo->head->is_target) { - vino.ino = le64_to_cpu(rinfo->targeti.in->ino); - vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); - in = ceph_get_inode(sb, vino); + in = ceph_get_inode(sb, tvino); if (IS_ERR(in)) { err = PTR_ERR(in); goto done; @@ -1231,10 +1247,12 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) BUG_ON(!dn); BUG_ON(!dir); BUG_ON(d_inode(dn->d_parent) != dir); - BUG_ON(ceph_ino(dir) != - le64_to_cpu(rinfo->diri.in->ino)); - BUG_ON(ceph_snap(dir) != - le64_to_cpu(rinfo->diri.in->snapid)); + + dvino.ino = le64_to_cpu(rinfo->diri.in->ino); + dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); + + BUG_ON(ceph_ino(dir) != dvino.ino); + BUG_ON(ceph_snap(dir) != dvino.snap); /* do we have a lease on the whole dir? 
*/ have_dir_cap = @@ -1291,7 +1309,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) d_add(dn, NULL); update_dentry_lease(dn, rinfo->dlease, session, - req->r_request_started); + req->r_request_started, + NULL, &dvino); } goto done; } @@ -1314,9 +1333,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) have_lease = false; } - if (have_lease) + if (have_lease) { + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); update_dentry_lease(dn, rinfo->dlease, session, - req->r_request_started); + req->r_request_started, + &tvino, &dvino); + } dout(" final dn %p\n", dn); } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP) && @@ -1490,14 +1513,14 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, /* FIXME: release caps/leases if error occurs */ for (i = 0; i < rinfo->dir_nr; i++) { struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; - struct ceph_vino vino; + struct ceph_vino tvino, dvino; dname.name = rde->name; dname.len = rde->name_len; dname.hash = full_name_hash(parent, dname.name, dname.len); - vino.ino = le64_to_cpu(rde->inode.in->ino); - vino.snap = le64_to_cpu(rde->inode.in->snapid); + tvino.ino = le64_to_cpu(rde->inode.in->ino); + tvino.snap = le64_to_cpu(rde->inode.in->snapid); if (rinfo->hash_order) { u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, @@ -1526,8 +1549,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, goto out; } } else if (d_really_is_positive(dn) && - (ceph_ino(d_inode(dn)) != vino.ino || - ceph_snap(d_inode(dn)) != vino.snap)) { + (ceph_ino(d_inode(dn)) != tvino.ino || + ceph_snap(d_inode(dn)) != tvino.snap)) { dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); d_delete(dn); @@ -1539,7 +1562,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, if (d_really_is_positive(dn)) { in = d_inode(dn); } else { - in = ceph_get_inode(parent->d_sb, vino); + in = ceph_get_inode(parent->d_sb, tvino); if (IS_ERR(in)) { dout("new_inode badness\n"); d_drop(dn); @@ -1584,8 +1607,9 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ceph_dentry(dn)->offset = rde->offset; + dvino = ceph_vino(d_inode(parent)); update_dentry_lease(dn, rde->lease, req->r_session, - req->r_request_started); + req->r_request_started, &tvino, &dvino); if (err == 0 && skipped == 0 && cache_ctl.index >= 0) { ret = fill_readdir_cache(d_inode(parent), dn, -- GitLab From cdde7c43513de7d8baec3ad7f6a01a177e00e968 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 27 Jan 2017 13:07:10 -0500 Subject: [PATCH 0347/1084] ceph: call update_dentry_lease even when r_locked dir is not set We don't really require that the parent be locked in order to update the lease on a dentry. Lease info is protected by the d_lock. In the event that the parent is not locked in ceph_fill_trace, and we have both parent and target info, go ahead and update the dentry lease. 
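A simplified stand-alone sketch of that idea (hypothetical types and names, not the ceph code): the cached lease is guarded by the dentry's own lock, so it can be refreshed without the parent directory lock, provided the dentry is first re-checked against the parent and target we resolved:

#include <pthread.h>
#include <stdbool.h>

struct vino { unsigned long long ino; unsigned long long snap; };

/* hypothetical stand-in for a dentry with a cached lease */
struct dcache_entry {
	pthread_mutex_t lock;          /* plays the role of d_lock */
	struct vino parent;
	struct vino target;
	unsigned long lease_expires;   /* cached lease state */
};

static bool vino_eq(const struct vino *a, const struct vino *b)
{
	return a->ino == b->ino && a->snap == b->snap;
}

/* refresh the lease only if the entry still matches what we looked up */
static void update_lease(struct dcache_entry *dn,
			 const struct vino *dir, const struct vino *tgt,
			 unsigned long expires)
{
	pthread_mutex_lock(&dn->lock);
	if (vino_eq(&dn->parent, dir) && vino_eq(&dn->target, tgt))
		dn->lease_expires = expires;
	pthread_mutex_unlock(&dn->lock);
}

int main(void)
{
	struct dcache_entry dn = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.parent = { 1, 0 }, .target = { 2, 0 },
	};
	struct vino dir = { 1, 0 }, tgt = { 2, 0 };

	update_lease(&dn, &dir, &tgt, 12345);
	return 0;
}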
Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index f5111df561e4..68f46132b157 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1360,6 +1360,26 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) goto done; } req->r_dentry = dn; /* may have spliced */ + } else if (rinfo->head->is_dentry) { + struct ceph_vino *ptvino = NULL; + + if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) || + le32_to_cpu(rinfo->dlease->duration_ms)) { + dvino.ino = le64_to_cpu(rinfo->diri.in->ino); + dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); + + if (rinfo->head->is_target) { + tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); + tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); + ptvino = &tvino; + } + + update_dentry_lease(req->r_dentry, rinfo->dlease, + session, req->r_request_started, ptvino, + &dvino); + } else { + dout("%s: no dentry lease or dir cap\n", __func__); + } } done: dout("fill_trace done err=%d\n", err); -- GitLab From 5eb9f6040f3854c5b92c60fd211dc0235ef25ca8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Jan 2017 09:47:25 -0500 Subject: [PATCH 0348/1084] ceph: do a LOOKUP in d_revalidate instead of GETATTR In commit c3f4688a08f (ceph: don't set req->r_locked_dir in ceph_d_revalidate), we changed the code to do a GETATTR instead of a LOOKUP as the parent info isn't strictly necessary to revalidate the dentry. What we missed there though is that in order to update the lease on the dentry after revalidating it, we _do_ need parent info. Change ceph_d_revalidate back to doing a LOOKUP instead of a GETATTR so that we can get the parent info in order to update the lease from ceph_fill_trace. Note that we set req->r_parent here, but we cannot set the CEPH_MDS_R_PARENT_LOCKED flag as we can't guarantee that it is. Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/dir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index c8562b8a8c7e..3e9ad501addf 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1244,11 +1244,12 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; op = ceph_snap(dir) == CEPH_SNAPDIR ? - CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR; + CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP; req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS); if (!IS_ERR(req)) { req->r_dentry = dget(dentry); - req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2; + req->r_num_caps = 2; + req->r_parent = dir; mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED; if (ceph_security_xattr_wanted(dir)) -- GitLab From 19def166f3781edb4c68c3c68fcb8199eed2d605 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sun, 5 Feb 2017 16:07:32 +0900 Subject: [PATCH 0349/1084] libceph: remove unneeded stddef.h include This was causing a build failure for openrisc when using musl and gcc 5.4.0 since the file is not available in the toolchain. It doesnt seem this is needed and removing it does not cause any build warnings for me. Signed-off-by: Stafford Horne Signed-off-by: Ilya Dryomov --- net/ceph/snapshot.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c index 154683f5f14c..705414e78ae0 100644 --- a/net/ceph/snapshot.c +++ b/net/ceph/snapshot.c @@ -18,8 +18,6 @@ * 02110-1301, USA. 
*/ -#include - #include #include #include -- GitLab From 1b6a78b5b91cdc07cc0b940b458e90c86835cf73 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 31 Jan 2017 15:55:06 +0100 Subject: [PATCH 0350/1084] libceph: add osdmap_set_crush() helper Simplify osdmap_decode() and osdmap_apply_incremental() a bit. Signed-off-by: Ilya Dryomov --- net/ceph/osdmap.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index d2436880b305..47df075b31e5 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -808,6 +808,17 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) return 0; } +static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush) +{ + if (IS_ERR(crush)) + return PTR_ERR(crush); + + if (map->crush) + crush_destroy(map->crush); + map->crush = crush; + return 0; +} + #define OSDMAP_WRAPPER_COMPAT_VER 7 #define OSDMAP_CLIENT_DATA_COMPAT_VER 1 @@ -1214,13 +1225,9 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) /* crush */ ceph_decode_32_safe(p, end, len, e_inval); - map->crush = crush_decode(*p, min(*p + len, end)); - if (IS_ERR(map->crush)) { - err = PTR_ERR(map->crush); - map->crush = NULL; + err = osdmap_set_crush(map, crush_decode(*p, min(*p + len, end))); + if (err) goto bad; - } - *p += len; /* ignore the rest */ *p = end; @@ -1375,7 +1382,6 @@ static int decode_new_up_state_weight(void **p, void *end, struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *map) { - struct crush_map *newcrush = NULL; struct ceph_fsid fsid; u32 epoch = 0; struct ceph_timespec modified; @@ -1414,12 +1420,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, /* new crush? */ ceph_decode_32_safe(p, end, len, e_inval); if (len > 0) { - newcrush = crush_decode(*p, min(*p+len, end)); - if (IS_ERR(newcrush)) { - err = PTR_ERR(newcrush); - newcrush = NULL; + err = osdmap_set_crush(map, + crush_decode(*p, min(*p + len, end))); + if (err) goto bad; - } *p += len; } @@ -1439,12 +1443,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, map->epoch++; map->modified = modified; - if (newcrush) { - if (map->crush) - crush_destroy(map->crush); - map->crush = newcrush; - newcrush = NULL; - } /* new_pools */ err = decode_new_pools(p, end, map); @@ -1505,8 +1503,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, print_hex_dump(KERN_DEBUG, "osdmap: ", DUMP_PREFIX_OFFSET, 16, 1, start, end - start, true); - if (newcrush) - crush_destroy(newcrush); return ERR_PTR(err); } -- GitLab From 66a0e2d579dbec5c676cfe446234ffebb267c564 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 31 Jan 2017 15:55:06 +0100 Subject: [PATCH 0351/1084] crush: remove mutable part of CRUSH map Then add it to the working state. It would be very nice if we didn't have to take a lock to calculate a crush placement. By moving the permutation array into the working data, we can treat the CRUSH map as immutable. Reflects ceph.git commit cbcd039651c0569551cb90d26ce27e1432671f2a. 
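A minimal stand-alone sketch of the split (names are illustrative, not the CRUSH code): the map becomes read-only shared state, and each caller allocates its own work area holding the per-bucket permutation cache, so concurrent placement computations need no lock on the map:

#include <stdlib.h>

struct bucket {
	unsigned size;          /* number of items in the bucket */
};

struct map {                    /* immutable once built */
	unsigned max_buckets;
	struct bucket **buckets;
};

struct work_bucket {            /* mutable per-computation scratch */
	unsigned perm_x, perm_n;
	unsigned *perm;
};

struct work {
	struct work_bucket *buckets;
};

/* each caller/thread allocates its own work area for a given map
 * (per-bucket allocation failures are not handled, for brevity) */
struct work *work_alloc(const struct map *m)
{
	struct work *w = calloc(1, sizeof(*w));
	unsigned b;

	if (!w)
		return NULL;
	w->buckets = calloc(m->max_buckets, sizeof(*w->buckets));
	if (!w->buckets) {
		free(w);
		return NULL;
	}
	for (b = 0; b < m->max_buckets; b++) {
		if (!m->buckets[b])
			continue;
		w->buckets[b].perm = calloc(m->buckets[b]->size,
					    sizeof(unsigned));
	}
	return w;
}

The actual kernel code packs all of this scratch state into one contiguous buffer sized by the map's working_size, but the ownership model is the same: the map is shared and never written during placement, the work area is private to the computation.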
Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 1 + include/linux/crush/crush.h | 41 +++++-- include/linux/crush/mapper.h | 4 +- net/ceph/crush/crush.c | 5 - net/ceph/crush/mapper.c | 206 +++++++++++++++++++++++------------ net/ceph/osdmap.c | 47 +++++++- 6 files changed, 218 insertions(+), 86 deletions(-) diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 412906609954..cef1cab789b9 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -175,6 +175,7 @@ struct ceph_osdmap { struct mutex crush_scratch_mutex; int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; + void *crush_workspace; }; static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd) diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index be8f12b8f195..fbecbd089d75 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -135,13 +135,6 @@ struct crush_bucket { __u32 size; /* num items */ __s32 *items; - /* - * cached random permutation: used for uniform bucket and for - * the linear search fallback for the other bucket types. - */ - __u32 perm_x; /* @x for which *perm is defined */ - __u32 perm_n; /* num elements of *perm that are permuted/defined */ - __u32 *perm; }; struct crush_bucket_uniform { @@ -211,6 +204,21 @@ struct crush_map { * device fails. */ __u8 chooseleaf_stable; + /* + * This value is calculated after decode or construction by + * the builder. It is exposed here (rather than having a + * 'build CRUSH working space' function) so that callers can + * reserve a static buffer, allocate space on the stack, or + * otherwise avoid calling into the heap allocator if they + * want to. The size of the working space depends on the map, + * while the size of the scratch vector passed to the mapper + * depends on the size of the desired result set. + * + * Nothing stops the caller from allocating both in one swell + * foop and passing in two points, though. + */ + size_t working_size; + #ifndef __KERNEL__ /* * version 0 (original) of straw_calc has various flaws. version 1 @@ -248,4 +256,23 @@ static inline int crush_calc_tree_node(int i) return ((i+1) << 1)-1; } +/* + * These data structures are private to the CRUSH implementation. They + * are exposed in this header file because builder needs their + * definitions to calculate the total working size. + * + * Moving this out of the crush map allow us to treat the CRUSH map as + * immutable within the mapper and removes the requirement for a CRUSH + * map lock. 
+ */ +struct crush_work_bucket { + __u32 perm_x; /* @x for which *perm is defined */ + __u32 perm_n; /* num elements of *perm that are permuted/defined */ + __u32 *perm; /* Permutation of the bucket's items */ +}; + +struct crush_work { + struct crush_work_bucket **work; /* Per-bucket working store */ +}; + #endif diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index 5dfd5b1125d2..3303c7fd8a31 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -15,6 +15,8 @@ extern int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, const __u32 *weights, int weight_max, - int *scratch); + void *cwin, int *scratch); + +void crush_init_workspace(const struct crush_map *map, void *v); #endif diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c index 80d7c3a97cb8..5bf94c04f645 100644 --- a/net/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c @@ -45,7 +45,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p) void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b) { - kfree(b->h.perm); kfree(b->h.items); kfree(b); } @@ -54,14 +53,12 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b) { kfree(b->item_weights); kfree(b->sum_weights); - kfree(b->h.perm); kfree(b->h.items); kfree(b); } void crush_destroy_bucket_tree(struct crush_bucket_tree *b) { - kfree(b->h.perm); kfree(b->h.items); kfree(b->node_weights); kfree(b); @@ -71,7 +68,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b) { kfree(b->straws); kfree(b->item_weights); - kfree(b->h.perm); kfree(b->h.items); kfree(b); } @@ -79,7 +75,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b) void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b) { kfree(b->item_weights); - kfree(b->h.perm); kfree(b->h.items); kfree(b); } diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 130ab407c5ec..9e75be5ec716 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -54,7 +54,6 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size return -1; } - /* * bucket choose methods * @@ -72,59 +71,60 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size * Since this is expensive, we optimize for the r=0 case, which * captures the vast majority of calls. 
*/ -static int bucket_perm_choose(struct crush_bucket *bucket, +static int bucket_perm_choose(const struct crush_bucket *bucket, + struct crush_work_bucket *work, int x, int r) { unsigned int pr = r % bucket->size; unsigned int i, s; /* start a new permutation if @x has changed */ - if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) { + if (work->perm_x != (__u32)x || work->perm_n == 0) { dprintk("bucket %d new x=%d\n", bucket->id, x); - bucket->perm_x = x; + work->perm_x = x; /* optimize common r=0 case */ if (pr == 0) { s = crush_hash32_3(bucket->hash, x, bucket->id, 0) % bucket->size; - bucket->perm[0] = s; - bucket->perm_n = 0xffff; /* magic value, see below */ + work->perm[0] = s; + work->perm_n = 0xffff; /* magic value, see below */ goto out; } for (i = 0; i < bucket->size; i++) - bucket->perm[i] = i; - bucket->perm_n = 0; - } else if (bucket->perm_n == 0xffff) { + work->perm[i] = i; + work->perm_n = 0; + } else if (work->perm_n == 0xffff) { /* clean up after the r=0 case above */ for (i = 1; i < bucket->size; i++) - bucket->perm[i] = i; - bucket->perm[bucket->perm[0]] = 0; - bucket->perm_n = 1; + work->perm[i] = i; + work->perm[work->perm[0]] = 0; + work->perm_n = 1; } /* calculate permutation up to pr */ - for (i = 0; i < bucket->perm_n; i++) + for (i = 0; i < work->perm_n; i++) dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]); - while (bucket->perm_n <= pr) { - unsigned int p = bucket->perm_n; + while (work->perm_n <= pr) { + unsigned int p = work->perm_n; /* no point in swapping the final entry */ if (p < bucket->size - 1) { i = crush_hash32_3(bucket->hash, x, bucket->id, p) % (bucket->size - p); if (i) { - unsigned int t = bucket->perm[p + i]; - bucket->perm[p + i] = bucket->perm[p]; - bucket->perm[p] = t; + unsigned int t = work->perm[p + i]; + work->perm[p + i] = work->perm[p]; + work->perm[p] = t; } dprintk(" perm_choose swap %d with %d\n", p, p+i); } - bucket->perm_n++; + work->perm_n++; } for (i = 0; i < bucket->size; i++) dprintk(" perm_choose %d: %d\n", i, bucket->perm[i]); - s = bucket->perm[pr]; + s = work->perm[pr]; out: dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id, bucket->size, x, r, pr, s); @@ -132,14 +132,14 @@ static int bucket_perm_choose(struct crush_bucket *bucket, } /* uniform */ -static int bucket_uniform_choose(struct crush_bucket_uniform *bucket, - int x, int r) +static int bucket_uniform_choose(const struct crush_bucket_uniform *bucket, + struct crush_work_bucket *work, int x, int r) { - return bucket_perm_choose(&bucket->h, x, r); + return bucket_perm_choose(&bucket->h, work, x, r); } /* list */ -static int bucket_list_choose(struct crush_bucket_list *bucket, +static int bucket_list_choose(const struct crush_bucket_list *bucket, int x, int r) { int i; @@ -155,8 +155,9 @@ static int bucket_list_choose(struct crush_bucket_list *bucket, w *= bucket->sum_weights[i]; w = w >> 16; /*dprintk(" scaled %llx\n", w);*/ - if (w < bucket->item_weights[i]) + if (w < bucket->item_weights[i]) { return bucket->h.items[i]; + } } dprintk("bad list sums for bucket %d\n", bucket->h.id); @@ -192,7 +193,7 @@ static int terminal(int x) return x & 1; } -static int bucket_tree_choose(struct crush_bucket_tree *bucket, +static int bucket_tree_choose(const struct crush_bucket_tree *bucket, int x, int r) { int n; @@ -224,7 +225,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket, /* straw */ -static int bucket_straw_choose(struct crush_bucket_straw *bucket, +static int bucket_straw_choose(const struct crush_bucket_straw *bucket, int x, 
int r) { __u32 i; @@ -301,7 +302,7 @@ static __u64 crush_ln(unsigned int xin) * */ -static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, +static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket, int x, int r) { unsigned int i, high = 0; @@ -344,37 +345,42 @@ static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket, high_draw = draw; } } + return bucket->h.items[high]; } -static int crush_bucket_choose(struct crush_bucket *in, int x, int r) +static int crush_bucket_choose(const struct crush_bucket *in, + struct crush_work_bucket *work, + int x, int r) { dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); BUG_ON(in->size == 0); switch (in->alg) { case CRUSH_BUCKET_UNIFORM: - return bucket_uniform_choose((struct crush_bucket_uniform *)in, - x, r); + return bucket_uniform_choose( + (const struct crush_bucket_uniform *)in, + work, x, r); case CRUSH_BUCKET_LIST: - return bucket_list_choose((struct crush_bucket_list *)in, + return bucket_list_choose((const struct crush_bucket_list *)in, x, r); case CRUSH_BUCKET_TREE: - return bucket_tree_choose((struct crush_bucket_tree *)in, + return bucket_tree_choose((const struct crush_bucket_tree *)in, x, r); case CRUSH_BUCKET_STRAW: - return bucket_straw_choose((struct crush_bucket_straw *)in, - x, r); + return bucket_straw_choose( + (const struct crush_bucket_straw *)in, + x, r); case CRUSH_BUCKET_STRAW2: - return bucket_straw2_choose((struct crush_bucket_straw2 *)in, - x, r); + return bucket_straw2_choose( + (const struct crush_bucket_straw2 *)in, + x, r); default: dprintk("unknown bucket %d alg %d\n", in->id, in->alg); return in->items[0]; } } - /* * true if device is marked "out" (failed, fully offloaded) * of the cluster @@ -416,7 +422,8 @@ static int is_out(const struct crush_map *map, * @parent_r: r value passed from the parent */ static int crush_choose_firstn(const struct crush_map *map, - struct crush_bucket *bucket, + struct crush_work *work, + const struct crush_bucket *bucket, const __u32 *weight, int weight_max, int x, int numrep, int type, int *out, int outpos, @@ -434,7 +441,7 @@ static int crush_choose_firstn(const struct crush_map *map, int rep; unsigned int ftotal, flocal; int retry_descent, retry_bucket, skip_rep; - struct crush_bucket *in = bucket; + const struct crush_bucket *in = bucket; int r; int i; int item = 0; @@ -473,9 +480,13 @@ static int crush_choose_firstn(const struct crush_map *map, if (local_fallback_retries > 0 && flocal >= (in->size>>1) && flocal > local_fallback_retries) - item = bucket_perm_choose(in, x, r); + item = bucket_perm_choose( + in, work->work[-1-in->id], + x, r); else - item = crush_bucket_choose(in, x, r); + item = crush_bucket_choose( + in, work->work[-1-in->id], + x, r); if (item >= map->max_devices) { dprintk(" bad item %d\n", item); skip_rep = 1; @@ -518,19 +529,21 @@ static int crush_choose_firstn(const struct crush_map *map, sub_r = r >> (vary_r-1); else sub_r = 0; - if (crush_choose_firstn(map, - map->buckets[-1-item], - weight, weight_max, - x, stable ? 1 : outpos+1, 0, - out2, outpos, count, - recurse_tries, 0, - local_retries, - local_fallback_retries, - 0, - vary_r, - stable, - NULL, - sub_r) <= outpos) + if (crush_choose_firstn( + map, + work, + map->buckets[-1-item], + weight, weight_max, + x, stable ? 
1 : outpos+1, 0, + out2, outpos, count, + recurse_tries, 0, + local_retries, + local_fallback_retries, + 0, + vary_r, + stable, + NULL, + sub_r) <= outpos) /* didn't get leaf */ reject = 1; } else { @@ -600,7 +613,8 @@ static int crush_choose_firstn(const struct crush_map *map, * */ static void crush_choose_indep(const struct crush_map *map, - struct crush_bucket *bucket, + struct crush_work *work, + const struct crush_bucket *bucket, const __u32 *weight, int weight_max, int x, int left, int numrep, int type, int *out, int outpos, @@ -610,7 +624,7 @@ static void crush_choose_indep(const struct crush_map *map, int *out2, int parent_r) { - struct crush_bucket *in = bucket; + const struct crush_bucket *in = bucket; int endpos = outpos + left; int rep; unsigned int ftotal; @@ -678,7 +692,9 @@ static void crush_choose_indep(const struct crush_map *map, break; } - item = crush_bucket_choose(in, x, r); + item = crush_bucket_choose( + in, work->work[-1-in->id], + x, r); if (item >= map->max_devices) { dprintk(" bad item %d\n", item); out[rep] = CRUSH_ITEM_NONE; @@ -724,13 +740,15 @@ static void crush_choose_indep(const struct crush_map *map, if (recurse_to_leaf) { if (item < 0) { - crush_choose_indep(map, - map->buckets[-1-item], - weight, weight_max, - x, 1, numrep, 0, - out2, rep, - recurse_tries, 0, - 0, NULL, r); + crush_choose_indep( + map, + work, + map->buckets[-1-item], + weight, weight_max, + x, 1, numrep, 0, + out2, rep, + recurse_tries, 0, + 0, NULL, r); if (out2[rep] == CRUSH_ITEM_NONE) { /* placed nothing; no leaf */ break; @@ -781,6 +799,53 @@ static void crush_choose_indep(const struct crush_map *map, #endif } + +/* + * This takes a chunk of memory and sets it up to be a shiny new + * working area for a CRUSH placement computation. It must be called + * on any newly allocated memory before passing it in to + * crush_do_rule. It may be used repeatedly after that, so long as the + * map has not changed. If the map /has/ changed, you must make sure + * the working size is no smaller than what was allocated and re-run + * crush_init_workspace. + * + * If you do retain the working space between calls to crush, make it + * thread-local. + */ +void crush_init_workspace(const struct crush_map *map, void *v) +{ + struct crush_work *w = v; + __s32 b; + + /* + * We work by moving through the available space and setting + * values and pointers as we go. + * + * It's a bit like Forth's use of the 'allot' word since we + * set the pointer first and then reserve the space for it to + * point to by incrementing the point. 
+ */ + v += sizeof(struct crush_work *); + w->work = v; + v += map->max_buckets * sizeof(struct crush_work_bucket *); + for (b = 0; b < map->max_buckets; ++b) { + if (!map->buckets[b]) + continue; + + w->work[b] = v; + switch (map->buckets[b]->alg) { + default: + v += sizeof(struct crush_work_bucket); + break; + } + w->work[b]->perm_x = 0; + w->work[b]->perm_n = 0; + w->work[b]->perm = v; + v += map->buckets[b]->size * sizeof(__u32); + } + BUG_ON(v - (void *)w != map->working_size); +} + /** * crush_do_rule - calculate a mapping with the given input and rule * @map: the crush_map @@ -790,14 +855,16 @@ static void crush_choose_indep(const struct crush_map *map, * @result_max: maximum result size * @weight: weight vector (for map leaves) * @weight_max: size of weight vector + * @cwin: pointer to at least map->working_size bytes of memory * @scratch: scratch vector for private use; must be >= 3 * result_max */ int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, const __u32 *weight, int weight_max, - int *scratch) + void *cwin, int *scratch) { int result_len; + struct crush_work *cw = cwin; int *a = scratch; int *b = scratch + result_max; int *c = scratch + result_max*2; @@ -807,7 +874,7 @@ int crush_do_rule(const struct crush_map *map, int *o; int osize; int *tmp; - struct crush_rule *rule; + const struct crush_rule *rule; __u32 step; int i, j; int numrep; @@ -840,7 +907,7 @@ int crush_do_rule(const struct crush_map *map, for (step = 0; step < rule->len; step++) { int firstn = 0; - struct crush_rule_step *curstep = &rule->steps[step]; + const struct crush_rule_step *curstep = &rule->steps[step]; switch (curstep->op) { case CRUSH_RULE_TAKE: @@ -936,6 +1003,7 @@ int crush_do_rule(const struct crush_map *map, recurse_tries = choose_tries; osize += crush_choose_firstn( map, + cw, map->buckets[bno], weight, weight_max, x, numrep, @@ -956,6 +1024,7 @@ int crush_do_rule(const struct crush_map *map, numrep : (result_max-osize)); crush_choose_indep( map, + cw, map->buckets[bno], weight, weight_max, x, out_size, numrep, @@ -997,5 +1066,6 @@ int crush_do_rule(const struct crush_map *map, break; } } + return result_len; } diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 47df075b31e5..3892e7fa7747 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -153,6 +153,32 @@ static int skip_name_map(void **p, void *end) return -EINVAL; } +static void crush_finalize(struct crush_map *c) +{ + __s32 b; + + /* Space for the array of pointers to per-bucket workspace */ + c->working_size = sizeof(struct crush_work) + + c->max_buckets * sizeof(struct crush_work_bucket *); + + for (b = 0; b < c->max_buckets; b++) { + if (!c->buckets[b]) + continue; + + switch (c->buckets[b]->alg) { + default: + /* + * The base case, permutation variables and + * the pointer to the permutation array. + */ + c->working_size += sizeof(struct crush_work_bucket); + break; + } + /* Every bucket has a permutation array. 
*/ + c->working_size += c->buckets[b]->size * sizeof(__u32); + } +} + static struct crush_map *crush_decode(void *pbyval, void *end) { struct crush_map *c; @@ -246,10 +272,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end) b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS); if (b->items == NULL) goto badmem; - b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS); - if (b->perm == NULL) - goto badmem; - b->perm_n = 0; ceph_decode_need(p, end, b->size*sizeof(u32), bad); for (j = 0; j < b->size; j++) @@ -368,6 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable chooseleaf_stable = %d\n", c->chooseleaf_stable); + crush_finalize(c); + done: dout("crush_decode success\n"); return c; @@ -753,6 +777,7 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) kfree(map->osd_weight); kfree(map->osd_addr); kfree(map->osd_primary_affinity); + kfree(map->crush_workspace); kfree(map); } @@ -810,12 +835,23 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush) { + void *workspace; + if (IS_ERR(crush)) return PTR_ERR(crush); + workspace = kmalloc(crush->working_size, GFP_NOIO); + if (!workspace) { + crush_destroy(crush); + return -ENOMEM; + } + crush_init_workspace(crush, workspace); + if (map->crush) crush_destroy(map->crush); + kfree(map->crush_workspace); map->crush = crush; + map->crush_workspace = workspace; return 0; } @@ -1940,7 +1976,8 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, mutex_lock(&map->crush_scratch_mutex); r = crush_do_rule(map->crush, ruleno, x, result, result_max, - weight, weight_max, map->crush_scratch_ary); + weight, weight_max, map->crush_workspace, + map->crush_scratch_ary); mutex_unlock(&map->crush_scratch_mutex); return r; -- GitLab From 743efcffffc6620ab44ea9ec67c7e4e28dfa7742 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 31 Jan 2017 15:55:06 +0100 Subject: [PATCH 0352/1084] crush: merge working data and scratch Much like Arlo Guthrie, I decided that one big pile is better than two little piles. Reflects ceph.git commit 95c2df6c7e0b22d2ea9d91db500cf8b9441c73ba. Signed-off-by: Ilya Dryomov --- include/linux/ceph/osdmap.h | 3 +-- include/linux/crush/mapper.h | 14 +++++++++++++- net/ceph/crush/mapper.c | 17 +++++++---------- net/ceph/osdmap.c | 14 ++++++++------ 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index cef1cab789b9..8cebdc4158c3 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -173,8 +173,7 @@ struct ceph_osdmap { * the list of osds that store+replicate them. */ struct crush_map *crush; - struct mutex crush_scratch_mutex; - int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; + struct mutex crush_workspace_mutex; void *crush_workspace; }; diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h index 3303c7fd8a31..c95e19e1ff11 100644 --- a/include/linux/crush/mapper.h +++ b/include/linux/crush/mapper.h @@ -15,7 +15,19 @@ extern int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, const __u32 *weights, int weight_max, - void *cwin, int *scratch); + void *cwin); + +/* + * Returns the exact amount of workspace that will need to be used + * for a given combination of crush_map and result_max. The caller can + * then allocate this much on its own, either on the stack, in a + * per-thread long-lived buffer, or however it likes. 
+ */ +static inline size_t crush_work_size(const struct crush_map *map, + int result_max) +{ + return map->working_size + result_max * 3 * sizeof(__u32); +} void crush_init_workspace(const struct crush_map *map, void *v); diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 9e75be5ec716..2e31217ccae3 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -855,23 +855,22 @@ void crush_init_workspace(const struct crush_map *map, void *v) * @result_max: maximum result size * @weight: weight vector (for map leaves) * @weight_max: size of weight vector - * @cwin: pointer to at least map->working_size bytes of memory - * @scratch: scratch vector for private use; must be >= 3 * result_max + * @cwin: pointer to at least crush_work_size() bytes of memory */ int crush_do_rule(const struct crush_map *map, int ruleno, int x, int *result, int result_max, const __u32 *weight, int weight_max, - void *cwin, int *scratch) + void *cwin) { int result_len; struct crush_work *cw = cwin; - int *a = scratch; - int *b = scratch + result_max; - int *c = scratch + result_max*2; + int *a = cwin + map->working_size; + int *b = a + result_max; + int *c = b + result_max; + int *w = a; + int *o = b; int recurse_to_leaf; - int *w; int wsize = 0; - int *o; int osize; int *tmp; const struct crush_rule *rule; @@ -902,8 +901,6 @@ int crush_do_rule(const struct crush_map *map, rule = map->rules[ruleno]; result_len = 0; - w = a; - o = b; for (step = 0; step < rule->len; step++) { int firstn = 0; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 3892e7fa7747..2374956c4d40 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -743,7 +743,7 @@ struct ceph_osdmap *ceph_osdmap_alloc(void) map->pool_max = -1; map->pg_temp = RB_ROOT; map->primary_temp = RB_ROOT; - mutex_init(&map->crush_scratch_mutex); + mutex_init(&map->crush_workspace_mutex); return map; } @@ -836,11 +836,14 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush) { void *workspace; + size_t work_size; if (IS_ERR(crush)) return PTR_ERR(crush); - workspace = kmalloc(crush->working_size, GFP_NOIO); + work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE); + dout("%s work_size %zu bytes\n", __func__, work_size); + workspace = kmalloc(work_size, GFP_NOIO); if (!workspace) { crush_destroy(crush); return -ENOMEM; @@ -1974,11 +1977,10 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, BUG_ON(result_max > CEPH_PG_MAX_SIZE); - mutex_lock(&map->crush_scratch_mutex); + mutex_lock(&map->crush_workspace_mutex); r = crush_do_rule(map->crush, ruleno, x, result, result_max, - weight, weight_max, map->crush_workspace, - map->crush_scratch_ary); - mutex_unlock(&map->crush_scratch_mutex); + weight, weight_max, map->crush_workspace); + mutex_unlock(&map->crush_workspace_mutex); return r; } -- GitLab From ef9324bb11357c02a4f0529b806341e5b768d872 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 8 Feb 2017 18:57:48 +0100 Subject: [PATCH 0353/1084] libceph: don't go through with the mapping if the PG is too wide With EC overwrites maturing, the kernel client will be getting exposed to potentially very wide EC pools. While "min(pi->size, X)" works fine when the cluster is stable and happy, truncating OSD sets interferes with resend logic (ceph_is_new_interval(), etc). Abort the mapping if the pool is too wide, assigning the request to the homeless session. 
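A rough userspace sketch of the reject-rather-than-truncate idea described above; MAX_PG_OSDS, struct raw_pg and map_pg() are invented stand-ins for illustration, not the kernel's types or functions:

#include <stdio.h>

#define MAX_PG_OSDS 16                  /* stand-in for the fixed result array */

struct raw_pg {
	int osds[MAX_PG_OSDS];
	int width;
};

static int map_pg(struct raw_pg *raw, int pool_size)
{
	/*
	 * Silently clamping to MAX_PG_OSDS would hand later code a
	 * truncated OSD set; fail the mapping instead.
	 */
	if (pool_size > MAX_PG_OSDS) {
		fprintf(stderr, "pool too wide: size %d > %d\n",
			pool_size, MAX_PG_OSDS);
		raw->width = 0;
		return -1;
	}
	raw->width = pool_size;
	for (int i = 0; i < raw->width; i++)
		raw->osds[i] = i;       /* placeholder placement result */
	return 0;
}

int main(void)
{
	struct raw_pg raw;

	return map_pg(&raw, 20) == -1 ? 0 : 1;  /* a 20-wide pool is rejected */
}

A request whose placement is aborted this way is then left for the homeless session, as the message above describes, rather than being resent against a truncated OSD set.
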
Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osdmap.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 2374956c4d40..6824c0ec8373 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -2013,8 +2013,14 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap, return; } - len = do_crush(osdmap, ruleno, pps, raw->osds, - min_t(int, pi->size, ARRAY_SIZE(raw->osds)), + if (pi->size > ARRAY_SIZE(raw->osds)) { + pr_err_ratelimited("pool %lld ruleset %d type %d too wide: size %d > %zu\n", + pi->id, pi->crush_ruleset, pi->type, pi->size, + ARRAY_SIZE(raw->osds)); + return; + } + + len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size, osdmap->osd_weight, osdmap->max_osd); if (len < 0) { pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n", -- GitLab From 083a51fbc57ca848ab087692f3cc97898fd88b54 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Feb 2017 16:14:52 +0100 Subject: [PATCH 0354/1084] libceph: bump CEPH_PG_MAX_SIZE to 32 ... to accommodate potentially very wide EC pools. This increases the size of a typical rbd ceph_osd_request by ~12% (from 1040 to 1168 bytes), but I'd rather go future proof here. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/rados.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 5c0da61cb763..5d0018782d50 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -50,7 +50,7 @@ struct ceph_timespec { #define CEPH_PG_LAYOUT_LINEAR 2 #define CEPH_PG_LAYOUT_HYBRID 3 -#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ +#define CEPH_PG_MAX_SIZE 32 /* max # osds in a single pg */ /* * placement group. -- GitLab From 848d796c8c918e2d60060992786f82754f539cd4 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:21 +0100 Subject: [PATCH 0355/1084] rbd: use kstrndup() in rbd_header_from_disk() Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 36d2b9f4e836..c3251f9123c0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -992,15 +992,11 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, /* Allocate this now to avoid having to handle failure below */ if (first_time) { - size_t len; - - len = strnlen(ondisk->object_prefix, - sizeof (ondisk->object_prefix)); - object_prefix = kmalloc(len + 1, GFP_KERNEL); + object_prefix = kstrndup(ondisk->object_prefix, + sizeof(ondisk->object_prefix), + GFP_KERNEL); if (!object_prefix) return -ENOMEM; - memcpy(object_prefix, ondisk->object_prefix, len); - object_prefix[len] = '\0'; } /* Allocate the snapshot context and fill it in */ -- GitLab From 24dca799fdbeeb0c955ef2f56a37461e6566aa07 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:21 +0100 Subject: [PATCH 0356/1084] rbd: kill rbd_image_header::{crypt_type,comp_type} Image format 1 is deprecated and format 2 doesn't have these. Also, __rbd_dev_create() takes care of zeroing (or otherwise initializing) format 2 specific fields. 
Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 9 --------- drivers/block/rbd_types.h | 3 --- 2 files changed, 12 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c3251f9123c0..fa47f53942b7 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -144,8 +144,6 @@ struct rbd_image_header { /* These six fields never change for a given rbd image */ char *object_prefix; __u8 obj_order; - __u8 crypt_type; - __u8 comp_type; u64 stripe_unit; u64 stripe_count; u64 features; /* Might be changeable someday? */ @@ -1047,12 +1045,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, if (first_time) { header->object_prefix = object_prefix; header->obj_order = ondisk->options.order; - header->crypt_type = ondisk->options.crypt_type; - header->comp_type = ondisk->options.comp_type; - /* The rest aren't used for format 1 images */ - header->stripe_unit = 0; - header->stripe_count = 0; - header->features = 0; } else { ceph_put_snap_context(header->snapc); kfree(header->snap_names); @@ -5938,7 +5930,6 @@ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) if (ret < 0) goto out_err; } - /* No support for crypto and compression type format 2 images */ return 0; out_err: diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h index 94f367db27b0..be9c76d292f5 100644 --- a/drivers/block/rbd_types.h +++ b/drivers/block/rbd_types.h @@ -57,9 +57,6 @@ enum rbd_notify_op { #define RBD_MIN_OBJ_ORDER 16 #define RBD_MAX_OBJ_ORDER 30 -#define RBD_COMP_NONE 0 -#define RBD_CRYPT_NONE 0 - #define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n" #define RBD_HEADER_SIGNATURE "RBD" #define RBD_HEADER_VERSION "001.005" -- GitLab From 431a02cd82dd028f650d588f7cedc78643c5c86a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:21 +0100 Subject: [PATCH 0357/1084] rbd: initialize rbd_dev->header_oloc early No reason to delay it until image_id is known. This will be required by some rbd_obj_method_sync() callers, after rbd_obj_method_sync() is changed to take oloc. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index fa47f53942b7..047e877cacb8 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4863,7 +4863,7 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc, init_rwsem(&rbd_dev->header_rwsem); ceph_oid_init(&rbd_dev->header_oid); - ceph_oloc_init(&rbd_dev->header_oloc); + rbd_dev->header_oloc.pool = spec->pool_id; mutex_init(&rbd_dev->watch_mutex); rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; @@ -6062,8 +6062,6 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev) /* Record the header object name for this rbd image. */ rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); - - rbd_dev->header_oloc.pool = rbd_dev->layout.pool_id; if (rbd_dev->image_format == 1) ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", spec->image_name, RBD_SUFFIX); -- GitLab From fe5478e0f6694312ad17dea7083296c1aea0a049 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:21 +0100 Subject: [PATCH 0358/1084] rbd: do away with obj_request in rbd_obj_read_sync() rbd_obj_request machinery is completely unnecessary here; all that's being done is fetching a metadata object - no striping, cloning, etc. More importantly, rbd_osd_req_create() grabs pool id from layout and that is becoming a data pool id. 
Kill offset argument - all metadata objects are small and read in full. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 81 ++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 49 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 047e877cacb8..2c596f7498bd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4242,63 +4242,46 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) } static int rbd_obj_read_sync(struct rbd_device *rbd_dev, - const char *object_name, - u64 offset, u64 length, void *buf) + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + void *buf, int buf_len) { - struct rbd_obj_request *obj_request; - struct page **pages = NULL; - u32 page_count; - size_t size; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct ceph_osd_request *req; + struct page **pages; + int num_pages = calc_pages_for(0, buf_len); int ret; - page_count = (u32) calc_pages_for(offset, length); - pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - ret = -ENOMEM; - obj_request = rbd_obj_request_create(object_name, offset, length, - OBJ_REQUEST_PAGES); - if (!obj_request) - goto out; - - obj_request->pages = pages; - obj_request->page_count = page_count; - - obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, - obj_request); - if (!obj_request->osd_req) - goto out; + req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL); + if (!req) + return -ENOMEM; - osd_req_op_extent_init(obj_request->osd_req, 0, CEPH_OSD_OP_READ, - offset, length, 0, 0); - osd_req_op_extent_osd_data_pages(obj_request->osd_req, 0, - obj_request->pages, - obj_request->length, - obj_request->offset & ~PAGE_MASK, - false, false); + ceph_oid_copy(&req->r_base_oid, oid); + ceph_oloc_copy(&req->r_base_oloc, oloc); + req->r_flags = CEPH_OSD_FLAG_READ; - rbd_obj_request_submit(obj_request); - ret = rbd_obj_request_wait(obj_request); + ret = ceph_osdc_alloc_messages(req, GFP_KERNEL); if (ret) - goto out; + goto out_req; - ret = obj_request->result; - if (ret < 0) - goto out; + pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); + if (IS_ERR(pages)) { + ret = PTR_ERR(pages); + goto out_req; + } - rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); - size = (size_t) obj_request->xferred; - ceph_copy_from_page_vector(pages, buf, 0, size); - rbd_assert(size <= (size_t)INT_MAX); - ret = (int)size; -out: - if (obj_request) - rbd_obj_request_put(obj_request); - else - ceph_release_page_vector(pages, page_count); + osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, 0, buf_len, 0, 0); + osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false, + true); + + ceph_osdc_start_request(osdc, req, false); + ret = ceph_osdc_wait_request(osdc, req); + if (ret >= 0) + ceph_copy_from_page_vector(pages, buf, 0, ret); +out_req: + ceph_osdc_put_request(req); return ret; } @@ -4334,8 +4317,8 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) if (!ondisk) return -ENOMEM; - ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_oid.name, - 0, size, ondisk); + ret = rbd_obj_read_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, ondisk, size); if (ret < 0) goto out; if ((size_t)ret < size) { -- GitLab From 2544a02090aa76ffb8069e6bf23d886e34d9c8da Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:21 +0100 Subject: [PATCH 0359/1084] libceph: pass reply buffer length through ceph_osdc_call() To spare checking for 
"this reply fits into a page, but does it fit into my buffer?" in some callers, osd_req_op_cls_response_data_pages() needs to know how big it is. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- net/ceph/cls_lock_client.c | 2 +- net/ceph/osd_client.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c index 50f040fdb2a9..f13a1ea87459 100644 --- a/net/ceph/cls_lock_client.c +++ b/net/ceph/cls_lock_client.c @@ -278,7 +278,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc, int get_info_op_buf_size; int name_len = strlen(lock_name); struct page *get_info_op_page, *reply_page; - size_t reply_len; + size_t reply_len = PAGE_SIZE; void *p, *end; int ret; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3a2417bb6ff0..ac4753421d0c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -4023,7 +4023,7 @@ EXPORT_SYMBOL(ceph_osdc_maybe_request_map); * Execute an OSD class method on an object. * * @flags: CEPH_OSD_FLAG_* - * @resp_len: out param for reply length + * @resp_len: in/out param for reply length */ int ceph_osdc_call(struct ceph_osd_client *osdc, struct ceph_object_id *oid, @@ -4036,6 +4036,9 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, struct ceph_osd_request *req; int ret; + if (req_len > PAGE_SIZE || (resp_page && *resp_len > PAGE_SIZE)) + return -E2BIG; + req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO); if (!req) return -ENOMEM; @@ -4054,7 +4057,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, 0, false, false); if (resp_page) osd_req_op_cls_response_data_pages(req, 0, &resp_page, - PAGE_SIZE, 0, false, false); + *resp_len, 0, false, false); ceph_osdc_start_request(osdc, req, false); ret = ceph_osdc_wait_request(osdc, req); -- GitLab From ecd4a68a26a2159b9f7233c6e892c94c90074cb0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:21 +0100 Subject: [PATCH 0360/1084] rbd: switch rbd_obj_method_sync() to ceph_osdc_call() As explained in the previous commit, rbd_obj_request machinery (and rbd_osd_req_create() in particular) shouldn't be used for working with metadata objects. Switch to the recently added ceph_osdc_call(). It assumes single pages for outbound and inbound buffers, but that's OK - none of the callers need more than that. These pages need to be allocated (messenger is in dire need of proper iterator interface!), but we are swapping for pages[] and pagelist allocations in the existing code. Kill class_name argument - all rbd methods are under "rbd". Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 162 ++++++++++++++++++-------------------------- 1 file changed, 67 insertions(+), 95 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 2c596f7498bd..644786e208ac 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3971,17 +3971,17 @@ static void rbd_reregister_watch(struct work_struct *work) * returned in the outbound buffer, or a negative error code. 
*/ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, - const char *object_name, - const char *class_name, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, const char *method_name, const void *outbound, size_t outbound_size, void *inbound, size_t inbound_size) { - struct rbd_obj_request *obj_request; - struct page **pages; - u32 page_count; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct page *req_page = NULL; + struct page *reply_page; int ret; /* @@ -3991,61 +3991,35 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, * method. Currently if this is present it will be a * snapshot id. */ - page_count = (u32)calc_pages_for(0, inbound_size); - pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); - if (IS_ERR(pages)) - return PTR_ERR(pages); + if (outbound) { + if (outbound_size > PAGE_SIZE) + return -E2BIG; - ret = -ENOMEM; - obj_request = rbd_obj_request_create(object_name, 0, inbound_size, - OBJ_REQUEST_PAGES); - if (!obj_request) - goto out; - - obj_request->pages = pages; - obj_request->page_count = page_count; - - obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, - obj_request); - if (!obj_request->osd_req) - goto out; - - osd_req_op_cls_init(obj_request->osd_req, 0, CEPH_OSD_OP_CALL, - class_name, method_name); - if (outbound_size) { - struct ceph_pagelist *pagelist; - - pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); - if (!pagelist) - goto out; + req_page = alloc_page(GFP_KERNEL); + if (!req_page) + return -ENOMEM; - ceph_pagelist_init(pagelist); - ceph_pagelist_append(pagelist, outbound, outbound_size); - osd_req_op_cls_request_data_pagelist(obj_request->osd_req, 0, - pagelist); + memcpy(page_address(req_page), outbound, outbound_size); } - osd_req_op_cls_response_data_pages(obj_request->osd_req, 0, - obj_request->pages, inbound_size, - 0, false, false); - rbd_obj_request_submit(obj_request); - ret = rbd_obj_request_wait(obj_request); - if (ret) - goto out; - - ret = obj_request->result; - if (ret < 0) - goto out; + reply_page = alloc_page(GFP_KERNEL); + if (!reply_page) { + if (req_page) + __free_page(req_page); + return -ENOMEM; + } - rbd_assert(obj_request->xferred < (u64)INT_MAX); - ret = (int)obj_request->xferred; - ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); -out: - if (obj_request) - rbd_obj_request_put(obj_request); - else - ceph_release_page_vector(pages, page_count); + ret = ceph_osdc_call(osdc, oid, oloc, RBD_DRV_NAME, method_name, + CEPH_OSD_FLAG_READ, req_page, outbound_size, + reply_page, &inbound_size); + if (!ret) { + memcpy(inbound, page_address(reply_page), inbound_size); + ret = inbound_size; + } + if (req_page) + __free_page(req_page); + __free_page(reply_page); return ret; } @@ -4939,10 +4913,10 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, __le64 size; } __attribute__ ((packed)) size_buf = { 0 }; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_size", - &snapid, sizeof (snapid), - &size_buf, sizeof (size_buf)); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_size", + &snapid, sizeof(snapid), + &size_buf, sizeof(size_buf)); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -4979,9 +4953,9 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_object_prefix", NULL, 0, - reply_buf, 
RBD_OBJ_PREFIX_LEN_MAX); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_object_prefix", + NULL, 0, reply_buf, RBD_OBJ_PREFIX_LEN_MAX); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -5014,10 +4988,10 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, u64 unsup; int ret; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_features", - &snapid, sizeof (snapid), - &features_buf, sizeof (features_buf)); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_features", + &snapid, sizeof(snapid), + &features_buf, sizeof(features_buf)); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -5076,10 +5050,9 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } snapid = cpu_to_le64(rbd_dev->spec->snap_id); - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_parent", - &snapid, sizeof (snapid), - reply_buf, size); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_parent", + &snapid, sizeof(snapid), reply_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out_err; @@ -5179,9 +5152,9 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) u64 stripe_count; int ret; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_stripe_unit_count", NULL, 0, - (char *)&striping_info_buf, size); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_stripe_unit_count", + NULL, 0, &striping_info_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -5218,6 +5191,7 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) static char *rbd_dev_image_name(struct rbd_device *rbd_dev) { + CEPH_DEFINE_OID_ONSTACK(oid); size_t image_id_size; char *image_id; void *p; @@ -5245,10 +5219,10 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) if (!reply_buf) goto out; - ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, - "rbd", "dir_get_name", - image_id, image_id_size, - reply_buf, size); + ceph_oid_printf(&oid, "%s", RBD_DIRECTORY); + ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, + "dir_get_name", image_id, image_id_size, + reply_buf, size); if (ret < 0) goto out; p = reply_buf; @@ -5427,9 +5401,9 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_snapcontext", NULL, 0, - reply_buf, size); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_snapcontext", + NULL, 0, reply_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -5492,10 +5466,9 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, return ERR_PTR(-ENOMEM); snapid = cpu_to_le64(snap_id); - ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_oid.name, - "rbd", "get_snapshot_name", - &snapid, sizeof (snapid), - reply_buf, size); + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_snapshot_name", + &snapid, sizeof(snapid), reply_buf, size); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) { snap_name = ERR_PTR(ret); @@ -5802,7 +5775,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) { int ret; size_t size; 
- char *object_name; + CEPH_DEFINE_OID_ONSTACK(oid); void *response; char *image_id; @@ -5822,12 +5795,12 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. */ - size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); - object_name = kmalloc(size, GFP_NOIO); - if (!object_name) - return -ENOMEM; - sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->spec->image_name); - dout("rbd id object name is %s\n", object_name); + ret = ceph_oid_aprintf(&oid, GFP_KERNEL, "%s%s", RBD_ID_PREFIX, + rbd_dev->spec->image_name); + if (ret) + return ret; + + dout("rbd id object name is %s\n", oid.name); /* Response will be an encoded string, which includes a length */ @@ -5840,9 +5813,9 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) /* If it doesn't exist we'll assume it's a format 1 image */ - ret = rbd_obj_method_sync(rbd_dev, object_name, - "rbd", "get_id", NULL, 0, - response, RBD_IMAGE_ID_LEN_MAX); + ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, + "get_id", NULL, 0, + response, RBD_IMAGE_ID_LEN_MAX); dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret == -ENOENT) { image_id = kstrdup("", GFP_KERNEL); @@ -5865,8 +5838,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) } out: kfree(response); - kfree(object_name); - + ceph_oid_destroy(&oid); return ret; } -- GitLab From 03acf083355ffeb89b9269d205004d6d888fff99 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:22 +0100 Subject: [PATCH 0361/1084] rbd: remove now unused rbd_obj_request_wait() and helpers Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 644786e208ac..e2ffd4fffd06 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1619,44 +1619,6 @@ static void rbd_obj_request_submit(struct rbd_obj_request *obj_request) ceph_osdc_start_request(osd_req->r_osdc, osd_req, false); } -static void rbd_obj_request_end(struct rbd_obj_request *obj_request) -{ - dout("%s %p\n", __func__, obj_request); - ceph_osdc_cancel_request(obj_request->osd_req); -} - -/* - * Wait for an object request to complete. If interrupted, cancel the - * underlying osd request. - * - * @timeout: in jiffies, 0 means "wait forever" - */ -static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request, - unsigned long timeout) -{ - long ret; - - dout("%s %p\n", __func__, obj_request); - ret = wait_for_completion_interruptible_timeout( - &obj_request->completion, - ceph_timeout_jiffies(timeout)); - if (ret <= 0) { - if (ret == 0) - ret = -ETIMEDOUT; - rbd_obj_request_end(obj_request); - } else { - ret = 0; - } - - dout("%s %p ret %d\n", __func__, obj_request, (int)ret); - return ret; -} - -static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) -{ - return __rbd_obj_request_wait(obj_request, 0); -} - static void rbd_img_request_complete(struct rbd_img_request *img_request) { -- GitLab From 5bc3fb177548503735bcc35fe98475d883740ecb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:22 +0100 Subject: [PATCH 0362/1084] rbd: use rbd_obj_bytes() more Returning u64 doesn't make sense: max header->obj_order is 25 and ceph_file_layout::object_size is u32. 
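A quick arithmetic check of the claim above, assuming the stated maximum object order of 25; the numbers below are just the shift worked out, nothing driver-specific:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t obj_order = 25;                    /* max order per the message */
	uint32_t obj_bytes = UINT32_C(1) << obj_order;

	/* 1 << 25 = 33554432 bytes = 32 MiB, comfortably inside a u32 */
	printf("%u bytes (%u MiB)\n",
	       (unsigned)obj_bytes, (unsigned)(obj_bytes >> 20));
	return 0;
}
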
Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index e2ffd4fffd06..55e30db0576d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -970,6 +970,14 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) return true; } +/* + * returns the size of an object in the image + */ +static u32 rbd_obj_bytes(struct rbd_image_header *header) +{ + return 1U << header->obj_order; +} + /* * Fill an rbd image header with information from the given format 1 * on-disk header. @@ -1255,7 +1263,7 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) { - u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; + u64 segment_size = rbd_obj_bytes(&rbd_dev->header); return offset & (segment_size - 1); } @@ -1263,7 +1271,7 @@ static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) static u64 rbd_segment_length(struct rbd_device *rbd_dev, u64 offset, u64 length) { - u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; + u64 segment_size = rbd_obj_bytes(&rbd_dev->header); offset &= segment_size - 1; @@ -1274,14 +1282,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev, return length; } -/* - * returns the size of an object in the image - */ -static u64 rbd_obj_bytes(struct rbd_image_header *header) -{ - return 1 << header->obj_order; -} - /* * bio helpers */ @@ -2721,7 +2721,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) * child image to which the original request was to be sent. */ img_offset = obj_request->img_offset - obj_request->offset; - length = (u64)1 << rbd_dev->header.obj_order; + length = rbd_obj_bytes(&rbd_dev->header); /* * There is no defined parent data beyond the parent @@ -5130,7 +5130,7 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) * out, and only fail if the image has non-default values. */ ret = -EINVAL; - obj_size = (u64)1 << rbd_dev->header.obj_order; + obj_size = rbd_obj_bytes(&rbd_dev->header); p = &striping_info_buf; stripe_unit = ceph_decode_64(&p); if (stripe_unit != obj_size) { -- GitLab From 263423f8adf4acbdc4fd26ed5b35f4a6408bc0ab Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:22 +0100 Subject: [PATCH 0363/1084] rbd: introduce rbd_init_layout() Rather than initializing layout fields with some made up values in __rbd_dev_create(), move the initialization into rbd_init_layout() and call it after the header is actually populated. 
Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 55e30db0576d..e7131c758118 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -978,6 +978,21 @@ static u32 rbd_obj_bytes(struct rbd_image_header *header) return 1U << header->obj_order; } +static void rbd_init_layout(struct rbd_device *rbd_dev) +{ + if (rbd_dev->header.stripe_unit == 0 || + rbd_dev->header.stripe_count == 0) { + rbd_dev->header.stripe_unit = rbd_obj_bytes(&rbd_dev->header); + rbd_dev->header.stripe_count = 1; + } + + rbd_dev->layout.stripe_unit = rbd_dev->header.stripe_unit; + rbd_dev->layout.stripe_count = rbd_dev->header.stripe_count; + rbd_dev->layout.object_size = rbd_obj_bytes(&rbd_dev->header); + rbd_dev->layout.pool_id = rbd_dev->spec->pool_id; + RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); +} + /* * Fill an rbd image header with information from the given format 1 * on-disk header. @@ -1053,6 +1068,7 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, if (first_time) { header->object_prefix = object_prefix; header->obj_order = ondisk->options.order; + rbd_init_layout(rbd_dev); } else { ceph_put_snap_context(header->snapc); kfree(header->snap_names); @@ -4804,12 +4820,6 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc, rbd_dev->rbd_client = rbdc; rbd_dev->spec = spec; - rbd_dev->layout.stripe_unit = 1 << RBD_MAX_OBJ_ORDER; - rbd_dev->layout.stripe_count = 1; - rbd_dev->layout.object_size = 1 << RBD_MAX_OBJ_ORDER; - rbd_dev->layout.pool_id = spec->pool_id; - RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); - return rbd_dev; } @@ -5848,12 +5858,13 @@ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) goto out_err; } + rbd_init_layout(rbd_dev); return 0; + out_err: rbd_dev->header.features = 0; kfree(rbd_dev->header.object_prefix); rbd_dev->header.object_prefix = NULL; - return ret; } -- GitLab From 7e97332ea9caad3b7c6d86bc3b982e17eda2f736 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:22 +0100 Subject: [PATCH 0364/1084] rbd: support for data-pool feature Add support for RBD_FEATURE_DATA_POOL feature. rbd_dev->layout.pool_id now stores the data pool id. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index e7131c758118..ceac7877b5f4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -123,9 +123,11 @@ static int atomic_dec_return_safe(atomic_t *v) #define RBD_FEATURE_LAYERING (1<<0) #define RBD_FEATURE_STRIPINGV2 (1<<1) #define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2) +#define RBD_FEATURE_DATA_POOL (1<<7) #define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ RBD_FEATURE_STRIPINGV2 | \ - RBD_FEATURE_EXCLUSIVE_LOCK) + RBD_FEATURE_EXCLUSIVE_LOCK | \ + RBD_FEATURE_DATA_POOL) /* Features supported by this (client software) implementation. */ @@ -146,6 +148,7 @@ struct rbd_image_header { __u8 obj_order; u64 stripe_unit; u64 stripe_count; + s64 data_pool_id; u64 features; /* Might be changeable someday? 
*/ /* The remaining fields need to be updated occasionally */ @@ -989,7 +992,8 @@ static void rbd_init_layout(struct rbd_device *rbd_dev) rbd_dev->layout.stripe_unit = rbd_dev->header.stripe_unit; rbd_dev->layout.stripe_count = rbd_dev->header.stripe_count; rbd_dev->layout.object_size = rbd_obj_bytes(&rbd_dev->header); - rbd_dev->layout.pool_id = rbd_dev->spec->pool_id; + rbd_dev->layout.pool_id = rbd_dev->header.data_pool_id == CEPH_NOPOOL ? + rbd_dev->spec->pool_id : rbd_dev->header.data_pool_id; RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); } @@ -4797,6 +4801,7 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc, INIT_LIST_HEAD(&rbd_dev->node); init_rwsem(&rbd_dev->header_rwsem); + rbd_dev->header.data_pool_id = CEPH_NOPOOL; ceph_oid_init(&rbd_dev->header_oid); rbd_dev->header_oloc.pool = spec->pool_id; @@ -5161,6 +5166,24 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) return 0; } +static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev) +{ + __le64 data_pool_id; + int ret; + + ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, + &rbd_dev->header_oloc, "get_data_pool", + NULL, 0, &data_pool_id, sizeof(data_pool_id)); + if (ret < 0) + return ret; + if (ret < sizeof(data_pool_id)) + return -EBADMSG; + + rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id); + WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL); + return 0; +} + static char *rbd_dev_image_name(struct rbd_device *rbd_dev) { CEPH_DEFINE_OID_ONSTACK(oid); @@ -5858,6 +5881,12 @@ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) goto out_err; } + if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) { + ret = rbd_dev_v2_data_pool(rbd_dev); + if (ret) + goto out_err; + } + rbd_init_layout(rbd_dev); return 0; -- GitLab From 67e2b65274e3c5fc9100544c1f90adcc85dbe597 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:22 +0100 Subject: [PATCH 0365/1084] rbd: set offset and length outside of rbd_obj_request_create() The allocation doesn't depend on offset and length. Both offset and length can be changed after obj_request is allocated, too. 
Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ceac7877b5f4..c975d49e612c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1631,7 +1631,9 @@ static void rbd_obj_request_submit(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req; - dout("%s %p osd_req %p\n", __func__, obj_request, osd_req); + dout("%s %p \"%s\" %llu~%llu osd_req %p\n", __func__, + obj_request, obj_request->object_name, obj_request->offset, + obj_request->length, osd_req); if (obj_request_img_data_test(obj_request)) { WARN_ON(obj_request->callback != rbd_img_obj_callback); rbd_img_request_get(obj_request->img_request); @@ -2073,7 +2075,6 @@ static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) /* object_name is assumed to be a non-null pointer and NUL-terminated */ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, - u64 offset, u64 length, enum obj_request_type type) { struct rbd_obj_request *obj_request; @@ -2094,18 +2095,13 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, } obj_request->object_name = memcpy(name, object_name, size); - obj_request->offset = offset; - obj_request->length = length; - obj_request->flags = 0; obj_request->which = BAD_WHICH; obj_request->type = type; INIT_LIST_HEAD(&obj_request->links); init_completion(&obj_request->completion); kref_init(&obj_request->kref); - dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name, - offset, length, (int)type, obj_request); - + dout("%s %p\n", __func__, obj_request); return obj_request; } @@ -2517,21 +2513,21 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, while (resid) { struct ceph_osd_request *osd_req; const char *object_name; - u64 offset; - u64 length; + u64 offset = rbd_segment_offset(rbd_dev, img_offset); + u64 length = rbd_segment_length(rbd_dev, img_offset, resid); object_name = rbd_segment_name(rbd_dev, img_offset); if (!object_name) goto out_unwind; - offset = rbd_segment_offset(rbd_dev, img_offset); - length = rbd_segment_length(rbd_dev, img_offset, resid); - obj_request = rbd_obj_request_create(object_name, - offset, length, type); + obj_request = rbd_obj_request_create(object_name, type); /* object request has its own copy of the object name */ rbd_segment_name_free(object_name); if (!obj_request) goto out_unwind; + obj_request->offset = offset; + obj_request->length = length; + /* * set obj_request->img_request before creating the * osd_request so that it gets the right snapc @@ -2870,7 +2866,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) size_t size; int ret; - stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0, + stat_request = rbd_obj_request_create(obj_request->object_name, OBJ_REQUEST_PAGES); if (!stat_request) return -ENOMEM; -- GitLab From bc81207ea9fc01896cbc102f025f1ebb2c4bae1d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:23 +0100 Subject: [PATCH 0366/1084] rbd: factor out __rbd_osd_req_create() Factor OSD request allocation and initialization code out into __rbd_osd_req_create(). 
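A minimal sketch of the factoring pattern this commit applies, with invented names (struct req, make_req and friends are illustrative only, not the driver's API): two near-identical allocate-and-initialize paths collapse into one shared constructor that each caller parameterizes.

#include <stdlib.h>

struct req {
	int flags;
	int num_ops;
	void *priv;
};

/* shared constructor: allocation and common field setup live here once */
static struct req *make_req(int num_ops, int flags, void *priv)
{
	struct req *r = calloc(1, sizeof(*r));

	if (!r)
		return NULL;
	r->num_ops = num_ops;
	r->flags = flags;
	r->priv = priv;
	return r;
}

/* callers only state what differs between them */
static struct req *make_read_req(void *priv)
{
	return make_req(1, 0x1, priv);
}

static struct req *make_copyup_req(void *priv)
{
	return make_req(3, 0x2, priv);
}

int main(void)
{
	struct req *a = make_read_req(NULL);
	struct req *b = make_copyup_req(NULL);
	int ok = a && b;

	free(a);
	free(b);
	return !ok;
}

The payoff of this shape is that later changes to request setup (flags, base oid formatting, message allocation) land in one place instead of being duplicated per caller.
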
Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 103 +++++++++++++++++--------------------------- 1 file changed, 40 insertions(+), 63 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c975d49e612c..b44b457b4dc7 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1952,6 +1952,38 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request) osd_req->r_data_offset = obj_request->offset; } +static struct ceph_osd_request * +__rbd_osd_req_create(struct rbd_device *rbd_dev, + struct ceph_snap_context *snapc, + int num_ops, unsigned int flags, + struct rbd_obj_request *obj_request) +{ + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct ceph_osd_request *req; + + req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO); + if (!req) + return NULL; + + req->r_flags = flags; + req->r_callback = rbd_osd_req_callback; + req->r_priv = obj_request; + + req->r_base_oloc.pool = rbd_dev->layout.pool_id; + if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, "%s", + obj_request->object_name)) + goto err_req; + + if (ceph_osdc_alloc_messages(req, GFP_NOIO)) + goto err_req; + + return req; + +err_req: + ceph_osdc_put_request(req); + return NULL; +} + /* * Create an osd request. A read request has one osd op (read). * A write request has either one (watch) or two (hint+write) osd ops. @@ -1965,8 +1997,6 @@ static struct ceph_osd_request *rbd_osd_req_create( struct rbd_obj_request *obj_request) { struct ceph_snap_context *snapc = NULL; - struct ceph_osd_client *osdc; - struct ceph_osd_request *osd_req; if (obj_request_img_data_test(obj_request) && (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE)) { @@ -1981,35 +2011,10 @@ static struct ceph_osd_request *rbd_osd_req_create( rbd_assert(num_ops == 1 || ((op_type == OBJ_OP_WRITE) && num_ops == 2)); - /* Allocate and initialize the request, for the num_ops ops */ - - osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, - GFP_NOIO); - if (!osd_req) - goto fail; - - if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) - osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; - else - osd_req->r_flags = CEPH_OSD_FLAG_READ; - - osd_req->r_callback = rbd_osd_req_callback; - osd_req->r_priv = obj_request; - - osd_req->r_base_oloc.pool = rbd_dev->layout.pool_id; - if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s", - obj_request->object_name)) - goto fail; - - if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO)) - goto fail; - - return osd_req; - -fail: - ceph_osdc_put_request(osd_req); - return NULL; + return __rbd_osd_req_create(rbd_dev, snapc, num_ops, + (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) ? 
+ CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK : CEPH_OSD_FLAG_READ, + obj_request); } /* @@ -2022,10 +2027,6 @@ static struct ceph_osd_request * rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) { struct rbd_img_request *img_request; - struct ceph_snap_context *snapc; - struct rbd_device *rbd_dev; - struct ceph_osd_client *osdc; - struct ceph_osd_request *osd_req; int num_osd_ops = 3; rbd_assert(obj_request_img_data_test(obj_request)); @@ -2037,36 +2038,12 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) if (img_request_discard_test(img_request)) num_osd_ops = 2; - /* Allocate and initialize the request, for all the ops */ - - snapc = img_request->snapc; - rbd_dev = img_request->rbd_dev; - osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops, - false, GFP_NOIO); - if (!osd_req) - goto fail; - - osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; - osd_req->r_callback = rbd_osd_req_callback; - osd_req->r_priv = obj_request; - - osd_req->r_base_oloc.pool = rbd_dev->layout.pool_id; - if (ceph_oid_aprintf(&osd_req->r_base_oid, GFP_NOIO, "%s", - obj_request->object_name)) - goto fail; - - if (ceph_osdc_alloc_messages(osd_req, GFP_NOIO)) - goto fail; - - return osd_req; - -fail: - ceph_osdc_put_request(osd_req); - return NULL; + return __rbd_osd_req_create(img_request->rbd_dev, + img_request->snapc, num_osd_ops, + CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, + obj_request); } - static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) { ceph_osdc_put_request(osd_req); -- GitLab From 223768d02e2ca5c1747099e03cf949c17aebffd2 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:23 +0100 Subject: [PATCH 0367/1084] rbd: RBD_V{1,2}_DATA_FORMAT macros ... and also fix up the comment -- format 1 data objects have always been 12 hex digits long. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 6 ++---- drivers/block/rbd_types.h | 7 ++++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b44b457b4dc7..e10d0708a419 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1257,18 +1257,16 @@ static void rbd_segment_name_free(const char *name) static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) { + const char *name_format = rbd_dev->image_format == 1 ? + RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT; char *name; u64 segment; int ret; - char *name_format; name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO); if (!name) return NULL; segment = offset >> rbd_dev->header.obj_order; - name_format = "%s.%012llx"; - if (rbd_dev->image_format == 2) - name_format = "%s.%016llx"; ret = snprintf(name, CEPH_MAX_OID_NAME_LEN + 1, name_format, rbd_dev->header.object_prefix, segment); if (ret < 0 || ret > CEPH_MAX_OID_NAME_LEN) { diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h index be9c76d292f5..62ff50d3e7a6 100644 --- a/drivers/block/rbd_types.h +++ b/drivers/block/rbd_types.h @@ -25,8 +25,8 @@ */ #define RBD_HEADER_PREFIX "rbd_header." -#define RBD_DATA_PREFIX "rbd_data." #define RBD_ID_PREFIX "rbd_id." +#define RBD_V2_DATA_FORMAT "%s.%016llx" #define RBD_LOCK_NAME "rbd_lock" #define RBD_LOCK_TAG "internal" @@ -42,13 +42,14 @@ enum rbd_notify_op { /* * For format version 1, rbd image 'foo' consists of objects * foo.rbd - image metadata - * rb...00000000 - * rb...00000001 + * rb....000000000000 + * rb....000000000001 * ... 
- data * There is no notion of a persistent image id in rbd format 1. */ #define RBD_SUFFIX ".rbd" +#define RBD_V1_DATA_FORMAT "%s.%012llx" #define RBD_DIRECTORY "rbd_directory" #define RBD_INFO "rbd_info" -- GitLab From a90bb0c1d47dc1195bc6ac753b34a52535089f80 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:23 +0100 Subject: [PATCH 0368/1084] rbd: store and use obj_request->object_no object_no can be trivially formatted into an object name. We already store object names in OSD requests with special care to avoid dynamic allocations for short names. Storing a name in obj_request, obtained as below (!), is a waste and will be removed in the next commit. name = kmem_cache_alloc(rbd_segment_name_cache, ...); snprintf(name, ...); obj_request->object_name = kstrdup(name); kmem_cache_free(rbd_segment_name_cache, name); ... ceph_oid_aprintf(..., "%s", obj_request->object_name); Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index e10d0708a419..6c094b580eae 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -232,6 +232,7 @@ enum obj_req_flags { struct rbd_obj_request { const char *object_name; + u64 object_no; u64 offset; /* object start byte */ u64 length; /* bytes from offset */ unsigned long flags; @@ -1629,8 +1630,8 @@ static void rbd_obj_request_submit(struct rbd_obj_request *obj_request) { struct ceph_osd_request *osd_req = obj_request->osd_req; - dout("%s %p \"%s\" %llu~%llu osd_req %p\n", __func__, - obj_request, obj_request->object_name, obj_request->offset, + dout("%s %p object_no %016llx %llu~%llu osd_req %p\n", __func__, + obj_request, obj_request->object_no, obj_request->offset, obj_request->length, osd_req); if (obj_request_img_data_test(obj_request)) { WARN_ON(obj_request->callback != rbd_img_obj_callback); @@ -1925,8 +1926,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req) rbd_osd_call_callback(obj_request); break; default: - rbd_warn(NULL, "%s: unsupported op %hu", - obj_request->object_name, (unsigned short) opcode); + rbd_warn(NULL, "unexpected OSD op: object_no %016llx opcode %d", + obj_request->object_no, opcode); break; } @@ -1958,6 +1959,8 @@ __rbd_osd_req_create(struct rbd_device *rbd_dev, { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct ceph_osd_request *req; + const char *name_format = rbd_dev->image_format == 1 ? 
+ RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT; req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO); if (!req) @@ -1968,8 +1971,8 @@ __rbd_osd_req_create(struct rbd_device *rbd_dev, req->r_priv = obj_request; req->r_base_oloc.pool = rbd_dev->layout.pool_id; - if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, "%s", - obj_request->object_name)) + if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format, + rbd_dev->header.object_prefix, obj_request->object_no)) goto err_req; if (ceph_osdc_alloc_messages(req, GFP_NOIO)) @@ -2488,6 +2491,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, while (resid) { struct ceph_osd_request *osd_req; const char *object_name; + u64 object_no = img_offset >> rbd_dev->header.obj_order; u64 offset = rbd_segment_offset(rbd_dev, img_offset); u64 length = rbd_segment_length(rbd_dev, img_offset, resid); @@ -2500,6 +2504,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, if (!obj_request) goto out_unwind; + obj_request->object_no = object_no; obj_request->offset = offset; obj_request->length = length; @@ -2846,6 +2851,8 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) if (!stat_request) return -ENOMEM; + stat_request->object_no = obj_request->object_no; + stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1, stat_request); if (!stat_request->osd_req) { -- GitLab From 6c696d8560e74cd42458931375875d62ae88c6ae Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 25 Jan 2017 18:16:23 +0100 Subject: [PATCH 0369/1084] rbd: kill obj_request->object_name and rbd_segment_name_cache Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 79 ++++--------------------------------- include/linux/ceph/osdmap.h | 7 ---- 2 files changed, 7 insertions(+), 79 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6c094b580eae..b6e269b4d534 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -231,7 +231,6 @@ enum obj_req_flags { }; struct rbd_obj_request { - const char *object_name; u64 object_no; u64 offset; /* object start byte */ u64 length; /* bytes from offset */ @@ -440,7 +439,6 @@ static DEFINE_SPINLOCK(rbd_client_list_lock); static struct kmem_cache *rbd_img_request_cache; static struct kmem_cache *rbd_obj_request_cache; -static struct kmem_cache *rbd_segment_name_cache; static int rbd_major; static DEFINE_IDA(rbd_dev_id_ida); @@ -1249,37 +1247,6 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev) rbd_dev->mapping.features = 0; } -static void rbd_segment_name_free(const char *name) -{ - /* The explicit cast here is needed to drop the const qualifier */ - - kmem_cache_free(rbd_segment_name_cache, (void *)name); -} - -static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) -{ - const char *name_format = rbd_dev->image_format == 1 ? 
- RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT; - char *name; - u64 segment; - int ret; - - name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO); - if (!name) - return NULL; - segment = offset >> rbd_dev->header.obj_order; - ret = snprintf(name, CEPH_MAX_OID_NAME_LEN + 1, name_format, - rbd_dev->header.object_prefix, segment); - if (ret < 0 || ret > CEPH_MAX_OID_NAME_LEN) { - pr_err("error formatting segment name for #%llu (%d)\n", - segment, ret); - rbd_segment_name_free(name); - name = NULL; - } - - return name; -} - static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) { u64 segment_size = rbd_obj_bytes(&rbd_dev->header); @@ -2050,29 +2017,17 @@ static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) ceph_osdc_put_request(osd_req); } -/* object_name is assumed to be a non-null pointer and NUL-terminated */ - -static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, - enum obj_request_type type) +static struct rbd_obj_request * +rbd_obj_request_create(enum obj_request_type type) { struct rbd_obj_request *obj_request; - size_t size; - char *name; rbd_assert(obj_request_type_valid(type)); - size = strlen(object_name) + 1; - name = kmalloc(size, GFP_NOIO); - if (!name) - return NULL; - obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO); - if (!obj_request) { - kfree(name); + if (!obj_request) return NULL; - } - obj_request->object_name = memcpy(name, object_name, size); obj_request->which = BAD_WHICH; obj_request->type = type; INIT_LIST_HEAD(&obj_request->links); @@ -2114,8 +2069,6 @@ static void rbd_obj_request_destroy(struct kref *kref) break; } - kfree(obj_request->object_name); - obj_request->object_name = NULL; kmem_cache_free(rbd_obj_request_cache, obj_request); } @@ -2490,17 +2443,11 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, while (resid) { struct ceph_osd_request *osd_req; - const char *object_name; u64 object_no = img_offset >> rbd_dev->header.obj_order; u64 offset = rbd_segment_offset(rbd_dev, img_offset); u64 length = rbd_segment_length(rbd_dev, img_offset, resid); - object_name = rbd_segment_name(rbd_dev, img_offset); - if (!object_name) - goto out_unwind; - obj_request = rbd_obj_request_create(object_name, type); - /* object request has its own copy of the object name */ - rbd_segment_name_free(object_name); + obj_request = rbd_obj_request_create(type); if (!obj_request) goto out_unwind; @@ -2846,8 +2793,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request) size_t size; int ret; - stat_request = rbd_obj_request_create(obj_request->object_name, - OBJ_REQUEST_PAGES); + stat_request = rbd_obj_request_create(OBJ_REQUEST_PAGES); if (!stat_request) return -ENOMEM; @@ -6389,27 +6335,16 @@ static int rbd_slab_init(void) if (!rbd_obj_request_cache) goto out_err; - rbd_assert(!rbd_segment_name_cache); - rbd_segment_name_cache = kmem_cache_create("rbd_segment_name", - CEPH_MAX_OID_NAME_LEN + 1, 1, 0, NULL); - if (rbd_segment_name_cache) - return 0; -out_err: - kmem_cache_destroy(rbd_obj_request_cache); - rbd_obj_request_cache = NULL; + return 0; +out_err: kmem_cache_destroy(rbd_img_request_cache); rbd_img_request_cache = NULL; - return -ENOMEM; } static void rbd_slab_exit(void) { - rbd_assert(rbd_segment_name_cache); - kmem_cache_destroy(rbd_segment_name_cache); - rbd_segment_name_cache = NULL; - rbd_assert(rbd_obj_request_cache); kmem_cache_destroy(rbd_obj_request_cache); rbd_obj_request_cache = NULL; diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h 
index 8cebdc4158c3..938656f70807 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -81,13 +81,6 @@ void ceph_oloc_copy(struct ceph_object_locator *dest, const struct ceph_object_locator *src); void ceph_oloc_destroy(struct ceph_object_locator *oloc); -/* - * Maximum supported by kernel client object name length - * - * (probably outdated: must be >= RBD_MAX_MD_NAME_LEN -- currently 100) - */ -#define CEPH_MAX_OID_NAME_LEN 100 - /* * 51-char inline_name is long enough for all cephfs and all but one * rbd requests: in ".rbd"/"rbd_id." can be -- GitLab From b9942bc9d3c70d359496a1a0afc528d4ab89be46 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Sat, 11 Feb 2017 12:14:38 +0530 Subject: [PATCH 0370/1084] rbd: constify device_type structure Declare device_type structure as const as it is only stored in the type field of a device structure. This field is of type const, so add const to the declaration of device_type structure. File size before: text data bss dec hex filename 61546 11610 208 73364 11e94 drivers/block/rbd.o File size after: text data bss dec hex filename 61610 11578 208 73396 11eb4 drivers/block/rbd.o Signed-off-by: Bhumika Goyal Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b6e269b4d534..ffb8d3afc6b4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4630,7 +4630,7 @@ static const struct attribute_group *rbd_attr_groups[] = { static void rbd_dev_release(struct device *dev); -static struct device_type rbd_device_type = { +static const struct device_type rbd_device_type = { .name = "rbd", .groups = rbd_attr_groups, .release = rbd_dev_release, -- GitLab From df963ea8a082d31521a120e8e31a29ad8a1dc215 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 14 Feb 2017 10:09:40 -0500 Subject: [PATCH 0371/1084] ceph: remove req from unsafe list when unregistering it There's no reason a request should ever be on a s_unsafe list but not in the request tree. Cc: stable@vger.kernel.org Link: http://tracker.ceph.com/issues/18474 Signed-off-by: Jeff Layton Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 52521f339745..fdbc3544e41c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -628,6 +628,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc, { dout("__unregister_request %p tid %lld\n", req, req->r_tid); + /* Never leave an unregistered request on an unsafe list! */ + list_del_init(&req->r_unsafe_item); + if (req->r_tid == mdsc->oldest_tid) { struct rb_node *p = rb_next(&req->r_node); mdsc->oldest_tid = 0; @@ -1058,7 +1061,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, while (!list_empty(&session->s_unsafe)) { req = list_first_entry(&session->s_unsafe, struct ceph_mds_request, r_unsafe_item); - list_del_init(&req->r_unsafe_item); pr_warn_ratelimited(" dropping unsafe request %llu\n", req->r_tid); __unregister_request(mdsc, req); @@ -2469,7 +2471,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) * useful we could do with a revised return value. */ dout("got safe reply %llu, mds%d\n", tid, mds); - list_del_init(&req->r_unsafe_item); /* last unsafe request during umount? 
*/ if (mdsc->stopping && !__get_oldest_req(mdsc)) -- GitLab From 80df1988201ac6648609eba13d48aef9f7974c10 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 17 Feb 2017 17:17:37 +0900 Subject: [PATCH 0372/1084] perf utils: Add perf_quiet_option() The perf_quiet_option() is to suppress all messages. It's intended to be called just after parsing options. Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170217081742.17417-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 17 +++++++++++++++++ tools/perf/util/debug.h | 1 + 2 files changed, 18 insertions(+) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index c1838b643108..03eb81f30d0d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -203,11 +203,28 @@ int perf_debug_option(const char *str) v = (v < 0) || (v > 10) ? 0 : v; } + if (quiet) + v = -1; + *var->ptr = v; free(s); return 0; } +int perf_quiet_option(void) +{ + struct debug_variable *var = &debug_variables[0]; + + /* disable all debug messages */ + while (var->name) { + *var->ptr = -1; + var++; + } + + quiet = true; + return 0; +} + #define DEBUG_WRAPPER(__n, __l) \ static int pr_ ## __n ## _wrapper(const char *fmt, ...) \ { \ diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index d242adc3d5a2..98832f5531d3 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -54,5 +54,6 @@ int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); void perf_debug_setup(void); +int perf_quiet_option(void); #endif /* __PERF_DEBUG_H */ -- GitLab From bb963e16507ca7670f0bb47ccaada8874b2ba6a1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 17 Feb 2017 17:17:38 +0900 Subject: [PATCH 0373/1084] perf utils: Check verbose flag properly It now can have negative value to suppress the message entirely. So it needs to check it being positive. 
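The reason a plain truth test is no longer enough is that C treats any non-zero value as true: once quiet mode stores -1 in the verbose flag, "if (verbose)" still fires and the message is printed anyway, so every site has to compare against zero explicitly. A minimal standalone sketch of the intended pattern (illustration only, not the perf sources; the variable merely mirrors perf's global verbose flag):

#include <stdio.h>

static int verbose;	/* -1 = quiet, 0 = normal, >0 = verbose */

static void report_extra(void)
{
	/* "if (verbose)" would wrongly fire for -1; require > 0 instead */
	if (verbose > 0)
		fprintf(stderr, "extra debug output\n");
}

int main(void)
{
	verbose = -1;		/* what suppressing all messages does */
	report_extra();		/* prints nothing */
	return 0;
}

The hunks below apply exactly this comparison at each message site.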
Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170217081742.17417-3-namhyung@kernel.org [ Adjust fuzz on tools/perf/util/pmu.c, add > 0 checks in many other places ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-mem.c | 4 ++-- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-sched.c | 12 ++++++------ tools/perf/builtin-stat.c | 6 +++--- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 6 +++--- tools/perf/pmu-events/json.c | 2 +- tools/perf/tests/attr.c | 2 +- tools/perf/tests/builtin-test.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/fdarray.c | 2 +- tools/perf/tests/llvm.c | 2 +- tools/perf/tests/parse-events.c | 2 +- tools/perf/tests/perf-record.c | 4 ++-- tools/perf/tests/python-use.c | 2 +- tools/perf/tests/thread-map.c | 6 +++--- tools/perf/tests/vmlinux-kallsyms.c | 2 +- tools/perf/ui/browsers/map.c | 6 +++--- tools/perf/ui/hist.c | 2 +- tools/perf/util/annotate.c | 2 +- tools/perf/util/dso.c | 2 +- tools/perf/util/hist.c | 6 +++--- tools/perf/util/pmu.c | 8 ++++---- tools/perf/util/probe-event.c | 2 +- tools/perf/util/sort.c | 8 ++++---- tools/perf/util/stat.c | 2 +- tools/perf/util/symbol-elf.c | 2 +- 29 files changed, 52 insertions(+), 52 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 70a289347591..7ad0d78ea743 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -744,7 +744,7 @@ static void data_process(void) first = false; - if (verbose || data__files_cnt > 2) + if (verbose > 0 || data__files_cnt > 2) data__fprintf(); /* Don't sort callchain for perf diff */ diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index cd7bc4d104e2..6114e07ca613 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -42,8 +42,8 @@ static int parse_record_events(const struct option *opt, fprintf(stderr, "%-13s%-*s%s\n", e->tag, - verbose ? 25 : 0, - verbose ? perf_mem_events__name(j) : "", + verbose > 0 ? 25 : 0, + verbose > 0 ? perf_mem_events__name(j) : "", e->supported ? ": available" : ""); } exit(0); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b87bbef73394..451b11e35c26 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -432,7 +432,7 @@ static int record__open(struct record *rec) try_again: if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { - if (verbose) + if (verbose > 0) ui__warning("%s\n", msg); goto try_again; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index dbd7fa028861..a94488114bbf 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1009,7 +1009,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) * providing it only in verbose mode not to bloat too * much struct symbol. 
*/ - if (verbose) { + if (verbose > 0) { /* * XXX: Need to provide a less kludgy way to ask for * more space per symbol, the u32 is for the index on diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 270eb2d8ca6b..b94cf0de715a 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -460,7 +460,7 @@ static struct task_desc *register_pid(struct perf_sched *sched, BUG_ON(!sched->tasks); sched->tasks[task->nr] = task; - if (verbose) + if (verbose > 0) printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm); return task; @@ -794,7 +794,7 @@ replay_wakeup_event(struct perf_sched *sched, const u32 pid = perf_evsel__intval(evsel, sample, "pid"); struct task_desc *waker, *wakee; - if (verbose) { + if (verbose > 0) { printf("sched_wakeup event %p\n", evsel); printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid); @@ -822,7 +822,7 @@ static int replay_switch_event(struct perf_sched *sched, int cpu = sample->cpu; s64 delta; - if (verbose) + if (verbose > 0) printf("sched_switch event %p\n", evsel); if (cpu >= MAX_CPUS || cpu < 0) @@ -870,7 +870,7 @@ static int replay_fork_event(struct perf_sched *sched, goto out_put; } - if (verbose) { + if (verbose > 0) { printf("fork event\n"); printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid); printf("... child: %s/%d\n", thread__comm_str(child), child->tid); @@ -1573,7 +1573,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); color_fprintf(stdout, color, " %12s secs ", stimestamp); - if (new_shortname || (verbose && sched_in->tid)) { + if (new_shortname || (verbose > 0 && sched_in->tid)) { const char *pid_color = color; if (thread__has_color(sched_in)) @@ -2050,7 +2050,7 @@ static void save_task_callchain(struct perf_sched *sched, if (thread__resolve_callchain(thread, cursor, evsel, sample, NULL, NULL, sched->max_stack + 2) != 0) { - if (verbose) + if (verbose > 0) error("Failed to resolve callchain. Skipping\n"); return; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9989b03c21f2..13b54999ad79 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -573,7 +573,7 @@ static int __run_perf_stat(int argc, const char **argv) if (errno == EINVAL || errno == ENOSYS || errno == ENOENT || errno == EOPNOTSUPP || errno == ENXIO) { - if (verbose) + if (verbose > 0) ui__warning("%s event is not supported by the kernel.\n", perf_evsel__name(counter)); counter->supported = false; @@ -582,7 +582,7 @@ static int __run_perf_stat(int argc, const char **argv) !(counter->leader->nr_members > 1)) continue; } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { - if (verbose) + if (verbose > 0) ui__warning("%s\n", msg); goto try_again; } @@ -2539,7 +2539,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) status = 0; for (run_idx = 0; forever || run_idx < run_count; run_idx++) { - if (run_count != 1 && verbose) + if (run_count != 1 && verbose > 0) fprintf(output, "[ perf stat: executing run #%d ... 
]\n", run_idx + 1); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5a7fd7af3a6d..ab9077915763 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -871,7 +871,7 @@ static int perf_top__start_counters(struct perf_top *top) if (perf_evsel__open(counter, top->evlist->cpus, top->evlist->threads) < 0) { if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { - if (verbose) + if (verbose > 0) ui__warning("%s\n", msg); goto try_again; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 40ef9b293d1b..256f1fac6f7e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1399,7 +1399,7 @@ static struct syscall *trace__syscall_info(struct trace *trace, return &trace->syscalls.table[id]; out_cant_read: - if (verbose) { + if (verbose > 0) { fprintf(trace->output, "Problems reading syscall %d", id); if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); @@ -1801,10 +1801,10 @@ static void print_location(FILE *f, struct perf_sample *sample, bool print_dso, bool print_sym) { - if ((verbose || print_dso) && al->map) + if ((verbose > 0 || print_dso) && al->map) fprintf(f, "%s@", al->map->dso->long_name); - if ((verbose || print_sym) && al->sym) + if ((verbose > 0 || print_sym) && al->sym) fprintf(f, "%s+0x%" PRIx64, al->sym->name, al->addr - al->sym->start); else if (al->map) diff --git a/tools/perf/pmu-events/json.c b/tools/perf/pmu-events/json.c index f67bbb0aa36e..0544398d6e2d 100644 --- a/tools/perf/pmu-events/json.c +++ b/tools/perf/pmu-events/json.c @@ -49,7 +49,7 @@ static char *mapfile(const char *fn, size_t *size) int err; int fd = open(fn, O_RDONLY); - if (fd < 0 && verbose && fn) { + if (fd < 0 && verbose > 0 && fn) { pr_err("Error opening events file '%s': %s\n", fn, strerror(errno)); } diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 28d1605b0338..88dc51f4c27b 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -144,7 +144,7 @@ static int run_dir(const char *d, const char *perf) int vcnt = min(verbose, (int) sizeof(v) - 1); char cmd[3*PATH_MAX]; - if (verbose) + if (verbose > 0) vcnt++; snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 37e326bfd2dc..83c4669cbc5b 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -299,7 +299,7 @@ static int run_test(struct test *test, int subtest) if (!dont_fork) { pr_debug("test child forked, pid %d\n", getpid()); - if (!verbose) { + if (verbose <= 0) { int nullfd = open("/dev/null", O_WRONLY); if (nullfd >= 0) { diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index ff5bc6363a79..d1f693041324 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -599,7 +599,7 @@ static int do_test_code_reading(bool try_kcore) continue; } - if (verbose) { + if (verbose > 0) { char errbuf[512]; perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c index a2b5ff9bf83d..bc5982f42dc3 100644 --- a/tools/perf/tests/fdarray.c +++ b/tools/perf/tests/fdarray.c @@ -19,7 +19,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE { int printed = 0; - if (!verbose) + if (verbose <= 0) return 0; printed += fprintf(fp, 
"\n%s: ", prefix); diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index d357dab72e68..482b5365e68d 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -76,7 +76,7 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf, * Skip this test if user's .perfconfig doesn't set [llvm] section * and clang is not found in $PATH, and this is not perf test -v */ - if (!force && (verbose == 0 && + if (!force && (verbose <= 0 && !llvm_param.user_set_param && llvm__search_clang())) { pr_debug("No clang and no verbosive, skip this test\n"); diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index aa9276bfe3e9..1dc838014422 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1808,7 +1808,7 @@ static void debug_warn(const char *warn, va_list params) { char msg[1024]; - if (!verbose) + if (verbose <= 0) return; vsnprintf(msg, sizeof(msg), warn, params); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 541da7a68f91..87893f3ba5f1 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -172,13 +172,13 @@ int test__PERF_RECORD(int subtest __maybe_unused) err = perf_evlist__parse_sample(evlist, event, &sample); if (err < 0) { - if (verbose) + if (verbose > 0) perf_event__fprintf(event, stderr); pr_debug("Couldn't parse sample\n"); goto out_delete_evlist; } - if (verbose) { + if (verbose > 0) { pr_info("%" PRIu64" %d ", sample.time, sample.cpu); perf_event__fprintf(event, stderr); } diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c index 7a52834ee0d0..fa79509da535 100644 --- a/tools/perf/tests/python-use.c +++ b/tools/perf/tests/python-use.c @@ -15,7 +15,7 @@ int test__python_use(int subtest __maybe_unused) int ret; if (asprintf(&cmd, "echo \"import sys ; sys.path.append('%s'); import perf\" | %s %s", - PYTHONPATH, PYTHON, verbose ? "" : "2> /dev/null") < 0) + PYTHONPATH, PYTHON, verbose > 0 ? "" : "2> /dev/null") < 0) return -1; ret = system(cmd) ? 
-1 : 0; diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index a4a4b4625ac3..f2d2e542d0ee 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -109,7 +109,7 @@ int test__thread_map_remove(int subtest __maybe_unused) TEST_ASSERT_VAL("failed to allocate thread_map", threads); - if (verbose) + if (verbose > 0) thread_map__fprintf(threads, stderr); TEST_ASSERT_VAL("failed to remove thread", @@ -117,7 +117,7 @@ int test__thread_map_remove(int subtest __maybe_unused) TEST_ASSERT_VAL("thread_map count != 1", threads->nr == 1); - if (verbose) + if (verbose > 0) thread_map__fprintf(threads, stderr); TEST_ASSERT_VAL("failed to remove thread", @@ -125,7 +125,7 @@ int test__thread_map_remove(int subtest __maybe_unused) TEST_ASSERT_VAL("thread_map count != 0", threads->nr == 0); - if (verbose) + if (verbose > 0) thread_map__fprintf(threads, stderr); TEST_ASSERT_VAL("failed to not remove thread", diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index a5082331f246..862b043e5924 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -168,7 +168,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) err = -1; } - if (!verbose) + if (verbose <= 0) goto out; header_printed = false; diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 98a34664bb7e..9ce142de536d 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -73,7 +73,7 @@ static int map_browser__run(struct map_browser *browser) if (ui_browser__show(&browser->b, browser->map->dso->long_name, "Press ESC to exit, %s / to search", - verbose ? "" : "restart with -v to use") < 0) + verbose > 0 ? "" : "restart with -v to use") < 0) return -1; while (1) { @@ -81,7 +81,7 @@ static int map_browser__run(struct map_browser *browser) switch (key) { case '/': - if (verbose) + if (verbose > 0) map_browser__search(browser); default: break; @@ -117,7 +117,7 @@ int map__browse(struct map *map) if (maxaddr < pos->end) maxaddr = pos->end; - if (verbose) { + if (verbose > 0) { u32 *idx = symbol__browser_index(pos); *idx = mb.b.nr_entries; } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 18cfcdc90356..5d632dca672a 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -648,7 +648,7 @@ unsigned int hists__sort_list_width(struct hists *hists) ret += fmt->width(fmt, &dummy_hpp, hists); } - if (verbose && hists__has(hists, sym)) /* Addr + origin */ + if (verbose > 0 && hists__has(hists, sym)) /* Addr + origin */ ret += 3 + BITS_PER_LONG / 4; return ret; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 06cc04e5806a..273f21fa32b5 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1768,7 +1768,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, printf("%-*.*s----\n", graph_dotted_len, graph_dotted_len, graph_dotted_line); - if (verbose) + if (verbose > 0) symbol__annotate_hits(sym, evsel); list_for_each_entry(pos, ¬es->src->source, node) { diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 28d41e709128..1a03e9e310a4 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1058,7 +1058,7 @@ int dso__name_len(const struct dso *dso) { if (!dso) return strlen("[unknown]"); - if (verbose) + if (verbose > 0) return dso->long_name_len; return dso->short_name_len; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 32c6a939e4cc..eaf72a938fb4 100644 --- 
a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -69,7 +69,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) */ if (h->ms.sym) { symlen = h->ms.sym->namelen + 4; - if (verbose) + if (verbose > 0) symlen += BITS_PER_LONG / 4 + 2 + 3; hists__new_col_len(hists, HISTC_SYMBOL, symlen); } else { @@ -93,7 +93,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) if (h->branch_info) { if (h->branch_info->from.sym) { symlen = (int)h->branch_info->from.sym->namelen + 4; - if (verbose) + if (verbose > 0) symlen += BITS_PER_LONG / 4 + 2 + 3; hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); @@ -107,7 +107,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) if (h->branch_info->to.sym) { symlen = (int)h->branch_info->to.sym->namelen + 4; - if (verbose) + if (verbose > 0) symlen += BITS_PER_LONG / 4 + 2 + 3; hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 63cb46cb9b0f..12f84dd2ac5d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -745,7 +745,7 @@ static int pmu_resolve_param_term(struct parse_events_term *term, } } - if (verbose) + if (verbose > 0) printf("Required parameter '%s' not specified\n", term->config); return -1; @@ -803,7 +803,7 @@ static int pmu_config_term(struct list_head *formats, format = pmu_find_format(formats, term->config); if (!format) { - if (verbose) + if (verbose > 0) printf("Invalid event/parameter '%s'\n", term->config); if (err) { char *pmu_term = pmu_formats_string(formats); @@ -847,7 +847,7 @@ static int pmu_config_term(struct list_head *formats, val = term->val.num; } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { if (strcmp(term->val.str, "?")) { - if (verbose) { + if (verbose > 0) { pr_info("Invalid sysfs entry %s=%s\n", term->config, term->val.str); } @@ -1232,7 +1232,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, printf("%*s", 8, "["); wordwrap(aliases[j].desc, 8, columns, 0); printf("]\n"); - if (verbose) + if (verbose > 0) printf("%*s%s/%s/\n", 8, "", aliases[j].pmu, aliases[j].str); } else printf(" %-50s [Kernel PMU event]\n", aliases[j].name); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 35f5b7b7715c..28fb62c32678 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -594,7 +594,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, pr_debug("try to find information at %" PRIx64 " in %s\n", addr, tp->module ? : "kernel"); - dinfo = debuginfo_cache__open(tp->module, verbose == 0); + dinfo = debuginfo_cache__open(tp->module, verbose <= 0); if (dinfo) ret = debuginfo__find_probe_point(dinfo, (unsigned long)addr, pp); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index df622f4e301e..0ff622288d24 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -151,7 +151,7 @@ static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) if (!dso_l || !dso_r) return cmp_null(dso_r, dso_l); - if (verbose) { + if (verbose > 0) { dso_name_l = dso_l->long_name; dso_name_r = dso_r->long_name; } else { @@ -172,8 +172,8 @@ static int _hist_entry__dso_snprintf(struct map *map, char *bf, size_t size, unsigned int width) { if (map && map->dso) { - const char *dso_name = !verbose ? map->dso->short_name : - map->dso->long_name; + const char *dso_name = verbose > 0 ? 
map->dso->long_name : + map->dso->short_name; return repsep_snprintf(bf, size, "%-*.*s", width, width, dso_name); } @@ -261,7 +261,7 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, { size_t ret = 0; - if (verbose) { + if (verbose > 0) { char o = map ? dso__symtab_origin(map->dso) : '!'; ret += repsep_snprintf(bf, size, "%-#*llx %c ", BITS_PER_LONG / 4 + 2, ip, o); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 39345c2ddfc2..0d51334a9b46 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -344,7 +344,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, for (i = 0; i < 3; i++) update_stats(&ps->res_stats[i], count[i]); - if (verbose) { + if (verbose > 0) { fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", perf_evsel__name(counter), count[0], count[1], count[2]); } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index adbc6c02c3aa..4e59ddeb4eda 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -213,7 +213,7 @@ static bool want_demangle(bool is_kernel_sym) static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) { - int demangle_flags = verbose ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS; + int demangle_flags = verbose > 0 ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS; char *demangled = NULL; /* -- GitLab From 27fafab59a60b6f02491f2ff44cafd4f2335e487 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 17 Feb 2017 17:17:39 +0900 Subject: [PATCH 0374/1084] perf report: Add -q/--quiet option The -q/--quiet option is to suppress any message. Sometimes users just want to see the numbers and it can be used for that case. Before: $ perf report | head -15 Failed to open /lib/modules/3.19.3-3-ARCH/kernel/fs/ext4/ext4.ko.gz, continuing without symbols Failed to open /lib/modules/3.19.3-3-ARCH/kernel/fs/jbd2/jbd2.ko.gz, continuing without symbols Failed to open /tmp/perf-14507.map, continuing without symbols ... # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 39K of event 'cycles' # Event count (approx.): 30444796573 # # Overhead Command Shared Object Symbol # ........ ........... ................... ......................... 
# 9.28% swapper [kernel.vmlinux] [k] intel_idle 5.64% swapper [kernel.vmlinux] [k] native_write_msr_safe 1.93% swapper [kernel.vmlinux] [k] __switch_to 1.89% swapper [kernel.vmlinux] [k] menu_select 1.75% sched-pipe [kernel.vmlinux] [k] __switch_to After: $ perf report -q | head 9.28% swapper [kernel.vmlinux] [k] intel_idle 5.64% swapper [kernel.vmlinux] [k] native_write_msr_safe 1.93% swapper [kernel.vmlinux] [k] __switch_to 1.89% swapper [kernel.vmlinux] [k] menu_select 1.75% sched-pipe [kernel.vmlinux] [k] __switch_to 1.67% swapper [kernel.vmlinux] [k] cpu_startup_entry 1.48% sched-pipe [kernel.vmlinux] [k] enqueue_entity 1.46% swapper [kernel.vmlinux] [k] __schedule 1.36% swapper [kernel.vmlinux] [k] native_read_tsc 1.34% sched-pipe [kernel.vmlinux] [k] __schedule Signed-off-by: Namhyung Kim Suggested-and-Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170217081742.17417-4-namhyung@kernel.org [ Removed builtin-report.c verbose > 0 hunk added to the previous patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 4 ++++ tools/perf/builtin-report.c | 19 +++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f2914f03ae7b..c04cc0647c16 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -25,6 +25,10 @@ OPTIONS --verbose:: Be more verbose. (show symbol address, etc) +-q:: +--quiet:: + Do not show any message. (Suppress -v) + -n:: --show-nr-samples:: Show the number of samples for each symbol diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a94488114bbf..0a88670e56f3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -320,6 +320,9 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report size_t size = sizeof(buf); int socked_id = hists->socket_filter; + if (quiet) + return 0; + if (symbol_conf.filter_relative) { nr_samples = hists->stats.nr_non_filtered_samples; nr_events = hists->stats.total_non_filtered_period; @@ -372,7 +375,11 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, { struct perf_evsel *pos; - fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", evlist->stats.total_lost_samples); + if (!quiet) { + fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", + evlist->stats.total_lost_samples); + } + evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); const char *evname = perf_evsel__name(pos); @@ -382,7 +389,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, continue; hists__fprintf_nr_sample_events(hists, rep, evname, stdout); - hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout, + hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout, symbol_conf.use_callchain); fprintf(stdout, "\n\n"); } @@ -716,6 +723,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "input file name"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, @@ -863,6 +871,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) report.symbol_filter_str = argv[0]; } + if 
(quiet) + perf_quiet_option(); + if (symbol_conf.vmlinux_name && access(symbol_conf.vmlinux_name, R_OK)) { pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); @@ -983,14 +994,14 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) goto error; } - if (report.header || report.header_only) { + if ((report.header || report.header_only) && !quiet) { perf_session__fprintf_info(session, stdout, report.show_full_info); if (report.header_only) { ret = 0; goto error; } - } else if (use_browser == 0) { + } else if (use_browser == 0 && !quiet) { fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", stdout); } -- GitLab From 63b42fce864a468ee02e6647474c4df9bfdc6166 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 17 Feb 2017 17:17:40 +0900 Subject: [PATCH 0375/1084] perf diff: Add -q/--quiet option The -q/--quiet option is to suppress any message. Sometimes users just want to see the numbers and it can be used for that case. Committer notes: Before: # perf diff | head -10 Failed to open /tmp/perf-6678.map, continuing without symbols Failed to open /tmp/perf-6678.map, continuing without symbols Failed to open /tmp/perf-2646.map, continuing without symbols # Event 'cycles' # # Baseline Delta Abs Shared Object Symbol # ........ ......... .......................... ............................................ # 5.36% -1.76% [kernel.vmlinux] [k] intel_idle 2.80% +1.48% firefox [.] 0x00000000000101fe 57.12% -1.25% libxul.so [.] 0x00000000009bea92 1.36% -1.11% [kernel.vmlinux] [k] __schedule 4.26% -1.00% perf-6678.map [.] 0x00007fac4b0e9320 After: # perf diff -q | head -10 5.36% -1.76% [kernel.vmlinux] [k] intel_idle 2.80% +1.48% firefox [.] 0x00000000000101fe 57.12% -1.25% libxul.so [.] 0x00000000009bea92 1.36% -1.11% [kernel.vmlinux] [k] __schedule 4.26% -1.00% perf-6678.map [.] 0x00007fac4b0e9320 1.86% +0.95% [kernel.vmlinux] [k] update_blocked_averages 0.80% -0.70% [kernel.vmlinux] [k] native_sched_clock 0.74% -0.58% [kernel.vmlinux] [k] native_write_msr 0.76% -0.56% qemu-system-x86_64 [.] 0x00000000002395c0 +0.54% libpulsecommon-10.0.so [.] 0x000000000002d91b # Signed-off-by: Namhyung Kim Suggested-and-Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170217081742.17417-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-diff.txt | 4 ++++ tools/perf/builtin-diff.c | 14 ++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index 66dbe3dee74b..a79c84ae61aa 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -73,6 +73,10 @@ OPTIONS Be verbose, for instance, show the raw counts in addition to the diff. +-q:: +--quiet:: + Do not show any message. (Suppress -v) + -f:: --force:: Don't do ownership validation. diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 7ad0d78ea743..1b96a3122228 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -691,7 +691,7 @@ static void hists__process(struct hists *hists) hists__precompute(hists); hists__output_resort(hists, NULL); - hists__fprintf(hists, true, 0, 0, 0, stdout, + hists__fprintf(hists, !quiet, 0, 0, 0, stdout, symbol_conf.use_callchain); } @@ -739,12 +739,14 @@ static void data_process(void) hists__link(hists_base, hists); } - fprintf(stdout, "%s# Event '%s'\n#\n", first ? 
"" : "\n", - perf_evsel__name(evsel_base)); + if (!quiet) { + fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n", + perf_evsel__name(evsel_base)); + } first = false; - if (verbose > 0 || data__files_cnt > 2) + if (verbose > 0 || ((data__files_cnt > 2) && !quiet)) data__fprintf(); /* Don't sort callchain for perf diff */ @@ -807,6 +809,7 @@ static const char * const diff_usage[] = { static const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, "Show only items with match in baseline"), OPT_CALLBACK('c', "compute", &compute, @@ -1328,6 +1331,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, options, diff_usage, 0); + if (quiet) + perf_quiet_option(); + if (symbol__init(NULL) < 0) return -1; -- GitLab From eddaef88961bbf2c0301f3216689d51fa7d0d472 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 17 Feb 2017 17:17:41 +0900 Subject: [PATCH 0376/1084] perf annotate: Add -q/--quiet option The -q/--quiet option is to suppress any message. Sometimes users just want to see the numbers and it can be used for that case. Signed-off-by: Namhyung Kim Suggested-and-Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170217081742.17417-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-annotate.txt | 4 ++++ tools/perf/builtin-annotate.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 8ffbd272952d..a89273d8e744 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -39,6 +39,10 @@ OPTIONS --verbose:: Be more verbose. (Show symbol address, etc) +-q:: +--quiet:: + Do not show any message. (Suppress -v) + -D:: --dump-raw-trace:: Dump raw trace in ASCII. diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index ebb628332a6e..4f52d85f5ebc 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -410,6 +410,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"), @@ -463,6 +464,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) annotate.sym_hist_filter = argv[0]; } + if (quiet) + perf_quiet_option(); + file.path = input_name; annotate.session = perf_session__new(&file, false, &annotate.tool); -- GitLab From 68ba32352d51474d163d58e084b62a12bb610b21 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 17 Feb 2017 17:17:42 +0900 Subject: [PATCH 0377/1084] perf record: Honor --quiet option properly It should call perf_quiet_option() to suppress messages. 
Signed-off-by: Namhyung Kim Cc: Jiri Olsa Cc: Peter Zijlstra Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20170217081742.17417-7-namhyung@kernel.org [ Fix merge clash with 483635a9d080 ("perf record: Add -a as default target") ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 451b11e35c26..bc84a375295d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1677,6 +1677,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, record_options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (quiet) + perf_quiet_option(); /* Make system wide (-a) the default target. */ if (!argc && target__none(&rec->opts.target)) -- GitLab From 7687a7a4435f4b32d8e775a7b6b25aea3a25d25a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 0378/1084] vfs: extract common parts of {compat_,}do_readv_writev() Signed-off-by: Miklos Szeredi --- fs/read_write.c | 80 ++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 820a3f06c46a..12a216021f87 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -834,25 +834,15 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, return ret; } -static ssize_t do_readv_writev(int type, struct file *file, - const struct iovec __user * uvector, - unsigned long nr_segs, loff_t *pos, - int flags) +static ssize_t __do_readv_writev(int type, struct file *file, + struct iov_iter *iter, loff_t *pos, int flags) { size_t tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov = iovstack; - struct iov_iter iter; - ssize_t ret; + ssize_t ret = 0; io_fn_t fn; iter_fn_t iter_fn; - ret = import_iovec(type, uvector, nr_segs, - ARRAY_SIZE(iovstack), &iov, &iter); - if (ret < 0) - return ret; - - tot_len = iov_iter_count(&iter); + tot_len = iov_iter_count(iter); if (!tot_len) goto out; ret = rw_verify_area(type, file, pos, tot_len); @@ -869,15 +859,14 @@ static ssize_t do_readv_writev(int type, struct file *file, } if (iter_fn) - ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); + ret = do_iter_readv_writev(file, iter, pos, iter_fn, flags); else - ret = do_loop_readv_writev(file, &iter, pos, fn, flags); + ret = do_loop_readv_writev(file, iter, pos, fn, flags); if (type != READ) file_end_write(file); out: - kfree(iov); if ((ret + (type == READ)) > 0) { if (type == READ) fsnotify_access(file); @@ -887,6 +876,27 @@ static ssize_t do_readv_writev(int type, struct file *file, return ret; } +static ssize_t do_readv_writev(int type, struct file *file, + const struct iovec __user *uvector, + unsigned long nr_segs, loff_t *pos, + int flags) +{ + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov = iovstack; + struct iov_iter iter; + ssize_t ret; + + ret = import_iovec(type, uvector, nr_segs, + ARRAY_SIZE(iovstack), &iov, &iter); + if (ret < 0) + return ret; + + ret = __do_readv_writev(type, file, &iter, pos, flags); + kfree(iov); + + return ret; +} + ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, unsigned long vlen, loff_t *pos, int flags) { @@ -1064,51 +1074,19 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, unsigned long nr_segs, loff_t *pos, int flags) { - compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = 
iovstack; struct iov_iter iter; ssize_t ret; - io_fn_t fn; - iter_fn_t iter_fn; ret = compat_import_iovec(type, uvector, nr_segs, UIO_FASTIOV, &iov, &iter); if (ret < 0) return ret; - tot_len = iov_iter_count(&iter); - if (!tot_len) - goto out; - ret = rw_verify_area(type, file, pos, tot_len); - if (ret < 0) - goto out; - - if (type == READ) { - fn = file->f_op->read; - iter_fn = file->f_op->read_iter; - } else { - fn = (io_fn_t)file->f_op->write; - iter_fn = file->f_op->write_iter; - file_start_write(file); - } - - if (iter_fn) - ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); - else - ret = do_loop_readv_writev(file, &iter, pos, fn, flags); - - if (type != READ) - file_end_write(file); - -out: + ret = __do_readv_writev(type, file, &iter, pos, flags); kfree(iov); - if ((ret + (type == READ)) > 0) { - if (type == READ) - fsnotify_access(file); - else - fsnotify_modify(file); - } + return ret; } -- GitLab From 0f78d06ac1e9b470cbd8f913ee1688c8b2c8feb3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 0379/1084] vfs: pass type instead of fn to do_{loop,iter}_readv_writev() Signed-off-by: Miklos Szeredi --- fs/read_write.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 12a216021f87..198614f757fa 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -23,9 +23,6 @@ #include #include -typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); -typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); - const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, @@ -675,7 +672,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) EXPORT_SYMBOL(iov_shorten); static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, iter_fn_t fn, int flags) + loff_t *ppos, int type, int flags) { struct kiocb kiocb; ssize_t ret; @@ -692,7 +689,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC); kiocb.ki_pos = *ppos; - ret = fn(&kiocb, iter); + if (type == READ) + ret = filp->f_op->read_iter(&kiocb, iter); + else + ret = filp->f_op->write_iter(&kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; @@ -700,7 +700,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, /* Do it by hand, with file-ops */ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, io_fn_t fn, int flags) + loff_t *ppos, int type, int flags) { ssize_t ret = 0; @@ -711,7 +711,13 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, struct iovec iovec = iov_iter_iovec(iter); ssize_t nr; - nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos); + if (type == READ) { + nr = filp->f_op->read(filp, iovec.iov_base, + iovec.iov_len, ppos); + } else { + nr = filp->f_op->write(filp, iovec.iov_base, + iovec.iov_len, ppos); + } if (nr < 0) { if (!ret) @@ -839,8 +845,6 @@ static ssize_t __do_readv_writev(int type, struct file *file, { size_t tot_len; ssize_t ret = 0; - io_fn_t fn; - iter_fn_t iter_fn; tot_len = iov_iter_count(iter); if (!tot_len) @@ -849,19 +853,14 @@ static ssize_t __do_readv_writev(int type, struct file *file, if (ret < 0) goto out; - if (type == READ) { - fn = file->f_op->read; - iter_fn = file->f_op->read_iter; - } else { - fn = 
(io_fn_t)file->f_op->write; - iter_fn = file->f_op->write_iter; + if (type != READ) file_start_write(file); - } - if (iter_fn) - ret = do_iter_readv_writev(file, iter, pos, iter_fn, flags); + if ((type == READ && file->f_op->read_iter) || + (type == WRITE && file->f_op->write_iter)) + ret = do_iter_readv_writev(file, iter, pos, type, flags); else - ret = do_loop_readv_writev(file, iter, pos, fn, flags); + ret = do_loop_readv_writev(file, iter, pos, type, flags); if (type != READ) file_end_write(file); -- GitLab From bb7462b6fd64e40809a857223bf7f0e628969f87 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 0380/1084] vfs: use helpers for calling f_op->{read,write}_iter() Signed-off-by: Miklos Szeredi --- drivers/block/loop.c | 4 ++-- fs/aio.c | 4 ++-- fs/read_write.c | 12 ++++++------ fs/splice.c | 2 +- include/linux/fs.h | 12 ++++++++++++ 5 files changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f347285c67ec..2cf2903a0715 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -501,9 +501,9 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, cmd->iocb.ki_flags = IOCB_DIRECT; if (rw == WRITE) - ret = file->f_op->write_iter(&cmd->iocb, &iter); + ret = call_write_iter(file, &cmd->iocb, &iter); else - ret = file->f_op->read_iter(&cmd->iocb, &iter); + ret = call_read_iter(file, &cmd->iocb, &iter); if (ret != -EIOCBQUEUED) cmd->iocb.ki_complete(&cmd->iocb, ret, 0); diff --git a/fs/aio.c b/fs/aio.c index 4ab67e8cb776..9f4a9a2e7ae4 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1494,7 +1494,7 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, return ret; ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) - ret = aio_ret(req, file->f_op->read_iter(req, &iter)); + ret = aio_ret(req, call_read_iter(file, req, &iter)); kfree(iovec); return ret; } @@ -1519,7 +1519,7 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, if (!ret) { req->ki_flags |= IOCB_WRITE; file_start_write(file); - ret = aio_ret(req, file->f_op->write_iter(req, &iter)); + ret = aio_ret(req, call_write_iter(file, req, &iter)); /* * We release freeze protection in aio_complete(). 
Fool lockdep * by telling it the lock got released so that it doesn't diff --git a/fs/read_write.c b/fs/read_write.c index 198614f757fa..f2ed9fdc98fd 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -367,7 +367,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) kiocb.ki_pos = *ppos; iter->type |= READ; - ret = file->f_op->read_iter(&kiocb, iter); + ret = call_read_iter(file, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -387,7 +387,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) kiocb.ki_pos = *ppos; iter->type |= WRITE; - ret = file->f_op->write_iter(&kiocb, iter); + ret = call_write_iter(file, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -436,7 +436,7 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo kiocb.ki_pos = *ppos; iov_iter_init(&iter, READ, &iov, 1, len); - ret = filp->f_op->read_iter(&kiocb, &iter); + ret = call_read_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; @@ -493,7 +493,7 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t kiocb.ki_pos = *ppos; iov_iter_init(&iter, WRITE, &iov, 1, len); - ret = filp->f_op->write_iter(&kiocb, &iter); + ret = call_write_iter(filp, &kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; @@ -690,9 +690,9 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, kiocb.ki_pos = *ppos; if (type == READ) - ret = filp->f_op->read_iter(&kiocb, iter); + ret = call_read_iter(filp, &kiocb, iter); else - ret = filp->f_op->write_iter(&kiocb, iter); + ret = call_write_iter(filp, &kiocb, iter); BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; diff --git a/fs/splice.c b/fs/splice.c index 873d83104e79..6518f058bd7f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -306,7 +306,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, idx = to.idx; init_sync_kiocb(&kiocb, in); kiocb.ki_pos = *ppos; - ret = in->f_op->read_iter(&kiocb, &to); + ret = call_read_iter(in, &kiocb, &to); if (ret > 0) { *ppos = kiocb.ki_pos; file_accessed(in); diff --git a/include/linux/fs.h b/include/linux/fs.h index 78c81e6f5d76..a3145cdff8f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1715,6 +1715,18 @@ struct inode_operations { int (*set_acl)(struct inode *, struct posix_acl *, int); } ____cacheline_aligned; +static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->read_iter(kio, iter); +} + +static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, + struct iov_iter *iter) +{ + return file->f_op->write_iter(kio, iter); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, -- GitLab From f74ac01520c9f6d89bbc3c6931a72f757b742f86 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 0381/1084] mm: use helper for calling f_op->mmap() Signed-off-by: Miklos Szeredi --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +- drivers/gpu/drm/vgem/vgem_drv.c | 2 +- fs/coda/file.c | 2 +- include/linux/fs.h | 5 +++++ ipc/shm.c | 2 +- mm/mmap.c | 2 +- mm/nommu.c | 4 ++-- 7 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 
5e38299b5df6..84fc9f001576 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -141,7 +141,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * if (!obj->base.filp) return -ENODEV; - ret = obj->base.filp->f_op->mmap(obj->base.filp, vma); + ret = call_mmap(obj->base.filp, vma); if (ret) return ret; diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 477e07f0ecb6..9f7e222b3e89 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -287,7 +287,7 @@ static int vgem_prime_mmap(struct drm_gem_object *obj, if (!obj->filp) return -ENODEV; - ret = obj->filp->f_op->mmap(obj->filp, vma); + ret = call_mmap(obj->filp, vma); if (ret) return ret; diff --git a/fs/coda/file.c b/fs/coda/file.c index 6e0154eb6fcc..9d956cd6d46f 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -96,7 +96,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) cfi->cfi_mapcount++; spin_unlock(&cii->c_lock); - return host_file->f_op->mmap(host_file, vma); + return call_mmap(host_file, vma); } int coda_open(struct inode *coda_inode, struct file *coda_file) diff --git a/include/linux/fs.h b/include/linux/fs.h index a3145cdff8f2..93691b59e476 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1727,6 +1727,11 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, return file->f_op->write_iter(kio, iter); } +static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +{ + return file->f_op->mmap(file, vma); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, diff --git a/ipc/shm.c b/ipc/shm.c index 81203e8ba013..4329fe3ef594 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -423,7 +423,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) if (ret) return ret; - ret = sfd->file->f_op->mmap(sfd->file, vma); + ret = call_mmap(sfd->file, vma); if (ret) { shm_close(vma); return ret; diff --git a/mm/mmap.c b/mm/mmap.c index dc4291dcc99b..3714aa4e6f81 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1668,7 +1668,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * new file must not have been exposed to user-space, yet. 
*/ vma->vm_file = get_file(file); - error = file->f_op->mmap(file, vma); + error = call_mmap(file, vma); if (error) goto unmap_and_free_vma; diff --git a/mm/nommu.c b/mm/nommu.c index 24f9f5f39145..e366354f777d 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1084,7 +1084,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) { int ret; - ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + ret = call_mmap(vma->vm_file, vma); if (ret == 0) { vma->vm_region->vm_top = vma->vm_region->vm_end; return 0; @@ -1115,7 +1115,7 @@ static int do_mmap_private(struct vm_area_struct *vma, * - VM_MAYSHARE will be set if it may attempt to share */ if (capabilities & NOMMU_MAP_DIRECT) { - ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); + ret = call_mmap(vma->vm_file, vma); if (ret == 0) { /* shouldn't return success if we're not sharing */ BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); -- GitLab From 0eb8af4916a540c362a2950e5ab54eca32eb7d58 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 20 Feb 2017 16:51:23 +0100 Subject: [PATCH 0382/1084] vfs: use helper for calling f_op->fsync() Signed-off-by: Miklos Szeredi --- fs/sync.c | 2 +- include/linux/fs.h | 6 ++++++ ipc/shm.c | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/sync.c b/fs/sync.c index 2a54c1f22035..11ba023434b1 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -192,7 +192,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) spin_unlock(&inode->i_lock); mark_inode_dirty_sync(inode); } - return file->f_op->fsync(file, start, end, datasync); + return call_fsync(file, start, end, datasync); } EXPORT_SYMBOL(vfs_fsync_range); diff --git a/include/linux/fs.h b/include/linux/fs.h index 93691b59e476..72a33007ec24 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1732,6 +1732,12 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma) return file->f_op->mmap(file, vma); } +static inline int call_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + return file->f_op->fsync(file, start, end, datasync); +} + ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, diff --git a/ipc/shm.c b/ipc/shm.c index 4329fe3ef594..258aff2e03bb 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -452,7 +452,7 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (!sfd->file->f_op->fsync) return -EINVAL; - return sfd->file->f_op->fsync(sfd->file, start, end, datasync); + return call_fsync(sfd->file, start, end, datasync); } static long shm_fallocate(struct file *file, int mode, loff_t offset, -- GitLab From eb994594bc22220976f367c03ffa141580aa45e4 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Tue, 21 Feb 2017 01:53:00 +1030 Subject: [PATCH 0383/1084] ipmi: bt-bmc: Use a regmap for register access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The registers for the bt-bmc device live under the Aspeed LPC controller. Devicetree bindings have recently been introduced for the LPC controller where the "host" portion of the LPC register space is described as a syscon device. Future devicetrees describing the bt-bmc device should nest its node under the appropriate "simple-mfd", "syscon" compatible node. 
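For a driver nested under such a node, the usual pattern is to ask the parent for its syscon regmap and then address its own registers relative to a "reg" offset. The sketch below only illustrates that general pattern, with placeholder names rather than the exact bt-bmc code:

/* Sketch only: parent-syscon lookup pattern, placeholder names. */
#include <linux/err.h>
#include <linux/mfd/syscon.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>

static int example_probe(struct platform_device *pdev)
{
	struct regmap *map;
	u32 offset;
	int rc;

	/* the parent "simple-mfd"/"syscon" node owns the register space */
	map = syscon_node_to_regmap(pdev->dev.parent->of_node);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/* the child's "reg" property gives its offset within that space */
	rc = of_property_read_u32(pdev->dev.of_node, "reg", &offset);
	if (rc)
		return rc;

	/* all register accesses then go through regmap_read()/regmap_write()
	 * at (offset + reg) instead of a private ioremap */
	return 0;
}
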
This change allows the bt-bmc driver to function with both syscon and non-syscon- based devicetree descriptions by always using a regmap for register access, either retrieved from the parent syscon device or instantiated if none exists. Signed-off-by: Andrew Jeffery Reviewed-by: Cédric Le Goater Signed-off-by: Corey Minyard --- drivers/char/ipmi/Kconfig | 3 +- drivers/char/ipmi/bt-bmc.c | 80 ++++++++++++++++++++++++++++---------- 2 files changed, 61 insertions(+), 22 deletions(-) diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig index 7f816655cbbf..90f3edffb067 100644 --- a/drivers/char/ipmi/Kconfig +++ b/drivers/char/ipmi/Kconfig @@ -78,7 +78,8 @@ config IPMI_POWEROFF endif # IPMI_HANDLER config ASPEED_BT_IPMI_BMC - depends on ARCH_ASPEED + depends on ARCH_ASPEED || COMPILE_TEST + depends on REGMAP && REGMAP_MMIO && MFD_SYSCON tristate "BT IPMI bmc driver" help Provides a driver for the BT (Block Transfer) IPMI interface diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index fc9e8891eae3..d6f5d9eb102d 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -12,10 +12,13 @@ #include #include #include +#include #include #include +#include #include #include +#include #include #include @@ -60,7 +63,8 @@ struct bt_bmc { struct device dev; struct miscdevice miscdev; - void __iomem *base; + struct regmap *map; + int offset; int irq; wait_queue_head_t queue; struct timer_list poll_timer; @@ -69,14 +73,29 @@ struct bt_bmc { static atomic_t open_count = ATOMIC_INIT(0); +static const struct regmap_config bt_regmap_cfg = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, +}; + static u8 bt_inb(struct bt_bmc *bt_bmc, int reg) { - return ioread8(bt_bmc->base + reg); + uint32_t val = 0; + int rc; + + rc = regmap_read(bt_bmc->map, bt_bmc->offset + reg, &val); + WARN(rc != 0, "regmap_read() failed: %d\n", rc); + + return rc == 0 ? 
(u8) val : 0; } static void bt_outb(struct bt_bmc *bt_bmc, u8 data, int reg) { - iowrite8(data, bt_bmc->base + reg); + int rc; + + rc = regmap_write(bt_bmc->map, bt_bmc->offset + reg, data); + WARN(rc != 0, "regmap_write() failed: %d\n", rc); } static void clr_rd_ptr(struct bt_bmc *bt_bmc) @@ -367,14 +386,18 @@ static irqreturn_t bt_bmc_irq(int irq, void *arg) { struct bt_bmc *bt_bmc = arg; u32 reg; + int rc; + + rc = regmap_read(bt_bmc->map, bt_bmc->offset + BT_CR2, ®); + if (rc) + return IRQ_NONE; - reg = ioread32(bt_bmc->base + BT_CR2); reg &= BT_CR2_IRQ_H2B | BT_CR2_IRQ_HBUSY; if (!reg) return IRQ_NONE; /* ack pending IRQs */ - iowrite32(reg, bt_bmc->base + BT_CR2); + regmap_write(bt_bmc->map, bt_bmc->offset + BT_CR2, reg); wake_up(&bt_bmc->queue); return IRQ_HANDLED; @@ -384,7 +407,6 @@ static int bt_bmc_config_irq(struct bt_bmc *bt_bmc, struct platform_device *pdev) { struct device *dev = &pdev->dev; - u32 reg; int rc; bt_bmc->irq = platform_get_irq(pdev, 0); @@ -405,18 +427,17 @@ static int bt_bmc_config_irq(struct bt_bmc *bt_bmc, * will be cleared (along with B2H) when we can write the next * message to the BT buffer */ - reg = ioread32(bt_bmc->base + BT_CR1); - reg |= BT_CR1_IRQ_H2B | BT_CR1_IRQ_HBUSY; - iowrite32(reg, bt_bmc->base + BT_CR1); + rc = regmap_update_bits(bt_bmc->map, bt_bmc->offset + BT_CR1, + (BT_CR1_IRQ_H2B | BT_CR1_IRQ_HBUSY), + (BT_CR1_IRQ_H2B | BT_CR1_IRQ_HBUSY)); - return 0; + return rc; } static int bt_bmc_probe(struct platform_device *pdev) { struct bt_bmc *bt_bmc; struct device *dev; - struct resource *res; int rc; if (!pdev || !pdev->dev.of_node) @@ -431,10 +452,27 @@ static int bt_bmc_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, bt_bmc); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - bt_bmc->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(bt_bmc->base)) - return PTR_ERR(bt_bmc->base); + bt_bmc->map = syscon_node_to_regmap(pdev->dev.parent->of_node); + if (IS_ERR(bt_bmc->map)) { + struct resource *res; + void __iomem *base; + + /* + * Assume it's not the MFD-based devicetree description, in + * which case generate a regmap ourselves + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + bt_bmc->map = devm_regmap_init_mmio(dev, base, &bt_regmap_cfg); + bt_bmc->offset = 0; + } else { + rc = of_property_read_u32(dev->of_node, "reg", &bt_bmc->offset); + if (rc) + return rc; + } mutex_init(&bt_bmc->mutex); init_waitqueue_head(&bt_bmc->queue); @@ -461,12 +499,12 @@ static int bt_bmc_probe(struct platform_device *pdev) add_timer(&bt_bmc->poll_timer); } - iowrite32((BT_IO_BASE << BT_CR0_IO_BASE) | - (BT_IRQ << BT_CR0_IRQ) | - BT_CR0_EN_CLR_SLV_RDP | - BT_CR0_EN_CLR_SLV_WRP | - BT_CR0_ENABLE_IBT, - bt_bmc->base + BT_CR0); + regmap_write(bt_bmc->map, bt_bmc->offset + BT_CR0, + (BT_IO_BASE << BT_CR0_IO_BASE) | + (BT_IRQ << BT_CR0_IRQ) | + BT_CR0_EN_CLR_SLV_RDP | + BT_CR0_EN_CLR_SLV_WRP | + BT_CR0_ENABLE_IBT); clr_b_busy(bt_bmc); -- GitLab From e3b88ee95b4e4bf3e9729a4695d695b9c7c296c8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Feb 2017 16:25:45 -0800 Subject: [PATCH 0384/1084] target: Fix handling of aborted failed commands If a target driver (e.g. 
tcm_qla2xxx) calls transport_generic_request_failure() to report that receiving data has failed and that SCSI command has already been aborted by the initiator, ensure that the SCSI status ABORTED is sent back to the initiator instead of the sense code provided by the target driver. Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 22190003534d..efb9e6f38201 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1647,6 +1647,9 @@ void transport_generic_request_failure(struct se_cmd *cmd, { int ret = 0, post_ret = 0; + if (transport_check_aborted_status(cmd, 1)) + return; + pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08llx" " CDB: 0x%02x\n", cmd, cmd->tag, cmd->t_task_cdb[0]); pr_debug("-----[ i_state: %d t_state: %d sense_reason: %d\n", -- GitLab From 758e99fefe1d9230111296956335cd35995c0eaf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Feb 2017 17:04:42 -0500 Subject: [PATCH 0385/1084] nfsd: minor nfsd_setattr cleanup Simplify exit paths, size_change use. Signed-off-by: Christoph Hellwig Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2c132162796e..1c1b1d71b82b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, __be32 err; int host_err; bool get_write_count; - int size_change = 0; + bool size_change = (iap->ia_valid & ATTR_SIZE); if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* Get inode */ err = fh_verify(rqstp, fhp, ftype, accmode); if (err) - goto out; + return err; if (get_write_count) { host_err = fh_want_write(fhp); if (host_err) - return nfserrno(host_err); + goto out; } dentry = fhp->fh_dentry; @@ -405,19 +405,21 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~ATTR_MODE; if (!iap->ia_valid) - goto out; + return 0; nfsd_sanitize_attrs(inode, iap); + if (check_guard && guardtime != inode->i_ctime.tv_sec) + return nfserr_notsync; + /* * The size case is special, it changes the file in addition to the * attributes. 
*/ - if (iap->ia_valid & ATTR_SIZE) { + if (size_change) { err = nfsd_get_write_access(rqstp, fhp, iap); if (err) - goto out; - size_change = 1; + return err; /* * RFC5661, Section 18.30.4: @@ -432,23 +434,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid |= ATTR_CTIME; - if (check_guard && guardtime != inode->i_ctime.tv_sec) { - err = nfserr_notsync; - goto out_put_write_access; - } - fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); fh_unlock(fhp); - err = nfserrno(host_err); -out_put_write_access: if (size_change) put_write_access(inode); - if (!err) - err = nfserrno(commit_metadata(fhp)); out: - return err; + if (!host_err) + host_err = commit_metadata(fhp); + return nfserrno(host_err); } #if defined(CONFIG_NFSD_V4) -- GitLab From 51ec502a32665fed66c7f03799ede4023b212536 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Feb 2017 16:25:54 -0800 Subject: [PATCH 0386/1084] target: Delete tmr from list before processing This patch does an explicit list_del_init(tmr->tmr_list) in core_tmr_drain_tmr_list() before starting processing of outstanding TMRs to abort, instead of explicitly checking which TMR descriptor matches the caller. Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Christoph Hellwig Cc: David Disseldorp Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tmr.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 311dc3c2f1dc..a806d9bca3d2 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -215,13 +215,8 @@ static void core_tmr_drain_tmr_list( * LUN_RESET tmr.. */ spin_lock_irqsave(&dev->se_tmr_lock, flags); + list_del_init(&tmr->tmr_list); list_for_each_entry_safe(tmr_p, tmr_pp, &dev->dev_tmr_list, tmr_list) { - /* - * Allow the received TMR to return with FUNCTION_COMPLETE. - */ - if (tmr_p == tmr) - continue; - cmd = tmr_p->task_cmd; if (!cmd) { pr_err("Unable to locate struct se_cmd for TMR\n"); -- GitLab From 943db62c316c578f8e2cc6fb81a5f641096b29bf Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 15 Feb 2017 13:45:30 -0500 Subject: [PATCH 0387/1084] powerpc/pseries: Revert 'Auto-online hotplugged memory' This reverts commit ec999072442a ("powerpc/pseries: Auto-online hotplugged memory"), and 9dc512819e4b ("powerpc: Fix unused function warning 'lmb_to_memblock'"). Using the auto-online acpability does online added memory but does not update the associated device struct to indicate that the memory is online. This causes the pseries memory DLPAR code to fail when trying to remove a LMB that was previously removed and added back. This happens when validating that the LMB is removable. This patch reverts to the previous behavior of calling device_online() to online the LMB when it is DLPAR added and moves the lmb_to_memblock() routine out of CONFIG_MEMORY_HOTREMOVE now that we call it for add. 
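For reference, the mismatch described above is visible from user space through the standard memory-hotplug sysfs interface: the memory_block device's reported state is what the removability check depends on. A minimal sketch that reads that state, assuming the conventional /sys/devices/system/memory/memoryN/state layout (illustration only, not part of this patch):

/* Hypothetical helper: print the sysfs state ("online"/"offline") of a
 * memory block device; the path layout is the standard memory-hotplug
 * sysfs interface, not something introduced by this patch. */
#include <stdio.h>

int main(int argc, char **argv)
{
	char path[128], state[32];
	FILE *f;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <memory-block-number>\n", argv[0]);
		return 1;
	}

	snprintf(path, sizeof(path),
		 "/sys/devices/system/memory/memory%s/state", argv[1]);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(state, sizeof(state), f))
		printf("memory%s: %s", argv[1], state);
	fclose(f);
	return 0;
}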
Fixes: ec999072442a ("powerpc/pseries: Auto-online hotplugged memory") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/configs/pseries_defconfig | 1 - .../platforms/pseries/hotplug-memory.c | 52 +++++++++++++------ 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 6d0eb02fefa4..4ff68b752618 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -58,7 +58,6 @@ CONFIG_KEXEC_FILE=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y -CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_PPC_64K_PAGES=y diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index e28abfa013e5..e104c71ea44a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -320,6 +320,19 @@ static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb) return dlpar_update_device_tree_lmb(lmb); } +static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) +{ + unsigned long section_nr; + struct mem_section *mem_sect; + struct memory_block *mem_block; + + section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); + mem_sect = __nr_to_section(section_nr); + + mem_block = find_memory_block(mem_sect); + return mem_block; +} + #ifdef CONFIG_MEMORY_HOTREMOVE static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { @@ -407,19 +420,6 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb) static int dlpar_add_lmb(struct of_drconf_cell *); -static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) -{ - unsigned long section_nr; - struct mem_section *mem_sect; - struct memory_block *mem_block; - - section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); - mem_sect = __nr_to_section(section_nr); - - mem_block = find_memory_block(mem_sect); - return mem_block; -} - static int dlpar_remove_lmb(struct of_drconf_cell *lmb) { struct memory_block *mem_block; @@ -728,6 +728,20 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, } #endif /* CONFIG_MEMORY_HOTREMOVE */ +static int dlpar_online_lmb(struct of_drconf_cell *lmb) +{ + struct memory_block *mem_block; + int rc; + + mem_block = lmb_to_memblock(lmb); + if (!mem_block) + return -EINVAL; + + rc = device_online(&mem_block->dev); + put_device(&mem_block->dev); + return rc; +} + static int dlpar_add_lmb(struct of_drconf_cell *lmb) { unsigned long block_sz; @@ -751,10 +765,18 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) /* Add the memory */ rc = add_memory(nid, lmb->base_addr, block_sz); - if (rc) + if (rc) { dlpar_remove_device_tree_lmb(lmb); - else + return rc; + } + + rc = dlpar_online_lmb(lmb); + if (rc) { + remove_memory(nid, lmb->base_addr, block_sz); + dlpar_remove_device_tree_lmb(lmb); + } else { lmb->flags |= DRCONF_MEM_ASSIGNED; + } return rc; } -- GitLab From 5e48dc0aa4d9daf93e9bff2a274473623a134ec8 Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Tue, 7 Feb 2017 07:40:44 -0600 Subject: [PATCH 0388/1084] powerpc/xmon: Dump memory in CPU endian format Extend the dump command to allow display of 2, 4, and 8 byte words in CPU endian format. Also adds dump command for "1 byte values" for the sake of symmetry. 
New commands are: d1 dump 1 byte values d2 dump 2 byte values d4 dump 4 byte values d8 dump 8 byte values Signed-off-by: Michael Ellerman Signed-off-by: Douglas Miller Acked-by: Balbir Singh --- arch/powerpc/xmon/xmon.c | 58 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index a44b049b9cf6..57503cdbd76f 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -212,6 +212,10 @@ Commands:\n\ "\ C checksum\n\ d dump bytes\n\ + d1 dump 1 byte values\n\ + d2 dump 2 byte values\n\ + d4 dump 4 byte values\n\ + d8 dump 8 byte values\n\ di dump instructions\n\ df dump float values\n\ dd dump double values\n\ @@ -2334,9 +2338,42 @@ static void dump_pacas(void) } #endif +static void dump_by_size(unsigned long addr, long count, int size) +{ + unsigned char temp[16]; + int i, j; + u64 val; + + count = ALIGN(count, 16); + + for (i = 0; i < count; i += 16, addr += 16) { + printf(REG, addr); + + if (mread(addr, temp, 16) != 16) { + printf("\nFaulted reading %d bytes from 0x"REG"\n", 16, addr); + return; + } + + for (j = 0; j < 16; j += size) { + putchar(' '); + switch (size) { + case 1: val = temp[j]; break; + case 2: val = *(u16 *)&temp[j]; break; + case 4: val = *(u32 *)&temp[j]; break; + case 8: val = *(u64 *)&temp[j]; break; + default: val = 0; + } + + printf("%0*lx", size * 2, val); + } + printf("\n"); + } +} + static void dump(void) { + static char last[] = { "d?\n" }; int c; c = inchar(); @@ -2350,8 +2387,9 @@ dump(void) } #endif - if ((isxdigit(c) && c != 'f' && c != 'd') || c == '\n') + if (c == '\n') termch = c; + scanhex((void *)&adrs); if (termch != '\n') termch = 0; @@ -2383,9 +2421,23 @@ dump(void) ndump = 64; else if (ndump > MAX_DUMP) ndump = MAX_DUMP; - prdump(adrs, ndump); + + switch (c) { + case '8': + case '4': + case '2': + case '1': + ndump = ALIGN(ndump, 16); + dump_by_size(adrs, ndump, c - '0'); + last[1] = c; + last_cmd = last; + break; + default: + prdump(adrs, ndump); + last_cmd = "d\n"; + } + adrs += ndump; - last_cmd = "d\n"; } } -- GitLab From 171ed0fcd8966d82c45376f1434678e7b9d4d9b1 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 6 Feb 2017 12:07:17 +1100 Subject: [PATCH 0389/1084] cxl: fix nested locking hang during EEH hotplug Commit 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured") introduced a rwsem to fix an invalid memory access that occurred when someone attempts to access the config space of an AFU on a vPHB whilst the AFU is deconfigured, such as during EEH recovery. It turns out that it's possible to run into a nested locking issue when EEH recovery fails and a full device hotplug is required. cxl_pci_error_detected() deconfigures the AFU, taking a writer lock on configured_rwsem. When EEH recovery fails, the EEH code calls pci_hp_remove_devices() to remove the device, which in turn calls cxl_remove() -> cxl_pci_remove_afu() -> pci_deconfigure_afu(), which tries to grab the writer lock that's already held. Standard rwsem semantics don't express what we really want to do here and don't allow for nested locking. Fix this by replacing the rwsem with an atomic_t which we can control more finely. Allow the AFU to be locked multiple times so long as there are no readers. 
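The counter scheme can be sketched outside the kernel with C11 atomics; the sketch below only models the semantics described above (a negative value means deconfigured/locked, a non-negative value counts readers, and locking an already-locked AFU is a no-op), it is not the driver code and the function names are made up for the illustration:

/* Model of the configured_state scheme with C11 atomics:
 * -1 = deconfigured/locked, >= 0 = number of active readers. */
#include <stdatomic.h>
#include <stdbool.h>
#include <sched.h>

static atomic_int configured_state = ATOMIC_VAR_INIT(-1);

/* reader lock, in the spirit of atomic_inc_unless_negative() */
static bool configured_get(void)
{
	int old = atomic_load(&configured_state);

	while (old >= 0)
		if (atomic_compare_exchange_weak(&configured_state, &old, old + 1))
			return true;	/* counted as a reader */
	return false;			/* AFU is deconfigured */
}

/* reader unlock, in the spirit of atomic_dec_if_positive() */
static void configured_put(void)
{
	int old = atomic_load(&configured_state);

	while (old > 0 &&
	       !atomic_compare_exchange_weak(&configured_state, &old, old - 1))
		;
}

/* writer: wait until there are no readers, then lock; nested calls return */
static void deconfigure(void)
{
	int expected;

	do {
		expected = 0;
		if (atomic_compare_exchange_strong(&configured_state, &expected, -1))
			break;		/* no readers left, now locked */
		if (expected == -1)
			break;		/* already locked elsewhere; nesting is fine */
		sched_yield();		/* readers still active, wait */
	} while (1);
}

int main(void)
{
	atomic_store(&configured_state, 0);	/* configure succeeded, no readers */

	if (configured_get()) {
		/* ... read config space ... */
		configured_put();
	}
	deconfigure();		/* e.g. error recovery */
	deconfigure();		/* nested call returns immediately */
	return 0;
}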
Fixes: 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/cxl.h | 5 +++-- drivers/misc/cxl/main.c | 3 +-- drivers/misc/cxl/pci.c | 11 +++++++++-- drivers/misc/cxl/vphb.c | 18 ++++++++++++++---- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 6c722d96b775..79e60ec70bd3 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -418,8 +418,9 @@ struct cxl_afu { struct dentry *debugfs; struct mutex contexts_lock; spinlock_t afu_cntl_lock; - /* Used to block access to AFU config space while deconfigured */ - struct rw_semaphore configured_rwsem; + + /* -1: AFU deconfigured/locked, >= 0: number of readers */ + atomic_t configured_state; /* AFU error buffer fields and bin attribute for sysfs */ u64 eb_len, eb_offset; diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 2a6bf1d0a3a4..cc1706a92ace 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -268,8 +268,7 @@ struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) idr_init(&afu->contexts_idr); mutex_init(&afu->contexts_lock); spin_lock_init(&afu->afu_cntl_lock); - init_rwsem(&afu->configured_rwsem); - down_write(&afu->configured_rwsem); + atomic_set(&afu->configured_state, -1); afu->prefault_mode = CXL_PREFAULT_NONE; afu->irqs_max = afu->adapter->user_irqs; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index cca938845ffd..91f645992c94 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1129,7 +1129,7 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc if ((rc = cxl_native_register_psl_irq(afu))) goto err2; - up_write(&afu->configured_rwsem); + atomic_set(&afu->configured_state, 0); return 0; err2: @@ -1142,7 +1142,14 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc static void pci_deconfigure_afu(struct cxl_afu *afu) { - down_write(&afu->configured_rwsem); + /* + * It's okay to deconfigure when AFU is already locked, otherwise wait + * until there are no readers + */ + if (atomic_read(&afu->configured_state) != -1) { + while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1) + schedule(); + } cxl_native_release_psl_irq(afu); if (afu->adapter->native->sl_ops->release_serr_irq) afu->adapter->native->sl_ops->release_serr_irq(afu); diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 639a343b7836..512a4897dbf6 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -83,6 +83,16 @@ static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus) return phb ? phb->private_data : NULL; } +static void cxl_afu_configured_put(struct cxl_afu *afu) +{ + atomic_dec_if_positive(&afu->configured_state); +} + +static bool cxl_afu_configured_get(struct cxl_afu *afu) +{ + return atomic_inc_unless_negative(&afu->configured_state); +} + static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, struct cxl_afu *afu, int *_record) { @@ -107,7 +117,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, afu = pci_bus_to_afu(bus); /* Grab a reader lock on afu. 
*/ - if (afu == NULL || !down_read_trylock(&afu->configured_rwsem)) + if (afu == NULL || !cxl_afu_configured_get(afu)) return PCIBIOS_DEVICE_NOT_FOUND; rc = cxl_pcie_config_info(bus, devfn, afu, &record); @@ -132,7 +142,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, } out: - up_read(&afu->configured_rwsem); + cxl_afu_configured_put(afu); return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; } @@ -144,7 +154,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, afu = pci_bus_to_afu(bus); /* Grab a reader lock on afu. */ - if (afu == NULL || !down_read_trylock(&afu->configured_rwsem)) + if (afu == NULL || !cxl_afu_configured_get(afu)) return PCIBIOS_DEVICE_NOT_FOUND; rc = cxl_pcie_config_info(bus, devfn, afu, &record); @@ -166,7 +176,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, } out: - up_read(&afu->configured_rwsem); + cxl_afu_configured_put(afu); return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL; } -- GitLab From 3dbbaf200f532e01e56168b8339f2981f2cb1d67 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Mon, 20 Feb 2017 19:12:18 -0600 Subject: [PATCH 0390/1084] powerpc/pseries: Advertise Hot Plug Event support to firmware With the inclusion of commit 333f7b76865b ("powerpc/pseries: Implement indexed-count hotplug memory add") and commit 753843471cbb ("powerpc/pseries: Implement indexed-count hotplug memory remove"), we now have complete handling of the RTAS hotplug event format as described by PAPR via ACR "PAPR Changes for Hotplug RTAS Events". This capability is indicated by byte 6, bit 2 (5 in IBM numbering) of architecture option vector 5, and allows for greater control over cpu/memory/pci hot plug/unplug operations. Existing pseries kernels will utilize this capability based on the existence of the /event-sources/hot-plug-events DT property, so we only need to advertise it via CAS and do not need a corresponding FW_FEATURE_* value to test for. 
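The OV5_* constants above follow a simple packing, which the commit text spells out for OV5_HP_EVT (0x0604): the high byte is the option-vector byte index and the low byte is the bit mask, with IBM numbering counting bits from the most significant end. A tiny standalone sketch of that arithmetic (not kernel code; the decoding convention is inferred from the values quoted above):

/* Sketch: unpack an OV5_*-style constant such as 0x0604 into its
 * option vector 5 byte index and bit position. */
#include <stdio.h>

int main(void)
{
	unsigned int ov5_hp_evt = 0x0604;		/* OV5_HP_EVT */
	unsigned int byte_index = ov5_hp_evt >> 8;	/* byte 6 of vector 5 */
	unsigned int mask = ov5_hp_evt & 0xff;		/* bit mask 0x04 */
	int bit = 0;

	while (!((mask >> bit) & 1))
		bit++;					/* LSB-first bit number: 2 */

	printf("byte %u, bit %d (%d in IBM numbering), mask 0x%02x\n",
	       byte_index, bit, 7 - bit, mask);
	return 0;
}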
Signed-off-by: Michael Roth Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/prom.h | 1 + arch/powerpc/kernel/prom_init.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 2c8001cc93b6..4a90634e8322 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -153,6 +153,7 @@ struct of_drconf_cell { #define OV5_XCMO 0x0440 /* Page Coalescing */ #define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */ #define OV5_PRRN 0x0540 /* Platform Resource Reassignment */ +#define OV5_HP_EVT 0x0604 /* Hot Plug Event support */ #define OV5_RESIZE_HPT 0x0601 /* Hash Page Table resizing */ #define OV5_PFO_HW_RNG 0x1180 /* PFO Random Number Generator */ #define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index f3c879935f87..1a835e77b54b 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -839,7 +839,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { 0, #endif .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), - .bin_opts = OV5_FEAT(OV5_RESIZE_HPT), + .bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT), .micro_checkpoint = 0, .reserved0 = 0, .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */ -- GitLab From 783112f7401ff449d979530209b3f6c2594fdb4e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Feb 2017 07:21:33 +0100 Subject: [PATCH 0391/1084] nfsd: special case truncates some more Both the NFS protocols and the Linux VFS use a setattr operation with a bitmap of attributes to set to set various file attributes including the file size and the uid/gid. The Linux syscalls never mix size updates with unrelated updates like the uid/gid, and some file systems like XFS and GFS2 rely on the fact that truncates don't update random other attributes, and many other file systems handle the case but do not update the other attributes in the same transaction. NFSD on the other hand passes the attributes it gets on the wire more or less directly through to the VFS, leading to updates the file systems don't expect. XFS at least has an assert on the allowed attributes, which caught an unusual NFS client setting the size and group at the same time. To handle this issue properly this splits the notify_change call in nfsd_setattr into two separate ones. Signed-off-by: Christoph Hellwig Cc: stable@vger.kernel.org Tested-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 1c1b1d71b82b..19d50f600e8d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -414,13 +414,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, /* * The size case is special, it changes the file in addition to the - * attributes. + * attributes, and file systems don't expect it to be mixed with + * "random" attribute changes. We thus split out the size change + * into a separate call to ->setattr, and do the rest as a separate + * setattr call. 
*/ if (size_change) { err = nfsd_get_write_access(rqstp, fhp, iap); if (err) return err; + } + fh_lock(fhp); + if (size_change) { /* * RFC5661, Section 18.30.4: * Changing the size of a file with SETATTR indirectly @@ -428,16 +434,30 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * * (and similar for the older RFCs) */ - if (iap->ia_size != i_size_read(inode)) - iap->ia_valid |= ATTR_MTIME; + struct iattr size_attr = { + .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, + .ia_size = iap->ia_size, + }; + + host_err = notify_change(dentry, &size_attr, NULL); + if (host_err) + goto out_unlock; + iap->ia_valid &= ~ATTR_SIZE; + + /* + * Avoid the additional setattr call below if the only other + * attribute that the client sends is the mtime, as we update + * it as part of the size change above. + */ + if ((iap->ia_valid & ~ATTR_MTIME) == 0) + goto out_unlock; } iap->ia_valid |= ATTR_CTIME; - - fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); - fh_unlock(fhp); +out_unlock: + fh_unlock(fhp); if (size_change) put_write_access(inode); out: -- GitLab From 9761a2469dc287c6d75ca148f4fc483becbcad88 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 19 Feb 2017 00:34:59 +0300 Subject: [PATCH 0392/1084] sunrpc: silence uninitialized variable warning kstrtouint() can return a couple different error codes so the check for "ret == -EINVAL" is wrong and static analysis tools correctly complain that we can use "num" without initializing it. It's not super harmful because we check the bounds. But it's also easy enough to fix. Signed-off-by: Dan Carpenter Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 18b4e7ff8879..5cbabf2c75b2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -3261,7 +3261,9 @@ static int param_set_uint_minmax(const char *val, if (!val) return -EINVAL; ret = kstrtouint(val, 0, &num); - if (ret == -EINVAL || num < min || num > max) + if (ret) + return ret; + if (num < min || num > max) return -EINVAL; *((unsigned int *)kp->arg) = num; return 0; -- GitLab From 7e6a69ee954caf8dd8c98f70c8ee894c8ebfdcf0 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Fri, 17 Feb 2017 19:13:10 +0000 Subject: [PATCH 0393/1084] arm64: dts: juno: update definition for programmable replicator Juno platforms have a programmable replicator splitting the trace output to TPIU and ETR. Currently this is not being programmed as it is being treated as a none-programmable replicator - which is the default operational mode for these devices. The TPIU in the system is enabled by default, and this combination is causing back-pressure in the trace system resulting in overflows at the source. Replaces the existing definition with one that defines the programmable replicator, using the "qcom,coresight-replicator1x" driver that provides the correct functionality for CoreSight programmable replicators. 
Reviewed-and-Tested-by: Mathieu Poirier Signed-off-by: Mike Leach Signed-off-by: Sudeep Holla --- arch/arm64/boot/dts/arm/juno-base.dtsi | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 9d799d938d2f..df539e865b90 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -372,12 +372,13 @@ cluster1_etm3_out_port: endpoint { }; }; - coresight-replicator { - /* - * Non-configurable replicators don't show up on the - * AMBA bus. As such no need to add "arm,primecell". - */ - compatible = "arm,coresight-replicator"; + replicator@20120000 { + compatible = "qcom,coresight-replicator1x", "arm,primecell"; + reg = <0 0x20120000 0 0x1000>; + + clocks = <&soc_smc50mhz>; + clock-names = "apb_pclk"; + power-domains = <&scpi_devpd 0>; ports { #address-cells = <1>; -- GitLab From 248e23b50e2da0753f3b5faa068939cbe9f8a75a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Feb 2017 11:26:33 +0100 Subject: [PATCH 0394/1084] batman-adv: Fix double free during fragment merge error The function batadv_frag_skb_buffer was supposed not to consume the skbuff on errors. This was followed in the helper function batadv_frag_insert_packet when the skb would potentially be inserted in the fragment queue. But it could happen that the next helper function batadv_frag_merge_packets would try to merge the fragments and fail. This results in a kfree_skb of all the enqueued fragments (including the just inserted one). batadv_recv_frag_packet would detect the error in batadv_frag_skb_buffer and try to free the skb again. The behavior of batadv_frag_skb_buffer (and its helper batadv_frag_insert_packet) must therefore be changed to always consume the skbuff to have a common behavior and avoid the double kfree_skb. Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 0854ebd8613e..31e97e9aee0d 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -239,8 +239,10 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node, spin_unlock_bh(&chain->lock); err: - if (!ret) + if (!ret) { kfree(frag_entry_new); + kfree_skb(skb); + } return ret; } @@ -313,7 +315,7 @@ batadv_frag_merge_packets(struct hlist_head *chain) * * There are three possible outcomes: 1) Packet is merged: Return true and * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb - * to NULL; 3) Error: Return false and leave skb as is. + * to NULL; 3) Error: Return false and free skb. * * Return: true when packet is merged or buffered, false when skb is not not * used. 
@@ -338,9 +340,9 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb, goto out_err; out: - *skb = skb_out; ret = true; out_err: + *skb = skb_out; return ret; } -- GitLab From 51c6b429c0c95e67edd1cb0b548c5cf6a6604763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 13 Feb 2017 20:44:31 +0100 Subject: [PATCH 0395/1084] batman-adv: Fix transmission of final, 16th fragment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to split and transmit a unicast packet in 16 parts will fail for the final fragment: After having sent the 15th one with a frag_packet.no index of 14, we will increase the the index to 15 - and return with an error code immediately, even though one more fragment is due for transmission and allowed. Fixing this issue by moving the check before incrementing the index. While at it, adding an unlikely(), because the check is actually more of an assertion. Fixes: ee75ed88879a ("batman-adv: Fragment and send skbs larger than mtu") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 31e97e9aee0d..11149e5be4e0 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -501,6 +501,12 @@ int batadv_frag_send_packet(struct sk_buff *skb, /* Eat and send fragments from the tail of skb */ while (skb->len > max_fragment_size) { + /* The initial check in this function should cover this case */ + if (unlikely(frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)) { + ret = -EINVAL; + goto put_primary_if; + } + skb_fragment = batadv_frag_create(skb, &frag_header, mtu); if (!skb_fragment) { ret = -ENOMEM; @@ -517,12 +523,6 @@ int batadv_frag_send_packet(struct sk_buff *skb, } frag_header.no++; - - /* The initial check in this function should cover this case */ - if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) { - ret = -EINVAL; - goto put_primary_if; - } } /* Make room for the fragment header. */ -- GitLab From 0ae060ca2bdfe11181094699b0f76437ec210b17 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2017 16:08:25 -0500 Subject: [PATCH 0396/1084] SUNRPC: Add generic helpers for xdr_stream encode/decode Add some generic helpers for encoding/decoding opaque structures and basic u32/u64. 
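As background, the wire rules these helpers implement are compact: everything is carried in 4-byte big-endian words, and a variable-length opaque is a 4-byte length followed by the data padded up to a 4-byte boundary (which is what xdr_align_size() below computes). A standalone user-space sketch of that layout, for illustration only (buffer handling is simplified and bounds checks are omitted):

/* Sketch of the XDR encoding rules behind the xdr_stream_* helpers:
 * 4-byte big-endian words, opaques padded to a 4-byte boundary. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* htonl() */

static size_t xdr_align_size(size_t n)
{
	const size_t mask = sizeof(uint32_t) - 1;

	return (n + mask) & ~mask;
}

static size_t encode_u32(unsigned char *buf, uint32_t n)
{
	uint32_t be = htonl(n);

	memcpy(buf, &be, sizeof(be));
	return sizeof(be);
}

static size_t encode_opaque(unsigned char *buf, const void *ptr, size_t len)
{
	size_t padded = xdr_align_size(len);

	encode_u32(buf, len);			/* length word */
	memcpy(buf + 4, ptr, len);		/* payload */
	memset(buf + 4 + len, 0, padded - len);	/* zero padding */
	return 4 + padded;
}

int main(void)
{
	unsigned char buf[64];
	size_t off = 0, i;

	off += encode_u32(buf + off, 42);
	off += encode_opaque(buf + off, "abcde", 5);	/* padded to 8 bytes */

	for (i = 0; i < off; i++)
		printf("%02x%c", buf[i], (i % 4 == 3) ? '\n' : ' ');
	return 0;
}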
Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 177 +++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 56c48c884a24..dc7e51a769ac 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -242,6 +242,183 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +/** + * xdr_align_size - Calculate padded size of an object + * @n: Size of an object being XDR encoded (in bytes) + * + * Return value: + * Size (in bytes) of the object including xdr padding + */ +static inline size_t +xdr_align_size(size_t n) +{ + const size_t mask = sizeof(__u32) - 1; + + return (n + mask) & ~mask; +} + +/** + * xdr_stream_encode_u32 - Encode a 32-bit integer + * @xdr: pointer to xdr_stream + * @n: integer to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n) +{ + const size_t len = sizeof(n); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = cpu_to_be32(n); + return len; +} + +/** + * xdr_stream_encode_u64 - Encode a 64-bit integer + * @xdr: pointer to xdr_stream + * @n: 64-bit integer to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n) +{ + const size_t len = sizeof(n); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_hyper(p, n); + return len; +} + +/** + * xdr_stream_encode_opaque_fixed - Encode fixed length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: pointer to opaque data object + * @len: size of object pointed to by @ptr + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_opaque_fixed(struct xdr_stream *xdr, const void *ptr, size_t len) +{ + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_opaque_fixed(p, ptr, len); + return xdr_align_size(len); +} + +/** + * xdr_stream_encode_opaque - Encode variable length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: pointer to opaque data object + * @len: size of object pointed to by @ptr + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len) +{ + size_t count = sizeof(__u32) + xdr_align_size(len); + __be32 *p = xdr_reserve_space(xdr, count); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_opaque(p, ptr, len); + return count; +} + +/** + * xdr_stream_decode_u32 - Decode a 32-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = 
xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = be32_to_cpup(p); + return 0; +} + +/** + * xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: location to store data + * @len: size of buffer pointed to by @ptr + * + * Return values: + * On success, returns size of object stored in @ptr + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len) +{ + __be32 *p = xdr_inline_decode(xdr, len); + + if (unlikely(!p)) + return -EBADMSG; + xdr_decode_opaque_fixed(p, ptr, len); + return len; +} + +/** + * xdr_stream_decode_opaque_inline - Decode variable length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: location to store pointer to opaque data + * @maxlen: maximum acceptable object size + * + * Note: the pointer stored in @ptr cannot be assumed valid after the XDR + * buffer has been destroyed, or even after calling xdr_inline_decode() + * on @xdr. It is therefore expected that the object it points to should + * be processed immediately. + * + * Return values: + * On success, returns size of object stored in *@ptr + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE if the size of the object would exceed @maxlen + */ +static inline ssize_t +xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxlen) +{ + __be32 *p; + __u32 len; + + *ptr = NULL; + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return -EBADMSG; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return -EBADMSG; + if (unlikely(len > maxlen)) + return -EMSGSIZE; + *ptr = p; + } + return len; +} #endif /* __KERNEL__ */ #endif /* _SUNRPC_XDR_H_ */ -- GitLab From ab6e9aaf16cfdfca630f9745fd6d453818df7f64 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2017 16:08:26 -0500 Subject: [PATCH 0397/1084] NFSv4: Replace ad-hoc xdr encode/decode helpers with xdr_stream_* generics Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 8 ++----- fs/nfs/flexfilelayout/flexfilelayout.c | 5 +--- fs/nfs/nfs4xdr.c | 33 +++++++------------------- 3 files changed, 12 insertions(+), 34 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index eb094c6011d8..e732a65db546 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -582,12 +582,8 @@ static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { - __be32 *p; - - p = xdr_reserve_space(xdr, 4 + len); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - xdr_encode_opaque(p, str, len); + if (unlikely(xdr_stream_encode_opaque(xdr, str, len) < 0)) + return cpu_to_be32(NFS4ERR_RESOURCE); return 0; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 6104696325be..7b5f37f5ddf1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1949,10 +1949,7 @@ static int ff_layout_encode_ioerr(struct xdr_stream *xdr, static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) { - __be32 *p; - - p = xdr_reserve_space(xdr, len); - xdr_encode_opaque_fixed(p, buf, len); + WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); } static void diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e9255cb453e6..5f0a6fcd0030 100644 --- 
a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -924,34 +924,22 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes) static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) { - __be32 *p; - - p = xdr_reserve_space(xdr, len); - xdr_encode_opaque_fixed(p, buf, len); + WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); } static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { - __be32 *p; - - p = reserve_space(xdr, 4 + len); - xdr_encode_opaque(p, str, len); + WARN_ON_ONCE(xdr_stream_encode_opaque(xdr, str, len) < 0); } static void encode_uint32(struct xdr_stream *xdr, u32 n) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(n); + WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0); } static void encode_uint64(struct xdr_stream *xdr, u64 n) { - __be32 *p; - - p = reserve_space(xdr, 8); - xdr_encode_hyper(p, n); + WARN_ON_ONCE(xdr_stream_encode_u64(xdr, n) < 0); } static void encode_nfs4_seqid(struct xdr_stream *xdr, @@ -4294,15 +4282,12 @@ static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access) static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) { - __be32 *p; - - p = xdr_inline_decode(xdr, len); - if (likely(p)) { - memcpy(buf, p, len); - return 0; + ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len); + if (unlikely(ret < 0)) { + print_overflow_msg(__func__, xdr); + return -EIO; } - print_overflow_msg(__func__, xdr); - return -EIO; + return 0; } static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) -- GitLab From 6da59ce2fd2047fd9cb141479f20d5f84614e84f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2017 16:08:27 -0500 Subject: [PATCH 0398/1084] NFSv4: Replace the open coded decode_opaque_inline() with the new generic Also ensure that we always check that the size of the decoded object matches the expectation that it must be smaller than NFS4_OPAQUE_LIMIT. This should be true for all the current users of decode_opaque_inline(), including decode_ace(), decode_pathname(), decode_attr_fs_locations() and decode_exchange_id(). 
Note that this allows us to get rid of a number of existing checks in decode_exchange_id(), Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5f0a6fcd0030..d10cc2a62dff 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3050,20 +3050,15 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) { - __be32 *p; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - *len = be32_to_cpup(p); - p = xdr_inline_decode(xdr, *len); - if (unlikely(!p)) - goto out_overflow; - *string = (char *)p; + ssize_t ret = xdr_stream_decode_opaque_inline(xdr, (void **)string, + NFS4_OPAQUE_LIMIT); + if (unlikely(ret < 0)) { + if (ret == -EBADMSG) + print_overflow_msg(__func__, xdr); + return -EIO; + } + *len = ret; return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) @@ -5645,8 +5640,6 @@ static int decode_exchange_id(struct xdr_stream *xdr, status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) - return -EIO; memcpy(res->server_owner->major_id, dummy_str, dummy); res->server_owner->major_id_sz = dummy; @@ -5654,8 +5647,6 @@ static int decode_exchange_id(struct xdr_stream *xdr, status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) - return -EIO; memcpy(res->server_scope->server_scope, dummy_str, dummy); res->server_scope->server_scope_sz = dummy; @@ -5670,16 +5661,12 @@ static int decode_exchange_id(struct xdr_stream *xdr, status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) - return -EIO; memcpy(res->impl_id->domain, dummy_str, dummy); /* nii_name */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; - if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) - return -EIO; memcpy(res->impl_id->name, dummy_str, dummy); /* nii_date */ -- GitLab From c065eeea3ba03232cb8aaf9d3dc7ad0a9fcb267b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2017 16:08:28 -0500 Subject: [PATCH 0399/1084] NFSv4: Replace callback string decode function with a generic Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e732a65db546..2ade5cb52b8e 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -83,23 +83,15 @@ static __be32 *read_buf(struct xdr_stream *xdr, size_t nbytes) return p; } -static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str) +static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len, + const char **str, size_t maxlen) { - __be32 *p; - - p = read_buf(xdr, 4); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - *len = ntohl(*p); - - if (*len != 0) { - p = read_buf(xdr, *len); - if (unlikely(p == NULL)) - return htonl(NFS4ERR_RESOURCE); - *str = (const char *)p; - } else - *str = NULL; + ssize_t err; + err = 
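The decode path it adds follows the same wire layout in reverse: read the 4-byte big-endian length, bounds-check it against the caller's maximum, copy the bytes into a freshly allocated buffer and NUL-terminate it. A minimal user-space sketch of that logic (illustration only; names and error handling here are made up, not the sunrpc implementation):

/* Sketch: decode an XDR opaque (4-byte big-endian length + data padded
 * to 4 bytes) into a newly allocated NUL-terminated string. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>	/* ntohl() */

static char *decode_string_dup(const unsigned char *buf, size_t buflen,
			       size_t maxlen, size_t *consumed)
{
	uint32_t len;
	size_t padded;
	char *s;

	if (buflen < 4)
		return NULL;				/* short buffer */
	memcpy(&len, buf, 4);
	len = ntohl(len);
	if (len > maxlen)
		return NULL;				/* too long */
	padded = (len + 3) & ~(size_t)3;
	if (buflen < 4 + padded)
		return NULL;				/* truncated */

	s = malloc(len + 1);
	if (!s)
		return NULL;
	memcpy(s, buf + 4, len);
	s[len] = '\0';
	*consumed = 4 + padded;
	return s;
}

int main(void)
{
	unsigned char wire[] = { 0, 0, 0, 5, 'h', 'e', 'l', 'l', 'o', 0, 0, 0 };
	size_t used = 0;
	char *s = decode_string_dup(wire, sizeof(wire), 64, &used);

	if (s) {
		printf("decoded \"%s\" (%zu wire bytes)\n", s, used);
		free(s);
	}
	return 0;
}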
xdr_stream_decode_opaque_inline(xdr, (void **)str, maxlen); + if (err < 0) + return cpu_to_be32(NFS4ERR_RESOURCE); + *len = err; return 0; } @@ -162,15 +154,9 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound __be32 *p; __be32 status; - status = decode_string(xdr, &hdr->taglen, &hdr->tag); + status = decode_string(xdr, &hdr->taglen, &hdr->tag, CB_OP_TAGLEN_MAXSZ); if (unlikely(status != 0)) return status; - /* We do not like overly long tags! */ - if (hdr->taglen > CB_OP_TAGLEN_MAXSZ) { - printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n", - __func__, hdr->taglen); - return htonl(NFS4ERR_RESOURCE); - } p = read_buf(xdr, 12); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); -- GitLab From 5a1f6d9e9b803003271b40b67786ff46fa4eda01 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2017 16:08:29 -0500 Subject: [PATCH 0400/1084] NFSv4: Fix the underestimation of delegation XDR space reservation Account for the "space_limit" field in struct open_write_delegation4. Fixes: 2cebf82883f4 ("NFSv4: Fix the underestimate of NFSv4 open request size") Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d10cc2a62dff..26808fbaacb0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -169,8 +169,10 @@ static int nfs4_stat_to_errno(int); open_owner_id_maxsz + \ encode_opentype_maxsz + \ encode_claim_null_maxsz) +#define decode_space_limit_maxsz (3) #define decode_ace_maxsz (3 + nfs4_owner_maxsz) #define decode_delegation_maxsz (1 + decode_stateid_maxsz + 1 + \ + decode_space_limit_maxsz + \ decode_ace_maxsz) #define decode_change_info_maxsz (5) #define decode_open_maxsz (op_decode_hdr_maxsz + \ -- GitLab From 1bbe60ff49becd7554e9b32b5e8e6fcee4b33db2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2017 16:08:30 -0500 Subject: [PATCH 0401/1084] NFSv4: Remove bogus "struct nfs_client" argument from decode_ace() We shouldn't need to force callers to carry an unused argument. Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 26808fbaacb0..ec0d76712e43 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3127,7 +3127,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) } /* Dummy routine */ -static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp) +static int decode_ace(struct xdr_stream *xdr, void *ace) { __be32 *p; unsigned int strlen; @@ -5075,7 +5075,7 @@ static int decode_rw_delegation(struct xdr_stream *xdr, if (decode_space_limit(xdr, &res->pagemod_limit) < 0) return -EIO; } - return decode_ace(xdr, NULL, res->server->nfs_client); + return decode_ace(xdr, NULL); out_overflow: print_overflow_msg(__func__, xdr); return -EIO; -- GitLab From 5c741d4f2215371ae17e3dbdf3d46da850662e13 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2017 16:08:31 -0500 Subject: [PATCH 0402/1084] SUNRPC: Add a helper function xdr_stream_decode_string_dup() Create a helper function that decodes a xdr string object, allocates a memory buffer and then store it as a NUL terminated string. 
Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 2 ++ net/sunrpc/xdr.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index dc7e51a769ac..054c8cde18f3 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -242,6 +242,8 @@ extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, + size_t maxlen, gfp_t gfp_flags); /** * xdr_align_size - Calculate padded size of an object * @n: Size of an object being XDR encoded (in bytes) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 7f1071e103ca..1f7082144e01 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1518,3 +1518,37 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, } EXPORT_SYMBOL_GPL(xdr_process_buf); +/** + * xdr_stream_decode_string_dup - Decode and duplicate variable length string + * @xdr: pointer to xdr_stream + * @str: location to store pointer to string + * @maxlen: maximum acceptable string length + * @gfp_flags: GFP mask to use + * + * Return values: + * On success, returns length of NUL-terminated string stored in *@ptr + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE if the size of the string would exceed @maxlen + * %-ENOMEM on memory allocation failure + */ +ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, + size_t maxlen, gfp_t gfp_flags) +{ + void *p; + ssize_t ret; + + ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen); + if (ret > 0) { + char *s = kmalloc(ret + 1, gfp_flags); + if (s != NULL) { + memcpy(s, p, ret); + s[ret] = '\0'; + *str = s; + return strlen(s); + } + ret = -ENOMEM; + } + *str = NULL; + return ret; +} +EXPORT_SYMBOL_GPL(xdr_stream_decode_string_dup); -- GitLab From 686a816ab6bedb99a892f3171f6c9ccecabff180 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 19 Feb 2017 16:08:32 -0500 Subject: [PATCH 0403/1084] NFSv4: Clean up owner/group attribute decode Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 117 +++++++++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 60 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ec0d76712e43..3268a2393512 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3875,45 +3875,50 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t return -EIO; } +static ssize_t decode_nfs4_string(struct xdr_stream *xdr, + struct nfs4_string *name, gfp_t gfp_flags) +{ + ssize_t ret; + + ret = xdr_stream_decode_string_dup(xdr, &name->data, + XDR_MAX_NETOBJ, gfp_flags); + name->len = 0; + if (ret > 0) + name->len = ret; + return ret; +} + static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, const struct nfs_server *server, kuid_t *uid, struct nfs4_string *owner_name) { - uint32_t len; - __be32 *p; - int ret = 0; + ssize_t len; + char *p; *uid = make_kuid(&init_user_ns, -2); if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) return -EIO; - if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - len = be32_to_cpup(p); 
- p = xdr_inline_decode(xdr, len); - if (unlikely(!p)) - goto out_overflow; - if (owner_name != NULL) { - owner_name->data = kmemdup(p, len, GFP_NOWAIT); - if (owner_name->data != NULL) { - owner_name->len = len; - ret = NFS_ATTR_FATTR_OWNER_NAME; - } - } else if (len < XDR_MAX_NETOBJ) { - if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0) - ret = NFS_ATTR_FATTR_OWNER; - else - dprintk("%s: nfs_map_name_to_uid failed!\n", - __func__); - } else - dprintk("%s: name too long (%u)!\n", - __func__, len); - bitmap[1] &= ~FATTR4_WORD1_OWNER; + if (!(bitmap[1] & FATTR4_WORD1_OWNER)) + return 0; + bitmap[1] &= ~FATTR4_WORD1_OWNER; + + if (owner_name != NULL) { + len = decode_nfs4_string(xdr, owner_name, GFP_NOWAIT); + if (len <= 0) + goto out; + dprintk("%s: name=%s\n", __func__, owner_name->data); + return NFS_ATTR_FATTR_OWNER_NAME; + } else { + len = xdr_stream_decode_opaque_inline(xdr, (void **)&p, + XDR_MAX_NETOBJ); + if (len <= 0 || nfs_map_name_to_uid(server, p, len, uid) != 0) + goto out; + dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid)); + return NFS_ATTR_FATTR_OWNER; } - dprintk("%s: uid=%d\n", __func__, (int)from_kuid(&init_user_ns, *uid)); - return ret; -out_overflow: +out: + if (len != -EBADMSG) + return 0; print_overflow_msg(__func__, xdr); return -EIO; } @@ -3922,41 +3927,33 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, const struct nfs_server *server, kgid_t *gid, struct nfs4_string *group_name) { - uint32_t len; - __be32 *p; - int ret = 0; + ssize_t len; + char *p; *gid = make_kgid(&init_user_ns, -2); if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) return -EIO; - if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - len = be32_to_cpup(p); - p = xdr_inline_decode(xdr, len); - if (unlikely(!p)) - goto out_overflow; - if (group_name != NULL) { - group_name->data = kmemdup(p, len, GFP_NOWAIT); - if (group_name->data != NULL) { - group_name->len = len; - ret = NFS_ATTR_FATTR_GROUP_NAME; - } - } else if (len < XDR_MAX_NETOBJ) { - if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0) - ret = NFS_ATTR_FATTR_GROUP; - else - dprintk("%s: nfs_map_group_to_gid failed!\n", - __func__); - } else - dprintk("%s: name too long (%u)!\n", - __func__, len); - bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; + if (!(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) + return 0; + bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; + + if (group_name != NULL) { + len = decode_nfs4_string(xdr, group_name, GFP_NOWAIT); + if (len <= 0) + goto out; + dprintk("%s: name=%s\n", __func__, group_name->data); + return NFS_ATTR_FATTR_OWNER_NAME; + } else { + len = xdr_stream_decode_opaque_inline(xdr, (void **)&p, + XDR_MAX_NETOBJ); + if (len <= 0 || nfs_map_group_to_gid(server, p, len, gid) != 0) + goto out; + dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid)); + return NFS_ATTR_FATTR_GROUP; } - dprintk("%s: gid=%d\n", __func__, (int)from_kgid(&init_user_ns, *gid)); - return ret; -out_overflow: +out: + if (len != -EBADMSG) + return 0; print_overflow_msg(__func__, xdr); return -EIO; } -- GitLab From 0d4b7bf1202466aa51c6199a38b4fda06efa51c8 Mon Sep 17 00:00:00 2001 From: Jia Hongtao Date: Wed, 4 Jan 2017 16:57:49 +0800 Subject: [PATCH 0404/1084] dt-bindings: Update QorIQ TMU thermal bindings For different types of SoC the sensor id and endianness may vary. "#thermal-sensor-cells" is used to provide sensor id information. "little-endian" property is to tell the endianness of TMU. 
Signed-off-by: Jia Hongtao Acked-by: Rob Herring Acked-by: Scott Wood Signed-off-by: Zhang Rui --- .../devicetree/bindings/thermal/qoriq-thermal.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt b/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt index 66223d561972..20ca4ef9d776 100644 --- a/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt @@ -17,6 +17,12 @@ Required properties: calibration data, as specified by the SoC reference manual. The first cell of each pair is the value to be written to TTCFGR, and the second is the value to be written to TSCFGR. +- #thermal-sensor-cells : Must be 1. The sensor specifier is the monitoring + site ID, and represents the "n" in TRITSRn and TRATSRn. + +Optional property: +- little-endian : If present, the TMU registers are little endian. If absent, + the default is big endian. Example: @@ -60,4 +66,5 @@ tmu@f0000 { 0x00030000 0x00000012 0x00030001 0x0000001d>; + #thermal-sensor-cells = <1>; }; -- GitLab From 734211cb621fb2861d7adcd7d2c06be8c6ef8993 Mon Sep 17 00:00:00 2001 From: Hongtao Jia Date: Wed, 4 Jan 2017 16:57:50 +0800 Subject: [PATCH 0405/1084] powerpc/mpc85xx: Update TMU device tree node for T1040/T1042 Update #thermal-sensor-cells from 0 to 1 according to the new binding. The sensor specifier added is the monitoring site ID, and represents the "n" in TRITSRn and TRATSRn. Signed-off-by: Jia Hongtao Acked-by: Scott Wood Signed-off-by: Zhang Rui --- arch/powerpc/boot/dts/fsl/t1040si-post.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index 44e399b17f6f..145c7f43b5b6 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -526,7 +526,7 @@ tmu: tmu@f0000 { 0x00030000 0x00000012 0x00030001 0x0000001d>; - #thermal-sensor-cells = <0>; + #thermal-sensor-cells = <1>; }; thermal-zones { @@ -534,7 +534,7 @@ cpu_thermal: cpu-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; - thermal-sensors = <&tmu>; + thermal-sensors = <&tmu 2>; trips { cpu_alert: cpu-alert { -- GitLab From ee73bcdb0b1aab35654912d10f07da0176848a87 Mon Sep 17 00:00:00 2001 From: Hongtao Jia Date: Wed, 4 Jan 2017 16:57:51 +0800 Subject: [PATCH 0406/1084] powerpc/mpc85xx: Update TMU device tree node for T1023/T1024 Update #thermal-sensor-cells from 0 to 1 according to the new binding. The sensor specifier added is the monitoring site ID, and represents the "n" in TRITSRn and TRATSRn. 
Signed-off-by: Jia Hongtao Acked-by: Scott Wood Signed-off-by: Zhang Rui --- arch/powerpc/boot/dts/fsl/t1023si-post.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi index da2894c59479..4908af501098 100644 --- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -422,7 +422,7 @@ tmu: tmu@f0000 { 0x00030001 0x0000000d 0x00030002 0x00000019 0x00030003 0x00000024>; - #thermal-sensor-cells = <0>; + #thermal-sensor-cells = <1>; }; thermal-zones { @@ -430,7 +430,7 @@ cpu_thermal: cpu-thermal { polling-delay-passive = <1000>; polling-delay = <5000>; - thermal-sensors = <&tmu>; + thermal-sensors = <&tmu 0>; trips { cpu_alert: cpu-alert { -- GitLab From df3ab232e462bce20710596d697ade6b72497694 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Feb 2017 19:49:09 -0500 Subject: [PATCH 0407/1084] pNFS/flexfiles: If the layout is invalid, it must be updated before retrying If we see that our pNFS READ/WRITE/COMMIT operation failed, but we also see that our layout segment is no longer valid, then we need to get a new layout segment before retrying. Fixes: 90816d1ddacf ("NFSv4.1/flexfiles: Don't mark the entire deviceid...") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7b5f37f5ddf1..78f9a3081127 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1053,9 +1053,6 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs_client *mds_client = mds_server->nfs_client; struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - if (task->tk_status >= 0) - return 0; - switch (task->tk_status) { /* MDS state errors */ case -NFS4ERR_DELEG_REVOKED: @@ -1157,9 +1154,6 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); - if (task->tk_status >= 0) - return 0; - switch (task->tk_status) { /* File access problems. Don't mark the device as unavailable */ case -EACCES: @@ -1195,6 +1189,13 @@ static int ff_layout_async_handle_error(struct rpc_task *task, { int vers = clp->cl_nfs_mod->rpc_vers->number; + if (task->tk_status >= 0) + return 0; + + /* Handle the case of an invalid layout segment */ + if (!pnfs_is_valid_lseg(lseg)) + return -NFS4ERR_RESET_TO_PNFS; + switch (vers) { case 3: return ff_layout_async_handle_error_v3(task, lseg, idx); -- GitLab From 9d8cacbf5636657d2cd0dda17438a56d806d3224 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Feb 2017 18:42:32 -0500 Subject: [PATCH 0408/1084] NFSv4: Fix reboot recovery in copy offload Copy offload code needs to be hooked into the code for handling NFS4ERR_BAD_STATEID by ensuring that we set the "stateid" field in struct nfs4_exception. 
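Concretely, the exception structures used in the retry loop now carry the stateid that was sent with the COPY, mirroring the hunk below:

    struct nfs4_exception src_exception = {
            .inode   = file_inode(src),
            .stateid = &args.src_stateid,
    };
    struct nfs4_exception dst_exception = {
            .inode   = file_inode(dst),
            .stateid = &args.dst_stateid,
    };

so the generic NFSv4 exception handling can recover from the bad stateid before the copy is retried.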
Reported-by: Olga Kornievskaia Fixes: 2e72448b07dc3 ("NFS: Add COPY nfs operation") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 63 +++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 98cf58341066..1e486c73ec94 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -129,30 +129,26 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) return err; } -static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, +static ssize_t _nfs42_proc_copy(struct file *src, struct nfs_lock_context *src_lock, - struct file *dst, loff_t pos_dst, + struct file *dst, struct nfs_lock_context *dst_lock, - size_t count) + struct nfs42_copy_args *args, + struct nfs42_copy_res *res) { - struct nfs42_copy_args args = { - .src_fh = NFS_FH(file_inode(src)), - .src_pos = pos_src, - .dst_fh = NFS_FH(file_inode(dst)), - .dst_pos = pos_dst, - .count = count, - }; - struct nfs42_copy_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY], - .rpc_argp = &args, - .rpc_resp = &res, + .rpc_argp = args, + .rpc_resp = res, }; struct inode *dst_inode = file_inode(dst); struct nfs_server *server = NFS_SERVER(dst_inode); + loff_t pos_src = args->src_pos; + loff_t pos_dst = args->dst_pos; + size_t count = args->count; int status; - status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, + status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, src_lock, FMODE_READ); if (status) return status; @@ -162,7 +158,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, if (status) return status; - status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, + status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); if (status) return status; @@ -172,22 +168,22 @@ static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src, return status; status = nfs4_call_sync(server->client, server, &msg, - &args.seq_args, &res.seq_res, 0); + &args->seq_args, &res->seq_res, 0); if (status == -ENOTSUPP) server->caps &= ~NFS_CAP_COPY; if (status) return status; - if (res.write_res.verifier.committed != NFS_FILE_SYNC) { - status = nfs_commit_file(dst, &res.write_res.verifier.verifier); + if (res->write_res.verifier.committed != NFS_FILE_SYNC) { + status = nfs_commit_file(dst, &res->write_res.verifier.verifier); if (status) return status; } truncate_pagecache_range(dst_inode, pos_dst, - pos_dst + res.write_res.count); + pos_dst + res->write_res.count); - return res.write_res.count; + return res->write_res.count; } ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, @@ -197,8 +193,22 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, struct nfs_server *server = NFS_SERVER(file_inode(dst)); struct nfs_lock_context *src_lock; struct nfs_lock_context *dst_lock; - struct nfs4_exception src_exception = { }; - struct nfs4_exception dst_exception = { }; + struct nfs42_copy_args args = { + .src_fh = NFS_FH(file_inode(src)), + .src_pos = pos_src, + .dst_fh = NFS_FH(file_inode(dst)), + .dst_pos = pos_dst, + .count = count, + }; + struct nfs42_copy_res res; + struct nfs4_exception src_exception = { + .inode = file_inode(src), + .stateid = &args.src_stateid, + }; + struct nfs4_exception dst_exception = { + .inode = file_inode(dst), + .stateid = &args.dst_stateid, + }; ssize_t err, err2; if 
(!nfs_server_capable(file_inode(dst), NFS_CAP_COPY)) @@ -208,7 +218,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, if (IS_ERR(src_lock)) return PTR_ERR(src_lock); - src_exception.inode = file_inode(src); src_exception.state = src_lock->open_context->state; dst_lock = nfs_get_lock_context(nfs_file_open_context(dst)); @@ -217,15 +226,17 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, goto out_put_src_lock; } - dst_exception.inode = file_inode(dst); dst_exception.state = dst_lock->open_context->state; do { inode_lock(file_inode(dst)); - err = _nfs42_proc_copy(src, pos_src, src_lock, - dst, pos_dst, dst_lock, count); + err = _nfs42_proc_copy(src, src_lock, + dst, dst_lock, + &args, &res); inode_unlock(file_inode(dst)); + if (err >= 0) + break; if (err == -ENOTSUPP) { err = -EOPNOTSUPP; break; -- GitLab From 2e38bea99a80eab408adee27f873a188d57b76cb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 22 Feb 2017 20:08:25 +0100 Subject: [PATCH 0409/1084] fuse: add missing FR_FORCE fuse_file_put() was missing the "force" flag for the RELEASE request when sending synchronously (fuseblk). If this flag is not set, then a sync request may be interrupted before it is dequeued by the userspace filesystem. In this case the OPEN won't be balanced with a RELEASE. Signed-off-by: Miklos Szeredi Fixes: 5a18ec176c93 ("fuse: fix hang of single threaded fuseblk filesystem") Cc: # v2.6.38+ --- fs/fuse/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2401c5dabb2a..5ec5870e423a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -100,6 +100,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) iput(req->misc.release.inode); fuse_put_request(ff->fc, req); } else if (sync) { + __set_bit(FR_FORCE, &req->flags); __clear_bit(FR_BACKGROUND, &req->flags); fuse_request_send(ff->fc, req); iput(req->misc.release.inode); -- GitLab From 267d84449f52349ee252db684ed95ede18e51744 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 22 Feb 2017 20:08:25 +0100 Subject: [PATCH 0410/1084] fuse: cleanup fuse_file refcounting struct fuse_file is stored in file->private_data. Make this always be a counting reference for consistency. This also allows fuse_sync_release() to call fuse_file_put() instead of partially duplicating its functionality. 
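The result is the usual "allocation returns the first reference" pattern. A minimal standalone sketch of that pattern (hypothetical names, not the fuse code):

    #include <linux/atomic.h>
    #include <linux/slab.h>

    struct obj {
            atomic_t count;
    };

    static struct obj *obj_alloc(void)
    {
            struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

            if (o)
                    atomic_set(&o->count, 1);       /* caller owns the first reference */
            return o;
    }

    static struct obj *obj_get(struct obj *o)
    {
            atomic_inc(&o->count);                  /* one get per stored pointer */
            return o;
    }

    static void obj_put(struct obj *o)
    {
            if (atomic_dec_and_test(&o->count))
                    kfree(o);                       /* last put frees */
    }

With file->private_data owning such a reference, fuse_sync_release() can simply end in fuse_file_put() instead of duplicating the release logic.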
Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 2 +- fs/fuse/file.c | 18 +++++++++--------- fs/fuse/fuse_i.h | 1 - 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 811fd8929a18..e816166ce42f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -473,7 +473,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, if (err) { fuse_sync_release(ff, flags); } else { - file->private_data = fuse_file_get(ff); + file->private_data = ff; fuse_finish_open(inode, file); } return err; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5ec5870e423a..a5f79c59fe1e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -58,7 +58,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) } INIT_LIST_HEAD(&ff->write_entry); - atomic_set(&ff->count, 0); + atomic_set(&ff->count, 1); RB_CLEAR_NODE(&ff->polled_node); init_waitqueue_head(&ff->poll_wait); @@ -75,7 +75,7 @@ void fuse_file_free(struct fuse_file *ff) kfree(ff); } -struct fuse_file *fuse_file_get(struct fuse_file *ff) +static struct fuse_file *fuse_file_get(struct fuse_file *ff) { atomic_inc(&ff->count); return ff; @@ -147,7 +147,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, ff->open_flags &= ~FOPEN_DIRECT_IO; ff->nodeid = nodeid; - file->private_data = fuse_file_get(ff); + file->private_data = ff; return 0; } @@ -298,13 +298,13 @@ static int fuse_release(struct inode *inode, struct file *file) void fuse_sync_release(struct fuse_file *ff, int flags) { - WARN_ON(atomic_read(&ff->count) > 1); + WARN_ON(atomic_read(&ff->count) != 1); fuse_prepare_release(ff, flags, FUSE_RELEASE); - __set_bit(FR_FORCE, &ff->reserved_req->flags); - __clear_bit(FR_BACKGROUND, &ff->reserved_req->flags); - fuse_request_send(ff->fc, ff->reserved_req); - fuse_put_request(ff->fc, ff->reserved_req); - kfree(ff); + /* + * iput(NULL) is a no-op and since the refcount is 1 and everything's + * synchronous, we are fine with not doing igrab() here" + */ + fuse_file_put(ff, true); } EXPORT_SYMBOL_GPL(fuse_sync_release); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 91307940c8ac..83f797271aef 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -732,7 +732,6 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, int fuse_open_common(struct inode *inode, struct file *file, bool isdir); struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); -struct fuse_file *fuse_file_get(struct fuse_file *ff); void fuse_file_free(struct fuse_file *ff); void fuse_finish_open(struct inode *inode, struct file *file); -- GitLab From 9a87ad3da905239413477ac0698734afc5cc30bd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 22 Feb 2017 20:08:25 +0100 Subject: [PATCH 0411/1084] fuse: release: private_data cannot be NULL Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a5f79c59fe1e..7069ea232049 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -246,14 +246,9 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) void fuse_release_common(struct file *file, int opcode) { - struct fuse_file *ff; - struct fuse_req *req; - - ff = file->private_data; - if (unlikely(!ff)) - return; + struct fuse_file *ff = file->private_data; + struct fuse_req *req = ff->reserved_req; - req = ff->reserved_req; fuse_prepare_release(ff, file->f_flags, opcode); if (ff->flock) { -- GitLab From a5e14c9376871ee74fe93fdcb380c5d54eaa2d43 Mon Sep 17 00:00:00 2001 From: Trond 
Myklebust Date: Fri, 17 Feb 2017 19:50:18 -0500 Subject: [PATCH 0412/1084] Revert "NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION replies to OP_SEQUENCE" This reverts commit 2cf10cdd486c362f983abdce00dc1127e8ab8c59. The patch has been seen to cause excessive looping. Reported-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # 4.10+ Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 45271e5994f0..92b2805ffb82 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -768,10 +768,6 @@ static int nfs41_sequence_process(struct rpc_task *task, case -NFS4ERR_SEQ_FALSE_RETRY: ++slot->seq_nr; goto retry_nowait; - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_BADSESSION: - nfs4_schedule_session_recovery(session, res->sr_status); - goto retry_nowait; default: /* Just update the slot sequence no. */ slot->seq_done = 1; -- GitLab From 0333ad4e4f49e14217256e1db1134a70cf60b2de Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 29 Dec 2016 16:58:54 -0800 Subject: [PATCH 0413/1084] f2fs: avoid needless checkpoint in f2fs_trim_fs The f2fs_trim_fs() doesn't need to do checkpoint if there are newly allocated data blocks only which didn't change the critical checkpoint data such as nat and sit entries. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1a9ba69a22ba..917b5c5053ae 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1248,14 +1248,15 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_flush_merged_bios(sbi); /* this is the case of multiple fstrims without any changes */ - if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) { - f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt); - f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries); - f2fs_bug_on(sbi, prefree_segments(sbi)); - flush_sit_entries(sbi, cpc); - clear_prefree_segments(sbi, cpc); - unblock_operations(sbi); - goto out; + if (cpc->reason == CP_DISCARD) { + if (NM_I(sbi)->dirty_nat_cnt == 0 && + SIT_I(sbi)->dirty_sentries == 0 && + prefree_segments(sbi) == 0) { + flush_sit_entries(sbi, cpc); + clear_prefree_segments(sbi, cpc); + unblock_operations(sbi); + goto out; + } } /* -- GitLab From f558b37bf4c35a54e1949f6533f39c64091bf60d Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 21 Feb 2017 20:30:47 +0530 Subject: [PATCH 0414/1084] powerpc/optprobes: Fix TOC handling in optprobes trampoline Optprobes on powerpc are limited to kernel text area. We decided to also optimize kretprobe_trampoline since that is also in kernel text area. However,we failed to take into consideration the fact that the same trampoline is also used to catch function returns from kernel modules. As an example: $ sudo modprobe kobject-example $ sudo bash -c "echo 'r foo_show+8' > /sys/kernel/debug/tracing/kprobe_events" $ sudo bash -c "echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable" $ sudo cat /sys/kernel/debug/kprobes/list c000000000041350 k kretprobe_trampoline+0x0 [OPTIMIZED] d000000000e00200 r foo_show+0x8 kobject_example $ cat /sys/kernel/kobject_example/foo Segmentation fault With the below (trimmed) splat in dmesg: Unable to handle kernel paging request for data at address 0xfec40000 Faulting instruction address: 0xc000000000041540 Oops: Kernel access of bad area, sig: 11 [#1] ... 
NIP [c000000000041540] optimized_callback+0x70/0xe0 LR [c000000000041e60] optinsn_slot+0xf8/0x10000 Call Trace: [c0000000c7327850] [c000000000289af4] alloc_set_pte+0x1c4/0x860 (unreliable) [c0000000c7327890] [c000000000041e60] optinsn_slot+0xf8/0x10000 --- interrupt: 700 at 0xc0000000c7327a80 LR = kretprobe_trampoline+0x0/0x10 [c0000000c7327ba0] [c0000000003a30d4] sysfs_kf_seq_show+0x104/0x1d0 [c0000000c7327bf0] [c0000000003a0bb4] kernfs_seq_show+0x44/0x60 [c0000000c7327c10] [c000000000330578] seq_read+0xf8/0x560 [c0000000c7327cb0] [c0000000003a1e64] kernfs_fop_read+0x194/0x260 [c0000000c7327d00] [c0000000002f9954] __vfs_read+0x44/0x1a0 [c0000000c7327d90] [c0000000002fb4cc] vfs_read+0xbc/0x1b0 [c0000000c7327de0] [c0000000002fd138] SyS_read+0x68/0x110 [c0000000c7327e30] [c00000000000b8e0] system_call+0x38/0xfc Fix this by loading up the kernel TOC before calling into the kernel. The original TOC gets restored as part of the usual pt_regs restore. Fixes: 762df10bad69 ("powerpc/kprobes: Optimize kprobe in kretprobe_trampoline()") Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/optprobes_head.S | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 53e429b5a29d..4937bef7652f 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -65,6 +65,13 @@ optprobe_template_entry: mfdsisr r5 std r5,_DSISR(r1) + /* + * We may get here from a module, so load the kernel TOC in r2. + * The original TOC gets restored when pt_regs is restored + * further below. + */ + ld r2,PACATOC(r13) + .global optprobe_template_op_address optprobe_template_op_address: /* -- GitLab From fda2d27db6eae5c2468f9e4657539b72bbc238bb Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 22 Feb 2017 10:42:02 +0530 Subject: [PATCH 0415/1084] powerpc/mm/hash: Always clear UPRT and Host Radix bits when setting up CPU We will set LPCR with correct value for radix during int. This make sure we start with a sanitized value of LPCR. In case of kexec, cpus can have LPCR value based on the previous translation mode we were running. 
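The assembly added below amounts to a read-modify-write of LPCR; roughly, in C terms:

    unsigned long lpcr = mfspr(SPRN_LPCR);

    lpcr |=  (LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE);  /* bits we want set */
    lpcr &= ~(LPCR_UPRT | LPCR_HR);                        /* stale UPRT/radix state from a kexec'ed kernel */

so whatever translation mode the previous kernel left behind is cleared before __init_LPCR writes the value back.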
Fixes: fe036a0605d60 ("powerpc/64/kexec: Fix MMU cleanup on radix") Cc: stable@vger.kernel.org # v4.9+ Acked-by: Michael Neuling Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cpu_setup_power.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 917188615bf5..7fe8c79e6937 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -101,6 +101,8 @@ _GLOBAL(__setup_cpu_power9) mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 + LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) + andc r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -122,6 +124,8 @@ _GLOBAL(__restore_cpu_power9) mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE) or r3, r3, r4 + LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) + andc r3, r3, r4 bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 -- GitLab From 9f3768e02335ddd6ebe1d85d5cb3a68ee6264004 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= Date: Tue, 21 Feb 2017 16:18:41 +0100 Subject: [PATCH 0416/1084] powerpc: Remove leftover cputime_to_nsecs call causing build error This type conversion is a leftover that got ignored during the kcpustat conversion to nanosecs, resulting in build breakage with config having CONFIG_NO_HZ_FULL=y. arch/powerpc/kernel/time.c: In function 'running_clock': arch/powerpc/kernel/time.c:712:2: error: implicit declaration of function 'cputime_to_nsecs' [-Werror=implicit-function-declaration] return local_clock() - cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]); All we need is to remove it. Fixes: e7f340ca9c07 ("powerpc, sched/cputime: Remove unused cputime definitions") Reported-by: Abdul Haleem Signed-off-by: Frederic Weisbecker Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc2e08d415fa..06bca663b4df 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -689,7 +689,7 @@ unsigned long long running_clock(void) * time and on a host which doesn't do any virtualisation TB *should* equal * VTB so it makes no difference anyway. */ - return local_clock() - cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]); + return local_clock() - kcpustat_this_cpu->cpustat[CPUTIME_STEAL]; } #endif -- GitLab From 1c68856e6ea8abd714415e52ef88943c022e24f0 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:10 -0800 Subject: [PATCH 0417/1084] scsi: aacraid: Fix camel case Replaced camel case with snake case for init supported options. Suggested-by: Johannes Thumshirn Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/aachba.c | 53 +++++++++--------- drivers/scsi/aacraid/aacraid.h | 98 +++++++++++++++++----------------- drivers/scsi/aacraid/commsup.c | 6 +-- drivers/scsi/aacraid/linit.c | 32 +++++------ drivers/scsi/aacraid/rx.c | 2 +- drivers/scsi/aacraid/src.c | 2 +- 6 files changed, 100 insertions(+), 93 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 907f1e80665b..98d4ffd798c5 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -483,7 +483,7 @@ int aac_get_containers(struct aac_dev *dev) if (status >= 0) { dresp = (struct aac_get_container_count_resp *)fib_data(fibptr); maximum_num_containers = le32_to_cpu(dresp->ContainerSwitchEntries); - if (fibptr->dev->supplement_adapter_info.SupportedOptions2 & + if (fibptr->dev->supplement_adapter_info.supported_options2 & AAC_OPTION_SUPPORTED_240_VOLUMES) { maximum_num_containers = le32_to_cpu(dresp->MaxSimpleVolumes); @@ -639,13 +639,16 @@ static void _aac_probe_container2(void * context, struct fib * fibptr) fsa_dev_ptr = fibptr->dev->fsa_dev; if (fsa_dev_ptr) { struct aac_mount * dresp = (struct aac_mount *) fib_data(fibptr); + __le32 sup_options2; + fsa_dev_ptr += scmd_id(scsicmd); + sup_options2 = + fibptr->dev->supplement_adapter_info.supported_options2; if ((le32_to_cpu(dresp->status) == ST_OK) && (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE) && (le32_to_cpu(dresp->mnt[0].state) != FSCS_HIDDEN)) { - if (!(fibptr->dev->supplement_adapter_info.SupportedOptions2 & - AAC_OPTION_VARIABLE_BLOCK_SIZE)) { + if (!(sup_options2 & AAC_OPTION_VARIABLE_BLOCK_SIZE)) { dresp->mnt[0].fileinfo.bdevinfo.block_size = 0x200; fsa_dev_ptr->block_size = 0x200; } else { @@ -688,7 +691,7 @@ static void _aac_probe_container1(void * context, struct fib * fibptr) int status; dresp = (struct aac_mount *) fib_data(fibptr); - if (!(fibptr->dev->supplement_adapter_info.SupportedOptions2 & + if (!(fibptr->dev->supplement_adapter_info.supported_options2 & AAC_OPTION_VARIABLE_BLOCK_SIZE)) dresp->mnt[0].capacityhigh = 0; if ((le32_to_cpu(dresp->status) != ST_OK) || @@ -705,7 +708,7 @@ static void _aac_probe_container1(void * context, struct fib * fibptr) dinfo = (struct aac_query_mount *)fib_data(fibptr); - if (fibptr->dev->supplement_adapter_info.SupportedOptions2 & + if (fibptr->dev->supplement_adapter_info.supported_options2 & AAC_OPTION_VARIABLE_BLOCK_SIZE) dinfo->command = cpu_to_le32(VM_NameServeAllBlk); else @@ -745,7 +748,7 @@ static int _aac_probe_container(struct scsi_cmnd * scsicmd, int (*callback)(stru dinfo = (struct aac_query_mount *)fib_data(fibptr); - if (fibptr->dev->supplement_adapter_info.SupportedOptions2 & + if (fibptr->dev->supplement_adapter_info.supported_options2 & AAC_OPTION_VARIABLE_BLOCK_SIZE) dinfo->command = cpu_to_le32(VM_NameServeAllBlk); else @@ -896,12 +899,14 @@ char * get_container_type(unsigned tindex) static void setinqstr(struct aac_dev *dev, void *data, int tindex) { struct scsi_inq *str; + struct aac_supplement_adapter_info *sup_adap_info; + sup_adap_info = &dev->supplement_adapter_info; str = (struct scsi_inq *)(data); /* cast data to scsi inq block */ memset(str, ' ', sizeof(*str)); - if (dev->supplement_adapter_info.AdapterTypeText[0]) { - char * cp = dev->supplement_adapter_info.AdapterTypeText; + if (sup_adap_info->adapter_type_text[0]) { + char *cp = sup_adap_info->adapter_type_text; int c; if ((cp[0] == 'A') && (cp[1] == 'O') && (cp[2] == 'C')) inqstrcpy("SMC", str->vid); @@ -911,8 +916,7 @@ static void setinqstr(struct aac_dev *dev, 
void *data, int tindex) ++cp; c = *cp; *cp = '\0'; - inqstrcpy (dev->supplement_adapter_info.AdapterTypeText, - str->vid); + inqstrcpy(sup_adap_info->adapter_type_text, str->vid); *cp = c; while (*cp && *cp != ' ') ++cp; @@ -1675,8 +1679,8 @@ int aac_issue_bmic_identify(struct aac_dev *dev, u32 bus, u32 target) if (!identify_resp) goto fib_free_ptr; - vbus = (u32)le16_to_cpu(dev->supplement_adapter_info.VirtDeviceBus); - vid = (u32)le16_to_cpu(dev->supplement_adapter_info.VirtDeviceTarget); + vbus = (u32)le16_to_cpu(dev->supplement_adapter_info.virt_device_bus); + vid = (u32)le16_to_cpu(dev->supplement_adapter_info.virt_device_target); aac_fib_init(fibptr); @@ -1815,9 +1819,9 @@ int aac_report_phys_luns(struct aac_dev *dev, struct fib *fibptr, int rescan) } vbus = (u32) le16_to_cpu( - dev->supplement_adapter_info.VirtDeviceBus); + dev->supplement_adapter_info.virt_device_bus); vid = (u32) le16_to_cpu( - dev->supplement_adapter_info.VirtDeviceTarget); + dev->supplement_adapter_info.virt_device_target); aac_fib_init(fibptr); @@ -1893,7 +1897,7 @@ int aac_get_adapter_info(struct aac_dev* dev) } memcpy(&dev->adapter_info, info, sizeof(*info)); - dev->supplement_adapter_info.VirtDeviceBus = 0xffff; + dev->supplement_adapter_info.virt_device_bus = 0xffff; if (dev->adapter_info.options & AAC_OPT_SUPPLEMENT_ADAPTER_INFO) { struct aac_supplement_adapter_info * sinfo; @@ -1961,7 +1965,7 @@ int aac_get_adapter_info(struct aac_dev* dev) } if (!dev->sync_mode && dev->sa_firmware && - dev->supplement_adapter_info.VirtDeviceBus != 0xffff) { + dev->supplement_adapter_info.virt_device_bus != 0xffff) { /* Thor SA Firmware -> CISS_REPORT_PHYSICAL_LUNS */ rcode = aac_report_phys_luns(dev, fibptr, AAC_INIT); } @@ -1976,8 +1980,8 @@ int aac_get_adapter_info(struct aac_dev* dev) (tmp>>16)&0xff, tmp&0xff, le32_to_cpu(dev->adapter_info.kernelbuild), - (int)sizeof(dev->supplement_adapter_info.BuildDate), - dev->supplement_adapter_info.BuildDate); + (int)sizeof(dev->supplement_adapter_info.build_date), + dev->supplement_adapter_info.build_date); tmp = le32_to_cpu(dev->adapter_info.monitorrev); printk(KERN_INFO "%s%d: monitor %d.%d-%d[%d]\n", dev->name, dev->id, @@ -1993,14 +1997,15 @@ int aac_get_adapter_info(struct aac_dev* dev) shost_to_class(dev->scsi_host_ptr), buffer)) printk(KERN_INFO "%s%d: serial %s", dev->name, dev->id, buffer); - if (dev->supplement_adapter_info.VpdInfo.Tsid[0]) { + if (dev->supplement_adapter_info.vpd_info.tsid[0]) { printk(KERN_INFO "%s%d: TSID %.*s\n", dev->name, dev->id, - (int)sizeof(dev->supplement_adapter_info.VpdInfo.Tsid), - dev->supplement_adapter_info.VpdInfo.Tsid); + (int)sizeof(dev->supplement_adapter_info + .vpd_info.tsid), + dev->supplement_adapter_info.vpd_info.tsid); } if (!aac_check_reset || ((aac_check_reset == 1) && - (dev->supplement_adapter_info.SupportedOptions2 & + (dev->supplement_adapter_info.supported_options2 & AAC_OPTION_IGNORE_RESET))) { printk(KERN_INFO "%s%d: Reset Adapter Ignored\n", dev->name, dev->id); @@ -2008,7 +2013,7 @@ int aac_get_adapter_info(struct aac_dev* dev) } dev->cache_protected = 0; - dev->jbod = ((dev->supplement_adapter_info.FeatureBits & + dev->jbod = ((dev->supplement_adapter_info.feature_bits & AAC_FEATURE_JBOD) != 0); dev->nondasd_support = 0; dev->raid_scsi_mode = 0; @@ -2631,7 +2636,7 @@ static int aac_start_stop(struct scsi_cmnd *scsicmd) struct scsi_device *sdev = scsicmd->device; struct aac_dev *aac = (struct aac_dev *)sdev->host->hostdata; - if (!(aac->supplement_adapter_info.SupportedOptions2 & + if 
(!(aac->supplement_adapter_info.supported_options2 & AAC_OPTION_POWER_MANAGEMENT)) { scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index f2344971e3cb..b5a2c87f289b 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1380,57 +1380,57 @@ struct aac_adapter_info struct aac_supplement_adapter_info { - u8 AdapterTypeText[17+1]; - u8 Pad[2]; - __le32 FlashMemoryByteSize; - __le32 FlashImageId; - __le32 MaxNumberPorts; - __le32 Version; - __le32 FeatureBits; - u8 SlotNumber; - u8 ReservedPad0[3]; - u8 BuildDate[12]; - __le32 CurrentNumberPorts; + u8 adapter_type_text[17+1]; + u8 pad[2]; + __le32 flash_memory_byte_size; + __le32 flash_image_id; + __le32 max_number_ports; + __le32 version; + __le32 feature_bits; + u8 slot_number; + u8 reserved_pad0[3]; + u8 build_date[12]; + __le32 current_number_ports; struct { - u8 AssemblyPn[8]; - u8 FruPn[8]; - u8 BatteryFruPn[8]; - u8 EcVersionString[8]; - u8 Tsid[12]; - } VpdInfo; - __le32 FlashFirmwareRevision; - __le32 FlashFirmwareBuild; - __le32 RaidTypeMorphOptions; - __le32 FlashFirmwareBootRevision; - __le32 FlashFirmwareBootBuild; - u8 MfgPcbaSerialNo[12]; - u8 MfgWWNName[8]; - __le32 SupportedOptions2; - __le32 StructExpansion; + u8 assembly_pn[8]; + u8 fru_pn[8]; + u8 battery_fru_pn[8]; + u8 ec_version_string[8]; + u8 tsid[12]; + } vpd_info; + __le32 flash_firmware_revision; + __le32 flash_firmware_build; + __le32 raid_type_morph_options; + __le32 flash_firmware_boot_revision; + __le32 flash_firmware_boot_build; + u8 mfg_pcba_serial_no[12]; + u8 mfg_wwn_name[8]; + __le32 supported_options2; + __le32 struct_expansion; /* StructExpansion == 1 */ - __le32 FeatureBits3; - __le32 SupportedPerformanceModes; - u8 HostBusType; /* uses HOST_BUS_TYPE_xxx defines */ - u8 HostBusWidth; /* actual width in bits or links */ - u16 HostBusSpeed; /* actual bus speed/link rate in MHz */ - u8 MaxRRCDrives; /* max. number of ITP-RRC drives/pool */ - u8 MaxDiskXtasks; /* max. possible num of DiskX Tasks */ - - u8 CpldVerLoaded; - u8 CpldVerInFlash; - - __le64 MaxRRCCapacity; - __le32 CompiledMaxHistLogLevel; - u8 CustomBoardName[12]; - u16 SupportedCntlrMode; /* identify supported controller mode */ - u16 ReservedForFuture16; - __le32 SupportedOptions3; /* reserved for future options */ - - __le16 VirtDeviceBus; /* virt. SCSI device for Thor */ - __le16 VirtDeviceTarget; - __le16 VirtDeviceLUN; - __le16 Unused; - __le32 ReservedForFutureGrowth[68]; + __le32 feature_bits3; + __le32 supported_performance_modes; + u8 host_bus_type; /* uses HOST_BUS_TYPE_xxx defines */ + u8 host_bus_width; /* actual width in bits or links */ + u16 host_bus_speed; /* actual bus speed/link rate in MHz */ + u8 max_rrc_drives; /* max. number of ITP-RRC drives/pool */ + u8 max_disk_xtasks; /* max. possible num of DiskX Tasks */ + + u8 cpld_ver_loaded; + u8 cpld_ver_in_flash; + + __le64 max_rrc_capacity; + __le32 compiled_max_hist_log_level; + u8 custom_board_name[12]; + u16 supported_cntlr_mode; /* identify supported controller mode */ + u16 reserved_for_future16; + __le32 supported_options3; /* reserved for future options */ + + __le16 virt_device_bus; /* virt. 
SCSI device for Thor */ + __le16 virt_device_target; + __le16 virt_device_lun; + __le16 unused; + __le32 reserved_for_future_growth[68]; }; #define AAC_FEATURE_FALCON cpu_to_le32(0x00000010) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 969727b67cdd..56090f5be6c5 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1815,7 +1815,7 @@ int aac_check_health(struct aac_dev * aac) printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED); if (!aac_check_reset || ((aac_check_reset == 1) && - (aac->supplement_adapter_info.SupportedOptions2 & + (aac->supplement_adapter_info.supported_options2 & AAC_OPTION_IGNORE_RESET))) goto out; host = aac->scsi_host_ptr; @@ -2264,8 +2264,8 @@ static int aac_send_wellness_command(struct aac_dev *dev, char *wellness_str, aac_fib_init(fibptr); - vbus = (u32)le16_to_cpu(dev->supplement_adapter_info.VirtDeviceBus); - vid = (u32)le16_to_cpu(dev->supplement_adapter_info.VirtDeviceTarget); + vbus = (u32)le16_to_cpu(dev->supplement_adapter_info.virt_device_bus); + vid = (u32)le16_to_cpu(dev->supplement_adapter_info.virt_device_target); srbcmd = (struct aac_srb *)fib_data(fibptr); diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 137d22d3a005..ab4f1e7870b9 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -891,13 +891,13 @@ static int aac_eh_reset(struct scsi_cmnd* cmd) * Adapters that support a register, instead of a commanded, * reset. */ - if (((aac->supplement_adapter_info.SupportedOptions2 & + if (((aac->supplement_adapter_info.supported_options2 & AAC_OPTION_MU_RESET) || - (aac->supplement_adapter_info.SupportedOptions2 & + (aac->supplement_adapter_info.supported_options2 & AAC_OPTION_DOORBELL_RESET)) && aac_check_reset && ((aac_check_reset != 1) || - !(aac->supplement_adapter_info.SupportedOptions2 & + !(aac->supplement_adapter_info.supported_options2 & AAC_OPTION_IGNORE_RESET))) { /* Bypass wait for command quiesce */ aac_reset_adapter(aac, 2, IOP_HWSOFT_RESET); @@ -1029,8 +1029,8 @@ static ssize_t aac_show_model(struct device *device, struct aac_dev *dev = (struct aac_dev*)class_to_shost(device)->hostdata; int len; - if (dev->supplement_adapter_info.AdapterTypeText[0]) { - char * cp = dev->supplement_adapter_info.AdapterTypeText; + if (dev->supplement_adapter_info.adapter_type_text[0]) { + char *cp = dev->supplement_adapter_info.adapter_type_text; while (*cp && *cp != ' ') ++cp; while (*cp == ' ') @@ -1046,18 +1046,20 @@ static ssize_t aac_show_vendor(struct device *device, struct device_attribute *attr, char *buf) { struct aac_dev *dev = (struct aac_dev*)class_to_shost(device)->hostdata; + struct aac_supplement_adapter_info *sup_adap_info; int len; - if (dev->supplement_adapter_info.AdapterTypeText[0]) { - char * cp = dev->supplement_adapter_info.AdapterTypeText; + sup_adap_info = &dev->supplement_adapter_info; + if (sup_adap_info->adapter_type_text[0]) { + char *cp = sup_adap_info->adapter_type_text; while (*cp && *cp != ' ') ++cp; len = snprintf(buf, PAGE_SIZE, "%.*s\n", - (int)(cp - (char *)dev->supplement_adapter_info.AdapterTypeText), - dev->supplement_adapter_info.AdapterTypeText); + (int)(cp - (char *)sup_adap_info->adapter_type_text), + sup_adap_info->adapter_type_text); } else len = snprintf(buf, PAGE_SIZE, "%s\n", - aac_drivers[dev->cardtype].vname); + aac_drivers[dev->cardtype].vname); return len; } @@ -1078,7 +1080,7 @@ static ssize_t aac_show_flags(struct device *cdev, "SAI_READ_CAPACITY_16\n"); if 
(dev->jbod) len += snprintf(buf + len, PAGE_SIZE - len, "SUPPORTED_JBOD\n"); - if (dev->supplement_adapter_info.SupportedOptions2 & + if (dev->supplement_adapter_info.supported_options2 & AAC_OPTION_POWER_MANAGEMENT) len += snprintf(buf + len, PAGE_SIZE - len, "SUPPORTED_POWER_MANAGEMENT\n"); @@ -1139,12 +1141,12 @@ static ssize_t aac_show_serial_number(struct device *device, len = snprintf(buf, 16, "%06X\n", le32_to_cpu(dev->adapter_info.serial[0])); if (len && - !memcmp(&dev->supplement_adapter_info.MfgPcbaSerialNo[ - sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo)-len], + !memcmp(&dev->supplement_adapter_info.mfg_pcba_serial_no[ + sizeof(dev->supplement_adapter_info.mfg_pcba_serial_no)-len], buf, len-1)) len = snprintf(buf, 16, "%.*s\n", - (int)sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo), - dev->supplement_adapter_info.MfgPcbaSerialNo); + (int)sizeof(dev->supplement_adapter_info.mfg_pcba_serial_no), + dev->supplement_adapter_info.mfg_pcba_serial_no); return min(len, 16); } diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c index 0e69a80c3275..5d19c31e3bba 100644 --- a/drivers/scsi/aacraid/rx.c +++ b/drivers/scsi/aacraid/rx.c @@ -475,7 +475,7 @@ static int aac_rx_restart_adapter(struct aac_dev *dev, int bled, u8 reset_type) { u32 var = 0; - if (!(dev->supplement_adapter_info.SupportedOptions2 & + if (!(dev->supplement_adapter_info.supported_options2 & AAC_OPTION_MU_RESET) || (bled >= 0) || (bled == -2)) { if (bled) printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n", diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 8e4e2ddbafd7..c17b0603749c 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -684,7 +684,7 @@ static void aac_send_iop_reset(struct aac_dev *dev, int bled) aac_set_intx_mode(dev); - if (!bled && (dev->supplement_adapter_info.SupportedOptions2 & + if (!bled && (dev->supplement_adapter_info.supported_options2 & AAC_OPTION_DOORBELL_RESET)) { src_writel(dev, MUnit.IDR, reset_mask); } else { -- GitLab From f3ef4a74dc3712ef0ce60d652aa87b1ba70cb2a4 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:11 -0800 Subject: [PATCH 0418/1084] scsi: aacraid: Use correct channel number for raw srb The channel being used for raw srb commands is retrieved from the utility sent fibs and is converted into physical channel id. The driver does not need to to do this since the management utility sends the correct channel id in the first place and in addition the driver sets inaccurate information in the cmd sent to the firmware and gets an invalid response. Fixed by using channel id from srb command. Cc: stable@vger.kernel.org Fixes: 423400e64d377c0 ("scsi: aacraid: Include HBA direct interface") Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/commctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index 614842a9eb07..f6afd50579c0 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -580,7 +580,7 @@ static int aac_send_raw_srb(struct aac_dev* dev, void __user * arg) goto cleanup; } - chn = aac_logical_to_phys(user_srbcmd->channel); + chn = user_srbcmd->channel; if (chn < AAC_MAX_BUSES && user_srbcmd->id < AAC_MAX_TARGETS && dev->hba_map[chn][user_srbcmd->id].devtype == AAC_DEVTYPE_NATIVE_RAW) { -- GitLab From 16ae9dd35d374182ce955063100fce66a9974e74 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:12 -0800 Subject: [PATCH 0419/1084] scsi: aacraid: Fix for excessive prints on EEH This issue showed up on a kdump debug(single CPU on powerkvm), when EEH errors rendered the adapter unusable. The driver correctly detected the issue and attempted to restart the controller, in doing so the driver attempted to read the status registers of the controller. This triggered additional eeh errors which continued for a good 6 minutes. Fixed by returning without waiting when EEH error is reported. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 39 +++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 56090f5be6c5..a8dd4b51b086 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -461,6 +461,35 @@ int aac_queue_get(struct aac_dev * dev, u32 * index, u32 qid, struct hw_fib * hw return 0; } +#ifdef CONFIG_EEH +static inline int aac_check_eeh_failure(struct aac_dev *dev) +{ + /* Check for an EEH failure for the given + * device node. Function eeh_dev_check_failure() + * returns 0 if there has not been an EEH error + * otherwise returns a non-zero value. + * + * Need to be called before any PCI operation, + * i.e.,before aac_adapter_check_health() + */ + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev->pdev); + + if (eeh_dev_check_failure(edev)) { + /* The EEH mechanisms will handle this + * error and reset the device if + * necessary. + */ + return 1; + } + return 0; +} +#else +static inline int aac_check_eeh_failure(struct aac_dev *dev) +{ + return 0; +} +#endif + /* * Define the highest level of host to adapter communication routines. * These routines will support host to adapter FS commuication. 
These @@ -496,7 +525,6 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, unsigned long mflags = 0; unsigned long sflags = 0; - if (!(hw_fib->header.XferState & cpu_to_le32(HostOwned))) return -EBUSY; /* @@ -662,6 +690,10 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, } return -ETIMEDOUT; } + + if (aac_check_eeh_failure(dev)) + return -EFAULT; + if ((blink = aac_adapter_check_health(dev)) > 0) { if (wait == -1) { printk(KERN_ERR "aacraid: aac_fib_send: adapter blinkLED 0x%x.\n" @@ -755,7 +787,12 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback, FIB_COUNTER_INCREMENT(aac_config.NativeSent); if (wait) { + spin_unlock_irqrestore(&fibptr->event_lock, flags); + + if (aac_check_eeh_failure(dev)) + return -EFAULT; + /* Only set for first known interruptable command */ if (down_interruptible(&fibptr->event_wait)) { fibptr->done = 2; -- GitLab From a0c6143e95c5b9e1f2d83e005e4e86ed3dc6096f Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:13 -0800 Subject: [PATCH 0420/1084] scsi: aacraid: Prevent E3 lockup when deleting units Arrconf management utility at times sends fibs with AdapterProcessed set in its fibs. This causes the controller to panic and lockup. Fixed by failing the commands that have AdapterProcessed set in its flag. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index a8dd4b51b086..e221321d97d2 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -527,6 +527,10 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, if (!(hw_fib->header.XferState & cpu_to_le32(HostOwned))) return -EBUSY; + + if (hw_fib->header.XferState & cpu_to_le32(AdapterProcessed)) + return -EINVAL; + /* * There are 5 cases with the wait and response requested flags. * The only invalid cases are if the caller requests to wait and -- GitLab From 1bff5abca65d4b9761fcc992ab6288243220003d Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:14 -0800 Subject: [PATCH 0421/1084] scsi: aacraid: Fix memory leak in fib init path aac_fib_map_free frees misaligned fib dma memory, additionally it does not free up the whole memory. Fixed by changing the code to free up the correct and full memory allocation. Cc: stable@vger.kernel.org Fixes: e8b12f0fb835223 ([SCSI] aacraid: Add new code for PMC-Sierra's SRC based controller family) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/commsup.c | 36 ++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index e221321d97d2..c10954b3cd46 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -95,12 +95,20 @@ static int fib_map_alloc(struct aac_dev *dev) void aac_fib_map_free(struct aac_dev *dev) { - if (dev->hw_fib_va && dev->max_cmd_size) { - pci_free_consistent(dev->pdev, - (dev->max_cmd_size * - (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB)), - dev->hw_fib_va, dev->hw_fib_pa); - } + size_t alloc_size; + size_t fib_size; + int num_fibs; + + if(!dev->hw_fib_va || !dev->max_cmd_size) + return; + + num_fibs = dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB; + fib_size = dev->max_fib_size + sizeof(struct aac_fib_xporthdr); + alloc_size = fib_size * num_fibs + ALIGN32 - 1; + + pci_free_consistent(dev->pdev, alloc_size, dev->hw_fib_va, + dev->hw_fib_pa); + dev->hw_fib_va = NULL; dev->hw_fib_pa = 0; } @@ -153,22 +161,20 @@ int aac_fib_setup(struct aac_dev * dev) if (i<0) return -ENOMEM; - /* 32 byte alignment for PMC */ - hw_fib_pa = (dev->hw_fib_pa + (ALIGN32 - 1)) & ~(ALIGN32 - 1); - dev->hw_fib_va = (struct hw_fib *)((unsigned char *)dev->hw_fib_va + - (hw_fib_pa - dev->hw_fib_pa)); - dev->hw_fib_pa = hw_fib_pa; memset(dev->hw_fib_va, 0, (dev->max_cmd_size + sizeof(struct aac_fib_xporthdr)) * (dev->scsi_host_ptr->can_queue + AAC_NUM_MGT_FIB)); + /* 32 byte alignment for PMC */ + hw_fib_pa = (dev->hw_fib_pa + (ALIGN32 - 1)) & ~(ALIGN32 - 1); + hw_fib = (struct hw_fib *)((unsigned char *)dev->hw_fib_va + + (hw_fib_pa - dev->hw_fib_pa)); + /* add Xport header */ - dev->hw_fib_va = (struct hw_fib *)((unsigned char *)dev->hw_fib_va + + hw_fib = (struct hw_fib *)((unsigned char *)hw_fib + sizeof(struct aac_fib_xporthdr)); - dev->hw_fib_pa += sizeof(struct aac_fib_xporthdr); + hw_fib_pa += sizeof(struct aac_fib_xporthdr); - hw_fib = dev->hw_fib_va; - hw_fib_pa = dev->hw_fib_pa; /* * Initialise the fibs */ -- GitLab From 30202e067a81754cb78cb823b7ce7e7cddd040e2 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:15 -0800 Subject: [PATCH 0422/1084] scsi: aacraid: Added sysfs for driver version Added support to retrieve driver version from a new sysfs variable called driver_version. It makes it easier for the user to figure out the driver version that is currently running. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/linit.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index ab4f1e7870b9..df027847f208 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1131,6 +1131,13 @@ static ssize_t aac_show_bios_version(struct device *device, return len; } +static ssize_t aac_show_driver_version(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", aac_driver_version); +} + static ssize_t aac_show_serial_number(struct device *device, struct device_attribute *attr, char *buf) { @@ -1241,6 +1248,13 @@ static struct device_attribute aac_bios_version = { }, .show = aac_show_bios_version, }; +static struct device_attribute aac_lld_version = { + .attr = { + .name = "driver_version", + .mode = 0444, + }, + .show = aac_show_driver_version, +}; static struct device_attribute aac_serial_number = { .attr = { .name = "serial_number", @@ -1278,6 +1292,7 @@ static struct device_attribute *aac_attrs[] = { &aac_kernel_version, &aac_monitor_version, &aac_bios_version, + &aac_lld_version, &aac_serial_number, &aac_max_channel, &aac_max_id, -- GitLab From a7e2c642844cfefd570cb54a8d9fe7b85605311b Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:16 -0800 Subject: [PATCH 0423/1084] scsi: aacraid: Fix sync fibs time out on controller reset After controller shutdown, all sync fibs time out due to not knowing about the switch to INT-x mode Fixed by replacing aac_src_access_devreg() to aac_set_intx_mode() call. Cc: stable@vger.kernel.org Fixes: 495c021767bd78c998 (aacraid: MSI-x support) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/aacraid.h | 1 + drivers/scsi/aacraid/comminit.c | 2 +- drivers/scsi/aacraid/src.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index b5a2c87f289b..9281e7295480 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -2639,6 +2639,7 @@ void aac_hba_callback(void *context, struct fib *fibptr); #define fib_data(fibctx) ((void *)(fibctx)->hw_fib_va->data) struct aac_dev *aac_init_adapter(struct aac_dev *dev); void aac_src_access_devreg(struct aac_dev *dev, int mode); +void aac_set_intx_mode(struct aac_dev *dev); int aac_get_config_status(struct aac_dev *dev, int commit_flag); int aac_get_containers(struct aac_dev *dev); int aac_scsi_cmd(struct scsi_cmnd *cmd); diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 40bfc57b6849..35607005f7e1 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -330,7 +330,7 @@ int aac_send_shutdown(struct aac_dev * dev) dev->pdev->device == PMC_DEVICE_S8 || dev->pdev->device == PMC_DEVICE_S9) && dev->msi_enabled) - aac_src_access_devreg(dev, AAC_ENABLE_INTX); + aac_set_intx_mode(dev); return status; } diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index c17b0603749c..b23c818adbf6 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -657,7 +657,7 @@ static int aac_srcv_ioremap(struct aac_dev *dev, u32 size) return 0; } -static void aac_set_intx_mode(struct aac_dev *dev) +void aac_set_intx_mode(struct aac_dev *dev) { if (dev->msi_enabled) { aac_src_access_devreg(dev, AAC_ENABLE_INTX); -- GitLab From 849ac6a591bf7b5777fdb6ce65030f32a7c73e1a Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:17 -0800 Subject: [PATCH 0424/1084] scsi: aacraid: Skip wellness sync on controller failure aac_command_thread checks on the health of controller periodically, using aac_check_health. If the status is an error state KERNEL_PANIC or anything else. The driver will attempt to restart the adapter, but the response is not checked in aac_command_thread. This allows the periodic sync to go thru and lead the driver to a hung state. Fixed by terminating the periodic loop(intended per original design), if the controller is not restored to a healthy state. Cc: stable@vger.kernel.org Fixes: 3d77d8404478353358 (scsi: aacraid: Added support for periodic wellness sync) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/commsup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index c10954b3cd46..eb4d8cfc3194 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2481,7 +2481,7 @@ int aac_command_thread(void *data) /* Don't even try to talk to adapter if its sick */ ret = aac_check_health(dev); - if (!dev->queues) + if (ret || !dev->queues) break; next_check_jiffies = jiffies + ((long)(unsigned)check_interval) -- GitLab From a2d0321dd532901ea64118ed5a752fa6e447d1da Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:18 -0800 Subject: [PATCH 0425/1084] scsi: aacraid: Reload offlined drives after controller reset During the IOP reset stress testing, it was found that the drives can be marked offline when the adapter controller crashes and IO's are running in parallel. When the controller does come back from the reset, the drive that is marked offline is not exposed. Fixed by removing and adding drives that are marked offline. In addition invoke a scsi host bus rescan to capture any additional configuration changes. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index eb4d8cfc3194..1f716c01708b 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1637,11 +1637,29 @@ static int _aac_reset_adapter(struct aac_dev *aac, int forced, u8 reset_type) command->SCp.phase = AAC_OWNER_ERROR_HANDLER; command->scsi_done(command); } + /* + * Any Device that was already marked offline needs to be cleaned up + */ + __shost_for_each_device(dev, host) { + if (!scsi_device_online(dev)) { + sdev_printk(KERN_INFO, dev, "Removing offline device\n"); + scsi_remove_device(dev); + scsi_device_put(dev); + } + } retval = 0; out: aac->in_reset = 0; scsi_unblock_requests(host); + /* + * Issue bus rescan to catch any configuration that might have + * occurred + */ + if (!retval) { + dev_info(&aac->pdev->dev, "Issuing bus rescan\n"); + scsi_scan_host(host); + } if (jafo) { spin_lock_irq(host->host_lock); } -- GitLab From 11da1b7c4856de05e00f50f54efe2f5349214d5b Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:19 -0800 Subject: [PATCH 0426/1084] scsi: aacraid: Decrease adapter health check interval Currently driver checks the health status of the adapter once every 24 hours. When that happens the driver becomes dependent on the kernel to figure out if the adapter is misbehaving. This might take some time (when the adapter is idle). The driver currently has support to restart/recover the controller when it fails, and decreasing the time interval will help. Fixed by decreasing check interval from 24 hours to 1 minute Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. 
Petersen --- drivers/scsi/aacraid/aachba.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 98d4ffd798c5..3ede50f25f46 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -311,7 +311,7 @@ module_param(update_interval, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(update_interval, "Interval in seconds between time sync" " updates issued to adapter."); -int check_interval = 24 * 60 * 60; +int check_interval = 60; module_param(check_interval, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(check_interval, "Interval in seconds between adapter health" " checks."); -- GitLab From 146aa1786d4978795cab5347d810e00236dea1c3 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:20 -0800 Subject: [PATCH 0427/1084] scsi: aacraid: Skip IOP reset on controller panic(SMART Family) When the SMART family of controller panic (KERNEL_PANIC) , they do not honor IOP resets. So better to skip it and directly perform a IWBR reset. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/src.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index b23c818adbf6..9b11e1a6cc75 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -714,6 +714,12 @@ static int aac_src_restart_adapter(struct aac_dev *dev, int bled, u8 reset_type) pr_err("%s%d: adapter kernel panic'd %x.\n", dev->name, dev->id, bled); + /* + * When there is a BlinkLED, IOP_RESET has not effect + */ + if (bled >= 2 && dev->sa_firmware && reset_type & HW_IOP_RESET) + reset_type &= ~HW_IOP_RESET; + dev->a_ops.adapter_enable_int = aac_src_disable_interrupt; switch (reset_type) { -- GitLab From c421530bf848604e97d0785a03b3fe2c62775083 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:21 -0800 Subject: [PATCH 0428/1084] scsi: aacraid: Reorder Adapter status check The driver currently checks the SELF_TEST_FAILED first and then KERNEL_PANIC next. Under error conditions(boot code failure) both SELF_TEST_FAILED and KERNEL_PANIC can be set at the same time. The driver has the capability to reset the controller on an KERNEL_PANIC, but not on SELF_TEST_FAILED. Fixed by first checking KERNEL_PANIC and then the others. Cc: stable@vger.kernel.org Fixes: e8b12f0fb835223752 ([SCSI] aacraid: Add new code for PMC-Sierra's SRC base controller family) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/src.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 9b11e1a6cc75..71aaabde6b57 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -436,17 +436,24 @@ static int aac_src_check_health(struct aac_dev *dev) { u32 status = src_readl(dev, MUnit.OMR); + /* + * Check to see if the board panic'd. + */ + if (unlikely(status & KERNEL_PANIC)) + goto err_blink; + /* * Check to see if the board failed any self tests. */ if (unlikely(status & SELF_TEST_FAILED)) - return -1; + goto err_out; /* - * Check to see if the board panic'd. + * Check to see if the board failed any self tests. 
*/ - if (unlikely(status & KERNEL_PANIC)) - return (status >> 16) & 0xFF; + if (unlikely(status & MONITOR_PANIC)) + goto err_out; + /* * Wait for the adapter to be up and running. */ @@ -456,6 +463,12 @@ static int aac_src_check_health(struct aac_dev *dev) * Everything is OK */ return 0; + +err_out: + return -1; + +err_blink: + return (status > 16) & 0xFF; } static inline u32 aac_get_vector(struct aac_dev *dev) -- GitLab From 09624645e1e85df8d68b04de6e0607d696268333 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:22 -0800 Subject: [PATCH 0429/1084] scsi: aacraid: Save adapter fib log before an IOP reset Currently the adapter firmware does not save outstanding I/O's log information when an IOP reset is triggered. This is problematic when trying to root cause and debug issues. Fixed by adding sync command to trigger I/O log file save in the adapter firmware before issuing an IOP reset. Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aachba.c | 4 ++++ drivers/scsi/aacraid/aacraid.h | 6 ++++++ drivers/scsi/aacraid/src.c | 17 +++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 3ede50f25f46..e3e93def722b 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -294,6 +294,10 @@ MODULE_PARM_DESC(aif_timeout, "The duration of time in seconds to wait for" "deregistering them. This is typically adjusted for heavily burdened" " systems."); +int aac_fib_dump; +module_param(aac_fib_dump, int, 0644); +MODULE_PARM_DESC(aac_fib_dump, "Dump controller fibs prior to IOP_RESET 0=off, 1=on"); + int numacb = -1; module_param(numacb, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(numacb, "Request a limit to the number of adapter control" diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 9281e7295480..622fd69b35cc 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1444,6 +1444,10 @@ struct aac_supplement_adapter_info #define AAC_OPTION_VARIABLE_BLOCK_SIZE cpu_to_le32(0x00040000) /* 240 simple volume support */ #define AAC_OPTION_SUPPORTED_240_VOLUMES cpu_to_le32(0x10000000) +/* + * Supports FIB dump sync command send prior to IOP_RESET + */ +#define AAC_OPTION_SUPPORTED3_IOP_RESET_FIB_DUMP cpu_to_le32(0x00004000) #define AAC_SIS_VERSION_V3 3 #define AAC_SIS_SLOT_UNKNOWN 0xFF @@ -2483,6 +2487,7 @@ struct aac_hba_info { #define GET_DRIVER_BUFFER_PROPERTIES 0x00000023 #define RCV_TEMP_READINGS 0x00000025 #define GET_COMM_PREFERRED_SETTINGS 0x00000026 +#define IOP_RESET_FW_FIB_DUMP 0x00000034 #define IOP_RESET 0x00001000 #define IOP_RESET_ALWAYS 0x00001001 #define RE_INIT_ADAPTER 0x000000ee @@ -2686,4 +2691,5 @@ extern int aac_commit; extern int update_interval; extern int check_interval; extern int aac_check_reset; +extern int aac_fib_dump; #endif diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 71aaabde6b57..2e5338dec621 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -679,10 +679,27 @@ void aac_set_intx_mode(struct aac_dev *dev) } } +static void aac_dump_fw_fib_iop_reset(struct aac_dev *dev) +{ + __le32 supported_options3; + + if (!aac_fib_dump) + return; + + supported_options3 = dev->supplement_adapter_info.supported_options3; + if (!(supported_options3 & AAC_OPTION_SUPPORTED3_IOP_RESET_FIB_DUMP)) + return; + + aac_adapter_sync_cmd(dev, 
IOP_RESET_FW_FIB_DUMP, + 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL); +} + static void aac_send_iop_reset(struct aac_dev *dev, int bled) { u32 var, reset_mask; + aac_dump_fw_fib_iop_reset(dev); + bled = aac_adapter_sync_cmd(dev, IOP_RESET_ALWAYS, 0, 0, 0, 0, 0, 0, &var, &reset_mask, NULL, NULL, NULL); -- GitLab From d844752e1801099f92c178845f56412861a2b4af Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:23 -0800 Subject: [PATCH 0430/1084] scsi: aacraid: Fix a potential spinlock double unlock bug The driver does not unlock the reply queue spin lock after handling SMART adapter events. Instead it might attempt to unlock an already unlocked spin lock. Fixed by making sure the driver locks the spin lock before freeing it. Thank you dan for finding this issue out. Fixes: 6223a39fe6fbbeef (scsi: aacraid: Added support for hotplug) Reported-by: Dan Carpenter Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 1f716c01708b..a2ea70d8a13a 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2215,7 +2215,7 @@ static void aac_process_events(struct aac_dev *dev) /* Thor AIF */ aac_handle_sa_aif(dev, fib); aac_fib_adapter_complete(fib, (u16)sizeof(u32)); - continue; + goto free_fib; } /* * We will process the FIB here or pass it to a -- GitLab From 0662cc968aceaca34848e63f431e585f4f71f746 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 16 Feb 2017 12:51:24 -0800 Subject: [PATCH 0431/1084] scsi: aacraid: Update driver version Updated driver version to 50792 Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aacraid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 622fd69b35cc..d036a806f31c 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -97,7 +97,7 @@ enum { #define PMC_GLOBAL_INT_BIT0 0x00000001 #ifndef AAC_DRIVER_BUILD -# define AAC_DRIVER_BUILD 50740 +# define AAC_DRIVER_BUILD 50792 # define AAC_DRIVER_BRANCH "-custom" #endif #define MAXIMUM_NUM_CONTAINERS 32 -- GitLab From ed10858eadd4988260c6bc7d75fc25176342b5a7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Feb 2017 16:03:52 +0100 Subject: [PATCH 0432/1084] scsi: smartpqi: fix time handling When we have turned off RTC support, the smartpqi driver fails to build: ERROR: "rtc_time64_to_tm" [drivers/scsi/smartpqi/smartpqi.ko] undefined! This is easily avoided by using the generic 'struct tm' based helper rather than the RTC specific one. While fixing this, I noticed that even though the driver uses time64_t for storing seconds, it gets them from the old 32-bit struct timeval. To address this, we can simplify the code by calling ktime_get_real_seconds() directly. Fixes: 6c223761eb54 ("smartpqi: initial commit of Microsemi smartpqi driver") Signed-off-by: Arnd Bergmann Acked-by: Don Brace Signed-off-by: Martin K. 
Petersen --- drivers/scsi/smartpqi/smartpqi_init.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 11c0dfb3dfa3..657ad15682a3 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -534,8 +534,7 @@ static int pqi_write_current_time_to_host_wellness( size_t buffer_length; time64_t local_time; unsigned int year; - struct timeval time; - struct rtc_time tm; + struct tm tm; buffer_length = sizeof(*buffer); @@ -552,9 +551,8 @@ static int pqi_write_current_time_to_host_wellness( put_unaligned_le16(sizeof(buffer->time), &buffer->time_length); - do_gettimeofday(&time); - local_time = time.tv_sec - (sys_tz.tz_minuteswest * 60); - rtc_time64_to_tm(local_time, &tm); + local_time = ktime_get_real_seconds(); + time64_to_tm(local_time, -sys_tz.tz_minuteswest * 60, &tm); year = tm.tm_year + 1900; buffer->time[0] = bin2bcd(tm.tm_hour); -- GitLab From f22aaec97d889f3fbf89b185309bdc98b8202eda Mon Sep 17 00:00:00 2001 From: Subhash Jadavani Date: Fri, 17 Feb 2017 13:53:41 -0800 Subject: [PATCH 0433/1084] scsi: ufs-qcom: remove redundant condition check Dan Carpenter reported this: The patch 9c46b8676271: "scsi: ufs-qcom: dump additional testbus registers" from Feb 3, 2017, leads to the following static checker warning: drivers/scsi/ufs/ufs-qcom.c:1531 ufs_qcom_testbus_cfg_is_ok() warn: impossible condition '(host->testbus.select_minor > 255) => (0-255 > 255)' drivers/scsi/ufs/ufs-qcom.c 1517 static bool ufs_qcom_testbus_cfg_is_ok(struct ufs_qcom_host *host) 1518 { 1519 if (host->testbus.select_major >= TSTBUS_MAX) { 1520 dev_err(host->hba->dev, 1521 "%s: UFS_CFG1[TEST_BUS_SEL} may not equal 0x%05X\n", 1522 __func__, host->testbus.select_major); 1523 return false; 1524 } 1525 1526 /* 1527 * Not performing check for each individual select_major 1528 * mappings of select_minor, since there is no harm in 1529 * configuring a non-existent select_minor 1530 */ 1531 if (host->testbus.select_minor > 0xFF) { ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ It might make sense to keep this check. I don't know. But it's confusing that 0xFF is a magic number. Better to make it a define. 1532 dev_err(host->hba->dev, 1533 "%s: 0x%05X is not a legal testbus option\n", 1534 __func__, host->testbus.select_minor); 1535 return false; 1536 } 1537 1538 return true; 1539 } --- As data type of "select_minor" is u8, above check is redundant. This change removes it. Reported-by: Dan Carpenter Signed-off-by: Subhash Jadavani Signed-off-by: Martin K. 
Petersen --- drivers/scsi/ufs/ufs-qcom.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index ce5d023c1c91..c87d770b519a 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1523,18 +1523,6 @@ static bool ufs_qcom_testbus_cfg_is_ok(struct ufs_qcom_host *host) return false; } - /* - * Not performing check for each individual select_major - * mappings of select_minor, since there is no harm in - * configuring a non-existent select_minor - */ - if (host->testbus.select_minor > 0xFF) { - dev_err(host->hba->dev, - "%s: 0x%05X is not a legal testbus option\n", - __func__, host->testbus.select_minor); - return false; - } - return true; } -- GitLab From 857de6e00778738dc3d61f75acbac35bdc48e533 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 17 Feb 2017 09:02:45 +0100 Subject: [PATCH 0434/1084] scsi: use 'scsi_device_from_queue()' for scsi_dh The device handler needs to check if a given queue belongs to a scsi device; only then does it make sense to attach a device handler. [mkp: dropped flags] Cc: Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_dh.c | 22 ++++------------------ drivers/scsi/scsi_lib.c | 23 +++++++++++++++++++++++ include/scsi/scsi_device.h | 1 + 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index b8d3b97b217a..84addee05be6 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -219,20 +219,6 @@ int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh) } EXPORT_SYMBOL_GPL(scsi_unregister_device_handler); -static struct scsi_device *get_sdev_from_queue(struct request_queue *q) -{ - struct scsi_device *sdev; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - sdev = q->queuedata; - if (!sdev || !get_device(&sdev->sdev_gendev)) - sdev = NULL; - spin_unlock_irqrestore(q->queue_lock, flags); - - return sdev; -} - /* * scsi_dh_activate - activate the path associated with the scsi_device * corresponding to the given request queue. 
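A minimal caller-side sketch of how the new helper is meant to be used (the function example_queue_is_scsi is invented for illustration and is not part of the patch). scsi_device_from_queue() returns the scsi_device with a reference held, so the caller must drop it with put_device() when it is done:

#include <linux/blkdev.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <scsi/scsi_device.h>

static int example_queue_is_scsi(struct request_queue *q)
{
	struct scsi_device *sdev;

	/* NULL means the queue does not belong to a SCSI device */
	sdev = scsi_device_from_queue(q);
	if (!sdev)
		return -ENODEV;

	/* ... safe to inspect the device here, a reference is held ... */

	put_device(&sdev->sdev_gendev);		/* drop that reference */
	return 0;
}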
@@ -251,7 +237,7 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data) struct scsi_device *sdev; int err = SCSI_DH_NOSYS; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) { if (fn) fn(data, err); @@ -298,7 +284,7 @@ int scsi_dh_set_params(struct request_queue *q, const char *params) struct scsi_device *sdev; int err = -SCSI_DH_NOSYS; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return err; @@ -321,7 +307,7 @@ int scsi_dh_attach(struct request_queue *q, const char *name) struct scsi_device_handler *scsi_dh; int err = 0; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return -ENODEV; @@ -359,7 +345,7 @@ const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) struct scsi_device *sdev; const char *handler_name = NULL; - sdev = get_sdev_from_queue(q); + sdev = scsi_device_from_queue(q); if (!sdev) return NULL; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 912fbc3b4543..39b2d727f033 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2231,6 +2231,29 @@ void scsi_mq_destroy_tags(struct Scsi_Host *shost) blk_mq_free_tag_set(&shost->tag_set); } +/** + * scsi_device_from_queue - return sdev associated with a request_queue + * @q: The request queue to return the sdev from + * + * Return the sdev associated with a request queue or NULL if the + * request_queue does not reference a SCSI device. + */ +struct scsi_device *scsi_device_from_queue(struct request_queue *q) +{ + struct scsi_device *sdev = NULL; + + if (q->mq_ops) { + if (q->mq_ops == &scsi_mq_ops) + sdev = q->queuedata; + } else if (q->request_fn == scsi_request_fn) + sdev = q->queuedata; + if (!sdev || !get_device(&sdev->sdev_gendev)) + sdev = NULL; + + return sdev; +} +EXPORT_SYMBOL_GPL(scsi_device_from_queue); + /* * Function: scsi_block_requests() * diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 8990e580b278..be41c76ddd48 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -315,6 +315,7 @@ extern void scsi_remove_device(struct scsi_device *); extern int scsi_unregister_device_handler(struct scsi_device_handler *scsi_dh); void scsi_attach_vpd(struct scsi_device *sdev); +extern struct scsi_device *scsi_device_from_queue(struct request_queue *q); extern int scsi_device_get(struct scsi_device *); extern void scsi_device_put(struct scsi_device *); extern struct scsi_device *scsi_device_lookup(struct Scsi_Host *, -- GitLab From 943445200b049d5179b95297e5372d399c8ab0e2 Mon Sep 17 00:00:00 2001 From: "Matthew R. Ochs" Date: Thu, 16 Feb 2017 21:39:32 -0600 Subject: [PATCH 0435/1084] scsi: cxlflash: Enable PCI device ID for future IBM CXL Flash AFU Add support for a future IBM Coherent Accelerator (CXL) flash AFU with an ID of 0x0624. Signed-off-by: Matthew R. Ochs Signed-off-by: Uma Krishnan Signed-off-by: Martin K. 
Petersen --- drivers/scsi/cxlflash/main.c | 4 ++++ drivers/scsi/cxlflash/main.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 7069639e92bc..3061d8045382 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -2259,6 +2259,8 @@ static struct dev_dependent_vals dev_corsa_vals = { CXLFLASH_MAX_SECTORS, 0ULL }; static struct dev_dependent_vals dev_flash_gt_vals = { CXLFLASH_MAX_SECTORS, CXLFLASH_NOTIFY_SHUTDOWN }; +static struct dev_dependent_vals dev_briard_vals = { CXLFLASH_MAX_SECTORS, + CXLFLASH_NOTIFY_SHUTDOWN }; /* * PCI device binding table @@ -2268,6 +2270,8 @@ static struct pci_device_id cxlflash_pci_table[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_corsa_vals}, {PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_FLASH_GT, PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_flash_gt_vals}, + {PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_BRIARD, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_briard_vals}, {} }; diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h index e43545c86bcf..0be2261e6312 100644 --- a/drivers/scsi/cxlflash/main.h +++ b/drivers/scsi/cxlflash/main.h @@ -25,6 +25,7 @@ #define PCI_DEVICE_ID_IBM_CORSA 0x04F0 #define PCI_DEVICE_ID_IBM_FLASH_GT 0x0600 +#define PCI_DEVICE_ID_IBM_BRIARD 0x0624 /* Since there is only one target, make it 0 */ #define CXLFLASH_TARGET 0 -- GitLab From 8ea73db486cda442f0671f4bc9c03a76be398a28 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:25 -0800 Subject: [PATCH 0436/1084] scsi: lpfc: Correct WQ creation for pagesize Correct WQ creation for pagesize The driver was calculating the adapter command pagesize indicator from the system pagesize. However, the buffers the driver allocates are only one size (SLI4_PAGE_SIZE), so no calculation was necessary. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_hw4.h | 2 ++ drivers/scsi/lpfc/lpfc_sli.c | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 5646699b0516..964a1fdb076b 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1186,6 +1186,7 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_page_size_SHIFT 0 #define lpfc_mbx_wq_create_page_size_MASK 0x000000FF #define lpfc_mbx_wq_create_page_size_WORD word1 +#define LPFC_WQ_PAGE_SIZE_4096 0x1 #define lpfc_mbx_wq_create_wqe_size_SHIFT 8 #define lpfc_mbx_wq_create_wqe_size_MASK 0x0000000F #define lpfc_mbx_wq_create_wqe_size_WORD word1 @@ -1257,6 +1258,7 @@ struct rq_context { #define lpfc_rq_context_page_size_SHIFT 0 /* Version 1 Only */ #define lpfc_rq_context_page_size_MASK 0x000000FF #define lpfc_rq_context_page_size_WORD word0 +#define LPFC_RQ_PAGE_SIZE_4096 0x1 uint32_t reserved1; uint32_t word2; #define lpfc_rq_context_cq_id_SHIFT 16 diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d977a472f89f..586984dce3a4 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13722,7 +13722,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, LPFC_WQ_WQE_SIZE_128); bf_set(lpfc_mbx_wq_create_page_size, &wq_create->u.request_1, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + LPFC_WQ_PAGE_SIZE_4096); page = wq_create->u.request_1.page; break; } @@ -13748,8 +13748,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, LPFC_WQ_WQE_SIZE_128); break; } - bf_set(lpfc_mbx_wq_create_page_size, &wq_create->u.request_1, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + bf_set(lpfc_mbx_wq_create_page_size, + &wq_create->u.request_1, + LPFC_WQ_PAGE_SIZE_4096); page = wq_create->u.request_1.page; break; default: @@ -13935,7 +13936,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, LPFC_RQE_SIZE_8); bf_set(lpfc_rq_context_page_size, &rq_create->u.request.context, - (PAGE_SIZE/SLI4_PAGE_SIZE)); + LPFC_RQ_PAGE_SIZE_4096); } else { switch (hrq->entry_count) { default: -- GitLab From 45ffac1976c580ac20d926257b2f6320ce1b210f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 12 Feb 2017 13:52:26 -0800 Subject: [PATCH 0437/1084] scsi: lpfc: use pci_irq_alloc_vectors and pci_irq_free_vectors This avoids having to store the msix_entries array and simpliefies the shutdown and cleanup path a lot. Signed-off-by: Christoph Hellwig Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc.h | 2 - drivers/scsi/lpfc/lpfc_init.c | 240 ++++++++-------------------------- drivers/scsi/lpfc/lpfc_sli4.h | 1 - 3 files changed, 54 insertions(+), 189 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 6593b073c524..64022d79117b 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -878,8 +878,6 @@ struct lpfc_hba { enum intr_type_t intr_type; uint32_t intr_mode; #define LPFC_INTR_ERROR 0xFFFFFFFF - struct msix_entry msix_entries[LPFC_MSIX_VECTORS]; - struct list_head port_list; struct lpfc_vport *pport; /* physical lpfc_vport pointer */ uint16_t max_vpi; /* Maximum virtual nports */ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 64717c171b15..8cffad192f17 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5509,17 +5509,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_free_fcf_rr_bmask; } - phba->sli4_hba.msix_entries = kzalloc((sizeof(struct msix_entry) * - (fof_vectors + - phba->cfg_fcp_io_channel)), GFP_KERNEL); - if (!phba->sli4_hba.msix_entries) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2573 Failed allocate memory for msi-x " - "interrupt vector entries\n"); - rc = -ENOMEM; - goto out_free_fcp_eq_hdl; - } - phba->sli4_hba.cpu_map = kzalloc((sizeof(struct lpfc_vector_map_info) * phba->sli4_hba.num_present_cpu), GFP_KERNEL); @@ -5528,7 +5517,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) "3327 Failed allocate memory for msi-x " "interrupt vector mapping\n"); rc = -ENOMEM; - goto out_free_msix; + goto out_free_fcp_eq_hdl; } if (lpfc_used_cpu == NULL) { lpfc_used_cpu = kzalloc((sizeof(uint16_t) * lpfc_present_cpu), @@ -5539,7 +5528,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) "interrupt vector mapping\n"); kfree(phba->sli4_hba.cpu_map); rc = -ENOMEM; - goto out_free_msix; + goto out_free_fcp_eq_hdl; } for (i = 0; i < lpfc_present_cpu; i++) lpfc_used_cpu[i] = LPFC_VECTOR_MAP_EMPTY; @@ -5574,8 +5563,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) return 0; -out_free_msix: - kfree(phba->sli4_hba.msix_entries); out_free_fcp_eq_hdl: kfree(phba->sli4_hba.fcp_eq_hdl); out_free_fcf_rr_bmask: @@ -5611,9 +5598,6 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) phba->sli4_hba.num_online_cpu = 0; phba->sli4_hba.curr_disp_cpu = 0; - /* Free memory allocated for msi-x interrupt vector entries */ - kfree(phba->sli4_hba.msix_entries); - /* Free memory allocated for fast-path work queue handles */ kfree(phba->sli4_hba.fcp_eq_hdl); @@ -8484,16 +8468,7 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba) * @phba: pointer to lpfc hba data structure. * * This routine is invoked to enable the MSI-X interrupt vectors to device - * with SLI-3 interface specs. The kernel function pci_enable_msix_exact() - * is called to enable the MSI-X vectors. Note that pci_enable_msix_exact(), - * once invoked, enables either all or nothing, depending on the current - * availability of PCI vector resources. The device driver is responsible - * for calling the individual request_irq() to register each MSI-X vector - * with a interrupt handler, which is done in this function. Note that - * later when device is unloading, the driver should always call free_irq() - * on all MSI-X vectors it has done request_irq() on before calling - * pci_disable_msix(). Failure to do so results in a BUG_ON() and a device - * will be left with MSI-X enabled and leaks its vectors. + * with SLI-3 interface specs. 
* * Return codes * 0 - successful @@ -8502,33 +8477,24 @@ lpfc_sli4_pci_mem_unset(struct lpfc_hba *phba) static int lpfc_sli_enable_msix(struct lpfc_hba *phba) { - int rc, i; + int rc; LPFC_MBOXQ_t *pmb; /* Set up MSI-X multi-message vectors */ - for (i = 0; i < LPFC_MSIX_VECTORS; i++) - phba->msix_entries[i].entry = i; - - /* Configure MSI-X capability structure */ - rc = pci_enable_msix_exact(phba->pcidev, phba->msix_entries, - LPFC_MSIX_VECTORS); - if (rc) { + rc = pci_alloc_irq_vectors(phba->pcidev, + LPFC_MSIX_VECTORS, LPFC_MSIX_VECTORS, PCI_IRQ_MSIX); + if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0420 PCI enable MSI-X failed (%d)\n", rc); goto vec_fail_out; } - for (i = 0; i < LPFC_MSIX_VECTORS; i++) - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "0477 MSI-X entry[%d]: vector=x%x " - "message=%d\n", i, - phba->msix_entries[i].vector, - phba->msix_entries[i].entry); + /* * Assign MSI-X vectors to interrupt handlers */ /* vector-0 is associated to slow-path handler */ - rc = request_irq(phba->msix_entries[0].vector, + rc = request_irq(pci_irq_vector(phba->pcidev, 0), &lpfc_sli_sp_intr_handler, 0, LPFC_SP_DRIVER_HANDLER_NAME, phba); if (rc) { @@ -8539,7 +8505,7 @@ lpfc_sli_enable_msix(struct lpfc_hba *phba) } /* vector-1 is associated to fast-path handler */ - rc = request_irq(phba->msix_entries[1].vector, + rc = request_irq(pci_irq_vector(phba->pcidev, 1), &lpfc_sli_fp_intr_handler, 0, LPFC_FP_DRIVER_HANDLER_NAME, phba); @@ -8584,41 +8550,20 @@ lpfc_sli_enable_msix(struct lpfc_hba *phba) mem_fail_out: /* free the irq already requested */ - free_irq(phba->msix_entries[1].vector, phba); + free_irq(pci_irq_vector(phba->pcidev, 1), phba); irq_fail_out: /* free the irq already requested */ - free_irq(phba->msix_entries[0].vector, phba); + free_irq(pci_irq_vector(phba->pcidev, 0), phba); msi_fail_out: /* Unconfigure MSI-X capability structure */ - pci_disable_msix(phba->pcidev); + pci_free_irq_vectors(phba->pcidev); vec_fail_out: return rc; } -/** - * lpfc_sli_disable_msix - Disable MSI-X interrupt mode on SLI-3 device. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to release the MSI-X vectors and then disable the - * MSI-X interrupt mode to device with SLI-3 interface spec. - **/ -static void -lpfc_sli_disable_msix(struct lpfc_hba *phba) -{ - int i; - - /* Free up MSI-X multi-message vectors */ - for (i = 0; i < LPFC_MSIX_VECTORS; i++) - free_irq(phba->msix_entries[i].vector, phba); - /* Disable MSI-X */ - pci_disable_msix(phba->pcidev); - - return; -} - /** * lpfc_sli_enable_msi - Enable MSI interrupt mode on SLI-3 device. * @phba: pointer to lpfc hba data structure. @@ -8658,24 +8603,6 @@ lpfc_sli_enable_msi(struct lpfc_hba *phba) return rc; } -/** - * lpfc_sli_disable_msi - Disable MSI interrupt mode to SLI-3 device. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to disable the MSI interrupt mode to device with - * SLI-3 interface spec. The driver calls free_irq() on MSI vector it has - * done request_irq() on before calling pci_disable_msi(). Failure to do so - * results in a BUG_ON() and a device will be left with MSI enabled and leaks - * its vector. - */ -static void -lpfc_sli_disable_msi(struct lpfc_hba *phba) -{ - free_irq(phba->pcidev->irq, phba); - pci_disable_msi(phba->pcidev); - return; -} - /** * lpfc_sli_enable_intr - Enable device interrupt to SLI-3 device. * @phba: pointer to lpfc hba data structure. 
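The conversion follows the generic pci_alloc_irq_vectors() pattern: allocate a block of vectors, translate each index to a Linux IRQ with pci_irq_vector(), and give everything back with pci_free_irq_vectors(). A stand-alone sketch of that pattern, with invented names (my_handler, my_setup_msix, "my-msix"), none of which is lpfc code:

#include <linux/pci.h>
#include <linux/interrupt.h>

static irqreturn_t my_handler(int irq, void *data)
{
	/* a real handler would inspect the hardware here */
	return IRQ_HANDLED;
}

static int my_setup_msix(struct pci_dev *pdev, int want, void *drvdata)
{
	int nvec, i, rc;

	/* ask for up to 'want' MSI-X vectors, accept as few as 1 */
	nvec = pci_alloc_irq_vectors(pdev, 1, want, PCI_IRQ_MSIX);
	if (nvec < 0)
		return nvec;

	for (i = 0; i < nvec; i++) {
		rc = request_irq(pci_irq_vector(pdev, i), my_handler, 0,
				 "my-msix", drvdata);
		if (rc)
			goto unwind;
	}
	return 0;			/* teardown frees the vectors later */

unwind:
	while (--i >= 0)
		free_irq(pci_irq_vector(pdev, i), drvdata);
	pci_free_irq_vectors(pdev);
	return rc;
}

The same pci_free_irq_vectors() call also covers the MSI and INTx fallbacks, which is why the separate disable_msix/disable_msi helpers can be removed.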
@@ -8747,19 +8674,20 @@ lpfc_sli_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode) static void lpfc_sli_disable_intr(struct lpfc_hba *phba) { - /* Disable the currently initialized interrupt mode */ + int nr_irqs, i; + if (phba->intr_type == MSIX) - lpfc_sli_disable_msix(phba); - else if (phba->intr_type == MSI) - lpfc_sli_disable_msi(phba); - else if (phba->intr_type == INTx) - free_irq(phba->pcidev->irq, phba); + nr_irqs = LPFC_MSIX_VECTORS; + else + nr_irqs = 1; + + for (i = 0; i < nr_irqs; i++) + free_irq(pci_irq_vector(phba->pcidev, i), phba); + pci_free_irq_vectors(phba->pcidev); /* Reset interrupt management states */ phba->intr_type = NONE; phba->sli.slistat.sli_intr = 0; - - return; } /** @@ -8915,7 +8843,7 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) lpfc_used_cpu[cpu] = phys_id; /* Associate vector with selected CPU */ - cpup->irq = phba->sli4_hba.msix_entries[idx].vector; + cpup->irq = pci_irq_vector(phba->pcidev, idx); /* Associate IO channel with selected CPU */ cpup->channel_id = idx; @@ -8925,14 +8853,14 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) first_cpu = cpu; /* Now affinitize to the selected CPU */ - i = irq_set_affinity_hint(phba->sli4_hba.msix_entries[idx]. - vector, get_cpu_mask(cpu)); + i = irq_set_affinity_hint(pci_irq_vector(phba->pcidev, idx), + get_cpu_mask(cpu)); lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3330 Set Affinity: CPU %d channel %d " "irq %d (%x)\n", cpu, cpup->channel_id, - phba->sli4_hba.msix_entries[idx].vector, i); + pci_irq_vector(phba->pcidev, idx), i); /* Spread vector mapping across multple physical CPU nodes */ phys_id++; @@ -9047,14 +8975,7 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) * @phba: pointer to lpfc hba data structure. * * This routine is invoked to enable the MSI-X interrupt vectors to device - * with SLI-4 interface spec. The kernel function pci_enable_msix_range() - * is called to enable the MSI-X vectors. The device driver is responsible - * for calling the individual request_irq() to register each MSI-X vector - * with a interrupt handler, which is done in this function. Note that - * later when device is unloading, the driver should always call free_irq() - * on all MSI-X vectors it has done request_irq() on before calling - * pci_disable_msix(). Failure to do so results in a BUG_ON() and a device - * will be left with MSI-X enabled and leaks its vectors. + * with SLI-4 interface spec. 
* * Return codes * 0 - successful @@ -9066,17 +8987,11 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) int vectors, rc, index; /* Set up MSI-X multi-message vectors */ - for (index = 0; index < phba->cfg_fcp_io_channel; index++) - phba->sli4_hba.msix_entries[index].entry = index; - - /* Configure MSI-X capability structure */ vectors = phba->cfg_fcp_io_channel; - if (phba->cfg_fof) { - phba->sli4_hba.msix_entries[index].entry = index; + if (phba->cfg_fof) vectors++; - } - rc = pci_enable_msix_range(phba->pcidev, phba->sli4_hba.msix_entries, - 2, vectors); + + rc = pci_alloc_irq_vectors(phba->pcidev, 2, vectors, PCI_IRQ_MSIX); if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0484 PCI enable MSI-X failed (%d)\n", rc); @@ -9084,14 +8999,6 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) } vectors = rc; - /* Log MSI-X vector assignment */ - for (index = 0; index < vectors; index++) - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "0489 MSI-X entry[%d]: vector=x%x " - "message=%d\n", index, - phba->sli4_hba.msix_entries[index].vector, - phba->sli4_hba.msix_entries[index].entry); - /* Assign MSI-X vectors to interrupt handlers */ for (index = 0; index < vectors; index++) { memset(&phba->sli4_hba.handler_name[index], 0, 16); @@ -9103,14 +9010,12 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) phba->sli4_hba.fcp_eq_hdl[index].phba = phba; atomic_set(&phba->sli4_hba.fcp_eq_hdl[index].fcp_eq_in_use, 1); if (phba->cfg_fof && (index == (vectors - 1))) - rc = request_irq( - phba->sli4_hba.msix_entries[index].vector, + rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_fof_intr_handler, 0, (char *)&phba->sli4_hba.handler_name[index], &phba->sli4_hba.fcp_eq_hdl[index]); else - rc = request_irq( - phba->sli4_hba.msix_entries[index].vector, + rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_hba_intr_handler, 0, (char *)&phba->sli4_hba.handler_name[index], &phba->sli4_hba.fcp_eq_hdl[index]); @@ -9140,48 +9045,19 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) cfg_fail_out: /* free the irq already requested */ for (--index; index >= 0; index--) { - irq_set_affinity_hint(phba->sli4_hba.msix_entries[index]. - vector, NULL); - free_irq(phba->sli4_hba.msix_entries[index].vector, - &phba->sli4_hba.fcp_eq_hdl[index]); + int irq = pci_irq_vector(phba->pcidev, index); + + irq_set_affinity_hint(irq, NULL); + free_irq(irq, &phba->sli4_hba.fcp_eq_hdl[index]); } /* Unconfigure MSI-X capability structure */ - pci_disable_msix(phba->pcidev); + pci_free_irq_vectors(phba->pcidev); vec_fail_out: return rc; } -/** - * lpfc_sli4_disable_msix - Disable MSI-X interrupt mode to SLI-4 device - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to release the MSI-X vectors and then disable the - * MSI-X interrupt mode to device with SLI-4 interface spec. - **/ -static void -lpfc_sli4_disable_msix(struct lpfc_hba *phba) -{ - int index; - - /* Free up MSI-X multi-message vectors */ - for (index = 0; index < phba->cfg_fcp_io_channel; index++) { - irq_set_affinity_hint(phba->sli4_hba.msix_entries[index]. - vector, NULL); - free_irq(phba->sli4_hba.msix_entries[index].vector, - &phba->sli4_hba.fcp_eq_hdl[index]); - } - if (phba->cfg_fof) { - free_irq(phba->sli4_hba.msix_entries[index].vector, - &phba->sli4_hba.fcp_eq_hdl[index]); - } - /* Disable MSI-X */ - pci_disable_msix(phba->pcidev); - - return; -} - /** * lpfc_sli4_enable_msi - Enable MSI interrupt mode to SLI-4 device * @phba: pointer to lpfc hba data structure. 
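The SLI-4 error path above also shows that any affinity hint set on a vector has to be cleared before that vector's IRQ is freed. A hedged sketch of the hint set/clear pairing, with made-up function names:

#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/cpumask.h>

static void my_set_vector_hints(struct pci_dev *pdev, int nvec)
{
	int i, cpu = cpumask_first(cpu_online_mask);

	for (i = 0; i < nvec; i++) {
		/* suggest one online CPU per vector, round robin */
		irq_set_affinity_hint(pci_irq_vector(pdev, i),
				      get_cpu_mask(cpu));
		cpu = cpumask_next(cpu, cpu_online_mask);
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first(cpu_online_mask);
	}
}

static void my_clear_vector_hints(struct pci_dev *pdev, int nvec)
{
	int i;

	/* clear the hints before free_irq()/pci_free_irq_vectors() */
	for (i = 0; i < nvec; i++)
		irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL);
}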
@@ -9232,24 +9108,6 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba) return 0; } -/** - * lpfc_sli4_disable_msi - Disable MSI interrupt mode to SLI-4 device - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to disable the MSI interrupt mode to device with - * SLI-4 interface spec. The driver calls free_irq() on MSI vector it has - * done request_irq() on before calling pci_disable_msi(). Failure to do so - * results in a BUG_ON() and a device will be left with MSI enabled and leaks - * its vector. - **/ -static void -lpfc_sli4_disable_msi(struct lpfc_hba *phba) -{ - free_irq(phba->pcidev->irq, phba); - pci_disable_msi(phba->pcidev); - return; -} - /** * lpfc_sli4_enable_intr - Enable device interrupt to SLI-4 device * @phba: pointer to lpfc hba data structure. @@ -9335,18 +9193,28 @@ static void lpfc_sli4_disable_intr(struct lpfc_hba *phba) { /* Disable the currently initialized interrupt mode */ - if (phba->intr_type == MSIX) - lpfc_sli4_disable_msix(phba); - else if (phba->intr_type == MSI) - lpfc_sli4_disable_msi(phba); - else if (phba->intr_type == INTx) + if (phba->intr_type == MSIX) { + int index; + + /* Free up MSI-X multi-message vectors */ + for (index = 0; index < phba->cfg_fcp_io_channel; index++) { + int irq = pci_irq_vector(phba->pcidev, index); + + irq_set_affinity_hint(irq, NULL); + free_irq(irq, &phba->sli4_hba.fcp_eq_hdl[index]); + } + + if (phba->cfg_fof) + free_irq(pci_irq_vector(phba->pcidev, index), phba); + } else { free_irq(phba->pcidev->irq, phba); + } + + pci_free_irq_vectors(phba->pcidev); /* Reset interrupt management states */ phba->intr_type = NONE; phba->sli.slistat.sli_intr = 0; - - return; } /** diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 0b88b5703e0f..dfbb25e5c5b6 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -515,7 +515,6 @@ struct lpfc_sli4_hba { uint32_t ue_to_rp; struct lpfc_register sli_intf; struct lpfc_pc_sli4_params pc_sli4_params; - struct msix_entry *msix_entries; uint8_t handler_name[LPFC_SLI4_HANDLER_CNT][LPFC_SLI4_HANDLER_NAME_SZ]; struct lpfc_fcp_eq_hdl *fcp_eq_hdl; /* FCP per-WQ handle */ -- GitLab From 2ea259eead133026ac6a3fbfa040cc58a96cae44 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:27 -0800 Subject: [PATCH 0438/1084] scsi: lpfc: minor code cleanups This contains code cleanups that were in the prior patch set. This allows better review of real changes later. minor code cleanups: fix indentation, punctuation, line length addition/reduction of whitespace remove unneeded parens, braces lpfc_debugfs_nodelist_data: print as u64 rather than byte by byte covert printk(KERN_ERR to pr_err small print string deltas use num_present_cpus() rather than count them comment updates rctl/type names moved to module variable, not on stack Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 8 ++-- drivers/scsi/lpfc/lpfc_bsg.c | 2 +- drivers/scsi/lpfc/lpfc_debugfs.c | 81 +++++++++++++++----------------- drivers/scsi/lpfc/lpfc_hbadisc.c | 7 ++- drivers/scsi/lpfc/lpfc_init.c | 19 ++++---- drivers/scsi/lpfc/lpfc_scsi.c | 3 +- drivers/scsi/lpfc/lpfc_scsi.h | 12 ++--- drivers/scsi/lpfc/lpfc_sli.c | 48 ++++++++++--------- drivers/scsi/lpfc/lpfc_sli.h | 6 +-- drivers/scsi/lpfc/lpfc_vport.c | 1 + 10 files changed, 98 insertions(+), 89 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 50cf402dea29..ce41dea47682 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -50,9 +50,9 @@ #include "lpfc_vport.h" #include "lpfc_attr.h" -#define LPFC_DEF_DEVLOSS_TMO 30 -#define LPFC_MIN_DEVLOSS_TMO 1 -#define LPFC_MAX_DEVLOSS_TMO 255 +#define LPFC_DEF_DEVLOSS_TMO 30 +#define LPFC_MIN_DEVLOSS_TMO 1 +#define LPFC_MAX_DEVLOSS_TMO 255 /* * Write key size should be multiple of 4. If write key is changed @@ -5769,10 +5769,12 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel); lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset); lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat); + lpfc_EnableXLane_init(phba, lpfc_EnableXLane); if (phba->sli_rev != LPFC_SLI_REV4) phba->cfg_EnableXLane = 0; lpfc_XLanePriority_init(phba, lpfc_XLanePriority); + memset(phba->cfg_oas_tgt_wwpn, 0, (8 * sizeof(uint8_t))); memset(phba->cfg_oas_vpt_wwpn, 0, (8 * sizeof(uint8_t))); phba->cfg_oas_lun_state = 0; diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 7dca4d6a8883..1ee131f124b9 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2703,7 +2703,7 @@ static int lpfcdiag_loop_get_xri(struct lpfc_hba *phba, uint16_t rpi, * lpfc_bsg_dma_page_alloc - allocate a bsg mbox page sized dma buffers * @phba: Pointer to HBA context object * - * This function allocates BSG_MBOX_SIZE (4KB) page size dma buffer and. + * This function allocates BSG_MBOX_SIZE (4KB) page size dma buffer and * returns the pointer to the buffer. 
**/ static struct lpfc_dmabuf * diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index caa7a7b0ec53..f26eba713c53 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -531,7 +531,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) int cnt; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *ndlp; - unsigned char *statep, *name; + unsigned char *statep; cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); @@ -574,36 +574,32 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) default: statep = "UNKNOWN"; } - len += snprintf(buf+len, size-len, "%s DID:x%06x ", - statep, ndlp->nlp_DID); - name = (unsigned char *)&ndlp->nlp_portname; - len += snprintf(buf+len, size-len, - "WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ", - *name, *(name+1), *(name+2), *(name+3), - *(name+4), *(name+5), *(name+6), *(name+7)); - name = (unsigned char *)&ndlp->nlp_nodename; - len += snprintf(buf+len, size-len, - "WWNN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ", - *name, *(name+1), *(name+2), *(name+3), - *(name+4), *(name+5), *(name+6), *(name+7)); + len += snprintf(buf+len, size-len, "%s DID:x%06x ", + statep, ndlp->nlp_DID); + len += snprintf(buf+len, size-len, + "WWPN x%llx ", + wwn_to_u64(ndlp->nlp_portname.u.wwn)); + len += snprintf(buf+len, size-len, + "WWNN x%llx ", + wwn_to_u64(ndlp->nlp_nodename.u.wwn)); if (ndlp->nlp_flag & NLP_RPI_REGISTERED) - len += snprintf(buf+len, size-len, "RPI:%03d ", - ndlp->nlp_rpi); + len += snprintf(buf+len, size-len, "RPI:%03d ", + ndlp->nlp_rpi); else - len += snprintf(buf+len, size-len, "RPI:none "); + len += snprintf(buf+len, size-len, "RPI:none "); len += snprintf(buf+len, size-len, "flag:x%08x ", ndlp->nlp_flag); if (!ndlp->nlp_type) - len += snprintf(buf+len, size-len, "UNKNOWN_TYPE "); + len += snprintf(buf+len, size-len, "UNKNOWN_TYPE "); if (ndlp->nlp_type & NLP_FC_NODE) - len += snprintf(buf+len, size-len, "FC_NODE "); + len += snprintf(buf+len, size-len, "FC_NODE "); if (ndlp->nlp_type & NLP_FABRIC) - len += snprintf(buf+len, size-len, "FABRIC "); + len += snprintf(buf+len, size-len, "FABRIC "); if (ndlp->nlp_type & NLP_FCP_TARGET) - len += snprintf(buf+len, size-len, "FCP_TGT sid:%d ", + len += snprintf(buf+len, size-len, "FCP_TGT sid:%d ", ndlp->nlp_sid); if (ndlp->nlp_type & NLP_FCP_INITIATOR) - len += snprintf(buf+len, size-len, "FCP_INITIATOR "); + len += snprintf(buf+len, size-len, "FCP_INITIATOR "); len += snprintf(buf+len, size-len, "usgmap:%x ", ndlp->nlp_usg_map); len += snprintf(buf+len, size-len, "refcnt:%x", @@ -611,8 +607,10 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf(buf+len, size-len, "\n"); } spin_unlock_irq(shost->host_lock); + return len; } + #endif /** @@ -938,7 +936,7 @@ lpfc_debugfs_dumpData_open(struct inode *inode, struct file *file) goto out; /* Round to page boundary */ - printk(KERN_ERR "9059 BLKGRD: %s: _dump_buf_data=0x%p\n", + pr_err("9059 BLKGRD: %s: _dump_buf_data=0x%p\n", __func__, _dump_buf_data); debug->buffer = _dump_buf_data; if (!debug->buffer) { @@ -968,8 +966,8 @@ lpfc_debugfs_dumpDif_open(struct inode *inode, struct file *file) goto out; /* Round to page boundary */ - printk(KERN_ERR "9060 BLKGRD: %s: _dump_buf_dif=0x%p file=%pD\n", - __func__, _dump_buf_dif, file); + pr_err("9060 BLKGRD: %s: _dump_buf_dif=0x%p file=%pD\n", + __func__, _dump_buf_dif, file); debug->buffer = _dump_buf_dif; if (!debug->buffer) { kfree(debug); @@ -3853,7 
+3851,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, if ((mbox_tp == mbox_rd) && (dma_tp == dma_mbox)) { if (*mbx_dump_map & LPFC_BSG_DMP_MBX_RD_MBX) { do_dump |= LPFC_BSG_DMP_MBX_RD_MBX; - printk(KERN_ERR "\nRead mbox command (x%x), " + pr_err("\nRead mbox command (x%x), " "nemb:0x%x, extbuf_cnt:%d:\n", sta_tp, nemb_tp, ext_buf); } @@ -3861,7 +3859,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, if ((mbox_tp == mbox_rd) && (dma_tp == dma_ebuf)) { if (*mbx_dump_map & LPFC_BSG_DMP_MBX_RD_BUF) { do_dump |= LPFC_BSG_DMP_MBX_RD_BUF; - printk(KERN_ERR "\nRead mbox buffer (x%x), " + pr_err("\nRead mbox buffer (x%x), " "nemb:0x%x, extbuf_seq:%d:\n", sta_tp, nemb_tp, ext_buf); } @@ -3869,7 +3867,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, if ((mbox_tp == mbox_wr) && (dma_tp == dma_mbox)) { if (*mbx_dump_map & LPFC_BSG_DMP_MBX_WR_MBX) { do_dump |= LPFC_BSG_DMP_MBX_WR_MBX; - printk(KERN_ERR "\nWrite mbox command (x%x), " + pr_err("\nWrite mbox command (x%x), " "nemb:0x%x, extbuf_cnt:%d:\n", sta_tp, nemb_tp, ext_buf); } @@ -3877,7 +3875,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, if ((mbox_tp == mbox_wr) && (dma_tp == dma_ebuf)) { if (*mbx_dump_map & LPFC_BSG_DMP_MBX_WR_BUF) { do_dump |= LPFC_BSG_DMP_MBX_WR_BUF; - printk(KERN_ERR "\nWrite mbox buffer (x%x), " + pr_err("\nWrite mbox buffer (x%x), " "nemb:0x%x, extbuf_seq:%d:\n", sta_tp, nemb_tp, ext_buf); } @@ -3889,7 +3887,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, for (i = 0; i < *mbx_word_cnt; i++) { if (!(i % 8)) { if (i != 0) - printk(KERN_ERR "%s\n", line_buf); + pr_err("%s\n", line_buf); len = 0; len += snprintf(line_buf+len, LPFC_MBX_ACC_LBUF_SZ-len, @@ -3900,7 +3898,7 @@ lpfc_idiag_mbxacc_dump_bsg_mbox(struct lpfc_hba *phba, enum nemb_type nemb_tp, pword++; } if ((i - 1) % 8) - printk(KERN_ERR "%s\n", line_buf); + pr_err("%s\n", line_buf); (*mbx_dump_cnt)--; } @@ -3949,13 +3947,13 @@ lpfc_idiag_mbxacc_dump_issue_mbox(struct lpfc_hba *phba, MAILBOX_t *pmbox) /* dump buffer content */ if (*mbx_dump_map & LPFC_MBX_DMP_MBX_WORD) { - printk(KERN_ERR "Mailbox command:0x%x dump by word:\n", + pr_err("Mailbox command:0x%x dump by word:\n", pmbox->mbxCommand); pword = (uint32_t *)pmbox; for (i = 0; i < *mbx_word_cnt; i++) { if (!(i % 8)) { if (i != 0) - printk(KERN_ERR "%s\n", line_buf); + pr_err("%s\n", line_buf); len = 0; memset(line_buf, 0, LPFC_MBX_ACC_LBUF_SZ); len += snprintf(line_buf+len, @@ -3968,17 +3966,17 @@ lpfc_idiag_mbxacc_dump_issue_mbox(struct lpfc_hba *phba, MAILBOX_t *pmbox) pword++; } if ((i - 1) % 8) - printk(KERN_ERR "%s\n", line_buf); - printk(KERN_ERR "\n"); + pr_err("%s\n", line_buf); + pr_err("\n"); } if (*mbx_dump_map & LPFC_MBX_DMP_MBX_BYTE) { - printk(KERN_ERR "Mailbox command:0x%x dump by byte:\n", + pr_err("Mailbox command:0x%x dump by byte:\n", pmbox->mbxCommand); pbyte = (uint8_t *)pmbox; for (i = 0; i < *mbx_word_cnt; i++) { if (!(i % 8)) { if (i != 0) - printk(KERN_ERR "%s\n", line_buf); + pr_err("%s\n", line_buf); len = 0; memset(line_buf, 0, LPFC_MBX_ACC_LBUF_SZ); len += snprintf(line_buf+len, @@ -3996,8 +3994,8 @@ lpfc_idiag_mbxacc_dump_issue_mbox(struct lpfc_hba *phba, MAILBOX_t *pmbox) LPFC_MBX_ACC_LBUF_SZ-len, " "); } if ((i - 1) % 8) - printk(KERN_ERR "%s\n", line_buf); - printk(KERN_ERR "\n"); + pr_err("%s\n", line_buf); + pr_err("\n"); } (*mbx_dump_cnt)--; @@ -4240,8 +4238,7 @@ lpfc_debugfs_initialize(struct lpfc_vport 
*vport) i++; } lpfc_debugfs_max_slow_ring_trc = (1 << i); - printk(KERN_ERR - "lpfc_debugfs_max_disc_trc changed to " + pr_err("lpfc_debugfs_max_disc_trc changed to " "%d\n", lpfc_debugfs_max_disc_trc); } } @@ -4273,6 +4270,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) (sizeof(struct lpfc_debugfs_trc) * lpfc_debugfs_max_slow_ring_trc)); } + } snprintf(name, sizeof(name), "vport%d", vport->vpi); @@ -4298,8 +4296,7 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) i++; } lpfc_debugfs_max_disc_trc = (1 << i); - printk(KERN_ERR - "lpfc_debugfs_max_disc_trc changed to %d\n", + pr_err("lpfc_debugfs_max_disc_trc changed to %d\n", lpfc_debugfs_max_disc_trc); } } diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 82047070cdc9..92627df35a58 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -3972,12 +3972,13 @@ static void lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp) { struct fc_rport *rport = ndlp->rport; + struct lpfc_vport *vport = ndlp->vport; - lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_RPORT, + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, "rport delete: did:x%x flg:x%x type x%x", ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type); - lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, "3184 rport unregister x%06x, rport %p\n", ndlp->nlp_DID, rport); @@ -4424,8 +4425,6 @@ lpfc_check_sli_ndlp(struct lpfc_hba *phba, if (icmd->ulpContext == (volatile ushort)ndlp->nlp_rpi) { return 1; } - } else if (pring->ringno == psli->next_ring) { - } return 0; } diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 8cffad192f17..ec23388ac4f5 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -4272,13 +4272,13 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) sprintf(message, "Unqualified optics - Replace with " "Avago optics for Warranty and Technical " "Support - Link is%s operational", - (operational) ? "" : " not"); + (operational) ? " not" : ""); break; case LPFC_SLI_EVENT_STATUS_UNCERTIFIED: sprintf(message, "Uncertified optics - Replace with " "Avago-certified optics to enable link " "operation - Link is%s operational", - (operational) ? "" : " not"); + (operational) ? 
" not" : ""); break; default: /* firmware is reporting a status we don't know about */ @@ -6207,6 +6207,7 @@ lpfc_create_shost(struct lpfc_hba *phba) shost = lpfc_shost_from_vport(vport); phba->pport = vport; + lpfc_debugfs_initialize(vport); /* Put reference to SCSI host to driver's device private data */ pci_set_drvdata(phba->pcidev, shost); @@ -6993,7 +6994,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) "VPI(B:%d M:%d) " "VFI(B:%d M:%d) " "RPI(B:%d M:%d) " - "FCFI(Count:%d)\n", + "FCFI:%d EQ:%d CQ:%d WQ:%d RQ:%d\n", phba->sli4_hba.extents_in_use, phba->sli4_hba.max_cfg_param.xri_base, phba->sli4_hba.max_cfg_param.max_xri, @@ -7003,7 +7004,12 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) phba->sli4_hba.max_cfg_param.max_vfi, phba->sli4_hba.max_cfg_param.rpi_base, phba->sli4_hba.max_cfg_param.max_rpi, - phba->sli4_hba.max_cfg_param.max_fcfi); + phba->sli4_hba.max_cfg_param.max_fcfi, + phba->sli4_hba.max_cfg_param.max_eq, + phba->sli4_hba.max_cfg_param.max_cq, + phba->sli4_hba.max_cfg_param.max_wq, + phba->sli4_hba.max_cfg_param.max_rq); + } if (rc) @@ -11344,7 +11350,6 @@ static struct miscdevice lpfc_mgmt_dev = { static int __init lpfc_init(void) { - int cpu; int error = 0; printk(LPFC_MODULE_DESC "\n"); @@ -11370,9 +11375,7 @@ lpfc_init(void) /* Initialize in case vector mapping is needed */ lpfc_used_cpu = NULL; - lpfc_present_cpu = 0; - for_each_present_cpu(cpu) - lpfc_present_cpu++; + lpfc_present_cpu = num_present_cpus(); error = pci_register_driver(&lpfc_driver); if (error) { diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 1180a22beb43..bd7b75dbb97e 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3894,7 +3894,7 @@ int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, } } chann = atomic_add_return(1, &phba->fcp_qidx); - chann = (chann % phba->cfg_fcp_io_channel); + chann = chann % phba->cfg_fcp_io_channel; return chann; } @@ -3967,6 +3967,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, lpfc_cmd->prot_data_segment = NULL; } #endif + if (pnode && NLP_CHK_NODE_ACT(pnode)) atomic_dec(&pnode->cmd_pending); diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index 8cb80dabada8..4d7062258cf8 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -178,13 +178,13 @@ struct lpfc_scsi_buf { #endif }; -#define LPFC_SCSI_DMA_EXT_SIZE 264 -#define LPFC_BPL_SIZE 1024 -#define MDAC_DIRECT_CMD 0x22 +#define LPFC_SCSI_DMA_EXT_SIZE 264 +#define LPFC_BPL_SIZE 1024 +#define MDAC_DIRECT_CMD 0x22 -#define FIND_FIRST_OAS_LUN 0 -#define NO_MORE_OAS_LUN -1 -#define NOT_OAS_ENABLED_LUN NO_MORE_OAS_LUN +#define FIND_FIRST_OAS_LUN 0 +#define NO_MORE_OAS_LUN -1 +#define NOT_OAS_ENABLED_LUN NO_MORE_OAS_LUN int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 586984dce3a4..19543142c94c 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -271,10 +271,11 @@ lpfc_sli4_eq_get(struct lpfc_queue *q) /* * insert barrier for instruction interlock : data from the hardware * must have the valid bit checked before it can be copied and acted - * upon. Given what was seen in lpfc_sli4_cq_get() of speculative - * instructions allowing action on content before valid bit checked, - * add barrier here as well. May not be needed as "content" is a - * single 32-bit entity here (vs multi word structure for cq's). + * upon. 
Speculative instructions were allowing a bcopy at the start + * of lpfc_sli4_fp_handle_wcqe(), which is called immediately + * after our return, to copy data before the valid bit check above + * was done. As such, some of the copied data was stale. The barrier + * ensures the check is before any data is copied. */ mb(); return eqe; @@ -386,11 +387,10 @@ lpfc_sli4_cq_get(struct lpfc_queue *q) /* * insert barrier for instruction interlock : data from the hardware * must have the valid bit checked before it can be copied and acted - * upon. Speculative instructions were allowing a bcopy at the start - * of lpfc_sli4_fp_handle_wcqe(), which is called immediately - * after our return, to copy data before the valid bit check above - * was done. As such, some of the copied data was stale. The barrier - * ensures the check is before any data is copied. + * upon. Given what was seen in lpfc_sli4_cq_get() of speculative + * instructions allowing action on content before valid bit checked, + * add barrier here as well. May not be needed as "content" is a + * single 32-bit entity here (vs multi word structure for cq's). */ mb(); return cqe; @@ -7368,7 +7368,8 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, if (psli->sli_flag & LPFC_SLI_ACTIVE) { /* copy results back to user */ - lpfc_sli_pcimem_bcopy(phba->mbox, mbx, MAILBOX_CMD_SIZE); + lpfc_sli_pcimem_bcopy(phba->mbox, mbx, + MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ if (pmbox->out_ext_byte_len && pmbox->context2) { lpfc_sli_pcimem_bcopy(phba->mbox_ext, @@ -7378,7 +7379,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, } else { /* First copy command data */ lpfc_memcpy_from_slim(mbx, phba->MBslimaddr, - MAILBOX_CMD_SIZE); + MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ if (pmbox->out_ext_byte_len && pmbox->context2) { lpfc_memcpy_from_slim(pmbox->context2, @@ -8906,10 +8907,10 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, } } } - } else if (piocb->iocb_flag & LPFC_IO_FCP) { + } else if (piocb->iocb_flag & LPFC_IO_FCP) /* These IO's already have an XRI and a mapped sgl. */ sglq = NULL; - } else { + else { /* * This is a continuation of a commandi,(CX) so this * sglq is on the active list @@ -13359,8 +13360,10 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq, switch (cq->entry_count) { default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0361 Unsupported CQ count. 
(%d)\n", - cq->entry_count); + "0361 Unsupported CQ count: " + "entry cnt %d sz %d pg cnt %d repost %d\n", + cq->entry_count, cq->entry_size, + cq->page_count, cq->entry_repost); if (cq->entry_count < 256) { status = -EINVAL; goto out; @@ -14824,6 +14827,9 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba, return rc; } +static char *lpfc_rctl_names[] = FC_RCTL_NAMES_INIT; +static char *lpfc_type_names[] = FC_TYPE_NAMES_INIT; + /** * lpfc_fc_frame_check - Check that this frame is a valid frame to handle * @phba: pointer to lpfc_hba struct that the frame was received on @@ -14838,8 +14844,6 @@ static int lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) { /* make rctl_names static to save stack space */ - static char *rctl_names[] = FC_RCTL_NAMES_INIT; - char *type_names[] = FC_TYPE_NAMES_INIT; struct fc_vft_header *fc_vft_hdr; uint32_t *header = (uint32_t *) fc_hdr; @@ -14894,8 +14898,8 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "2538 Received frame rctl:%s (x%x), type:%s (x%x), " "frame Data:%08x %08x %08x %08x %08x %08x %08x\n", - rctl_names[fc_hdr->fh_r_ctl], fc_hdr->fh_r_ctl, - type_names[fc_hdr->fh_type], fc_hdr->fh_type, + lpfc_rctl_names[fc_hdr->fh_r_ctl], fc_hdr->fh_r_ctl, + lpfc_type_names[fc_hdr->fh_type], fc_hdr->fh_type, be32_to_cpu(header[0]), be32_to_cpu(header[1]), be32_to_cpu(header[2]), be32_to_cpu(header[3]), be32_to_cpu(header[4]), be32_to_cpu(header[5]), @@ -14904,8 +14908,8 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) drop: lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, "2539 Dropped frame rctl:%s type:%s\n", - rctl_names[fc_hdr->fh_r_ctl], - type_names[fc_hdr->fh_type]); + lpfc_rctl_names[fc_hdr->fh_r_ctl], + lpfc_type_names[fc_hdr->fh_type]); return 1; } @@ -15726,11 +15730,13 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba, /* Process each received buffer */ fc_hdr = (struct fc_frame_header *)dmabuf->hbuf.virt; + /* check to see if this a valid type of frame */ if (lpfc_fc_frame_check(phba, fc_hdr)) { lpfc_in_buf_free(phba, &dmabuf->dbuf); return; } + if ((bf_get(lpfc_cqe_code, &dmabuf->cq_event.cqe.rcqe_cmpl) == CQE_CODE_RECEIVE_V1)) fcfi = bf_get(lpfc_rcqe_fcf_id_v1, diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 74227a28bd56..3fad5657514b 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -97,11 +97,11 @@ struct lpfc_iocbq { struct lpfc_node_rrq *rrq; } context_un; - void (*fabric_iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + void (*fabric_iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); - void (*wait_iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + void (*wait_iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); - void (*iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + void (*iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); }; diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index e18bbc66e83b..5bbe6af148dd 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -33,6 +33,7 @@ #include #include #include + #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" -- GitLab From 07bcd98efba2edd2bd2230e24f6a46a09991e2a4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:28 -0800 Subject: [PATCH 0439/1084] scsi: lpfc: refactor debugfs queue prints Create common wq, cq, eq, rq print functions 
Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_debugfs.c | 564 +++++++++++++------------------ 2 files changed, 228 insertions(+), 337 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 64022d79117b..8e0d6f43878d 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -948,6 +948,7 @@ struct lpfc_hba { struct dentry *idiag_ctl_acc; struct dentry *idiag_mbx_acc; struct dentry *idiag_ext_acc; + uint8_t lpfc_idiag_last_eq; #endif /* Used for deferred freeing of ELS data buffers */ diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index f26eba713c53..866361d4d637 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -1972,6 +1972,155 @@ lpfc_idiag_baracc_write(struct file *file, const char __user *buf, return -EINVAL; } +static int +__lpfc_idiag_print_wq(struct lpfc_queue *qp, char *wqtype, + char *pbuffer, int len) +{ + if (!qp) + return len; + + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t\t%s WQ info: ", wqtype); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "AssocCQID[%04d]: WQ-STAT[oflow:x%x posted:x%llx]\n", + qp->assoc_qid, qp->q_cnt_1, + (unsigned long long)qp->q_cnt_4); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t\tWQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " + "HOST-IDX[%04d], PORT-IDX[%04d]", + qp->queue_id, qp->entry_count, + qp->entry_size, qp->host_index, + qp->hba_index); + len += snprintf(pbuffer + len, + LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); + return len; +} + +static int +lpfc_idiag_wqs_for_cq(struct lpfc_hba *phba, char *wqtype, char *pbuffer, + int *len, int max_cnt, int cq_id) +{ + struct lpfc_queue *qp; + int qidx; + + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) { + qp = phba->sli4_hba.fcp_wq[qidx]; + if (qp->assoc_qid != cq_id) + continue; + *len = __lpfc_idiag_print_wq(qp, wqtype, pbuffer, *len); + if (*len >= max_cnt) + return 1; + } + return 0; +} + +static int +__lpfc_idiag_print_cq(struct lpfc_queue *qp, char *cqtype, + char *pbuffer, int len) +{ + if (!qp) + return len; + + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t%s CQ info: ", cqtype); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "AssocEQID[%02d]: CQ STAT[max:x%x relw:x%x " + "xabt:x%x wq:x%llx]\n", + qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, + qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\tCQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " + "HOST-IDX[%04d], PORT-IDX[%04d]", + qp->queue_id, qp->entry_count, + qp->entry_size, qp->host_index, + qp->hba_index); + + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); + + return len; +} + +static int +__lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, + char *rqtype, char *pbuffer, int len) +{ + if (!qp || !datqp) + return len; + + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t\t%s RQ info: ", rqtype); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "AssocCQID[%02d]: RQ-STAT[nopost:x%x nobuf:x%x " + "trunc:x%x rcv:x%llx]\n", + qp->assoc_qid, qp->q_cnt_1, qp->q_cnt_2, + qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t\tHQID[%02d], 
QE-CNT[%04d], QE-SIZE[%04d], " + "HOST-IDX[%04d], PORT-IDX[%04d]\n", + qp->queue_id, qp->entry_count, qp->entry_size, + qp->host_index, qp->hba_index); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\t\tDQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " + "HOST-IDX[%04d], PORT-IDX[%04d]\n", + datqp->queue_id, datqp->entry_count, + datqp->entry_size, datqp->host_index, + datqp->hba_index); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); + + return len; +} + +static int +lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, + int *len, int max_cnt, int eq_id) +{ + struct lpfc_queue *qp; + int qidx, rc; + + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) { + qp = phba->sli4_hba.fcp_cq[qidx]; + if (qp->assoc_qid != eq_id) + continue; + + *len = __lpfc_idiag_print_cq(qp, "FCP", pbuffer, *len); + + /* Reset max counter */ + qp->CQ_max_cqe = 0; + + if (*len >= max_cnt) + return 1; + + rc = lpfc_idiag_wqs_for_cq(phba, "FCP", pbuffer, len, + max_cnt, qp->queue_id); + if (rc) + return 1; + } + + return 0; +} + +static int +__lpfc_idiag_print_eq(struct lpfc_queue *qp, char *eqtype, + char *pbuffer, int len) +{ + if (!qp) + return len; + + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "\n%s EQ info: EQ-STAT[max:x%x noE:x%x " + "bs:x%x proc:x%llx]\n", + eqtype, qp->q_cnt_1, qp->q_cnt_2, qp->q_cnt_3, + (unsigned long long)qp->q_cnt_4); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "EQID[%02d], QE-CNT[%04d], QE-SIZE[%04d], " + "HOST-IDX[%04d], PORT-IDX[%04d]", + qp->queue_id, qp->entry_count, qp->entry_size, + qp->host_index, qp->hba_index); + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); + + return len; +} + /** * lpfc_idiag_queinfo_read - idiag debugfs read queue information * @file: The file pointer to read from. @@ -1982,6 +2131,9 @@ lpfc_idiag_baracc_write(struct file *file, const char __user *buf, * Description: * This routine reads data from the @phba SLI4 PCI function queue information, * and copies to user @buf. + * This routine only returns 1 EQs worth of information. It remembers the last + * EQ read and jumps to the next EQ. Thus subsequent calls to queInfo will + * retrieve all EQs allocated for the phba. 
* * Returns: * This function returns the amount of data that was read (this could be less @@ -1993,19 +2145,16 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, { struct lpfc_debug *debug = file->private_data; struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private; - int len = 0; char *pbuffer; - int x, cnt; - int max_cnt; + int max_cnt, rc, x, len = 0; struct lpfc_queue *qp = NULL; - if (!debug->buffer) debug->buffer = kmalloc(LPFC_QUE_INFO_GET_BUF_SIZE, GFP_KERNEL); if (!debug->buffer) return 0; pbuffer = debug->buffer; - max_cnt = LPFC_QUE_INFO_GET_BUF_SIZE - 128; + max_cnt = LPFC_QUE_INFO_GET_BUF_SIZE - 256; if (*ppos) return 0; @@ -2014,374 +2163,115 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, /* Fast-path event queue */ if (phba->sli4_hba.hba_eq && phba->cfg_fcp_io_channel) { - cnt = phba->cfg_fcp_io_channel; - for (x = 0; x < cnt; x++) { + x = phba->lpfc_idiag_last_eq; + if (phba->cfg_fof && (x >= phba->cfg_fcp_io_channel)) { + phba->lpfc_idiag_last_eq = 0; + goto fof; + } + phba->lpfc_idiag_last_eq++; + if (phba->lpfc_idiag_last_eq >= phba->cfg_fcp_io_channel) + if (phba->cfg_fof == 0) + phba->lpfc_idiag_last_eq = 0; - /* Fast-path EQ */ - qp = phba->sli4_hba.hba_eq[x]; - if (!qp) - goto proc_cq; + len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, + "EQ %d out of %d HBA EQs\n", + x, phba->cfg_fcp_io_channel); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\nHBA EQ info: " - "EQ-STAT[max:x%x noE:x%x " - "bs:x%x proc:x%llx]\n", - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "EQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); - - - /* Reset max counter */ - qp->EQ_max_eqe = 0; + /* Fast-path EQ */ + qp = phba->sli4_hba.hba_eq[x]; + if (!qp) + goto out; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; -proc_cq: - /* Fast-path FCP CQ */ - qp = phba->sli4_hba.fcp_cq[x]; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tFCP CQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocEQID[%02d]: " - "CQ STAT[max:x%x relw:x%x " - "xabt:x%x wq:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tCQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); + len = __lpfc_idiag_print_eq(qp, "HBA", pbuffer, len); + /* Reset max counter */ + qp->EQ_max_eqe = 0; - /* Reset max counter */ - qp->CQ_max_cqe = 0; + if (len >= max_cnt) + goto too_big; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; + rc = lpfc_idiag_cqs_for_eq(phba, pbuffer, &len, + max_cnt, qp->queue_id); + if (rc) + goto too_big; - /* Fast-path FCP WQ */ - qp = phba->sli4_hba.fcp_wq[x]; + /* Only EQ 0 has slow path CQs configured */ + if (x) + goto out; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tFCP WQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocCQID[%02d]: " - "WQ-STAT[oflow:x%x posted:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, (unsigned long 
long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tWQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - - if (x) - continue; - - /* Only EQ 0 has slow path CQs configured */ - - /* Slow-path mailbox CQ */ - qp = phba->sli4_hba.mbx_cq; - if (qp) { - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tMBX CQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocEQID[%02d]: " - "CQ-STAT[mbox:x%x relw:x%x " - "xabt:x%x wq:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, - (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tCQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + /* Slow-path mailbox CQ */ + qp = phba->sli4_hba.mbx_cq; + len = __lpfc_idiag_print_cq(qp, "MBX", pbuffer, len); + if (len >= max_cnt) + goto too_big; - /* Slow-path MBOX MQ */ - qp = phba->sli4_hba.mbx_wq; - if (qp) { - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tMBX MQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocCQID[%02d]:\n", - phba->sli4_hba.mbx_wq->assoc_qid); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tWQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + /* Slow-path MBOX MQ */ + qp = phba->sli4_hba.mbx_wq; + len = __lpfc_idiag_print_wq(qp, "MBX", pbuffer, len); + if (len >= max_cnt) + goto too_big; - /* Slow-path ELS response CQ */ - qp = phba->sli4_hba.els_cq; - if (qp) { - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tELS CQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocEQID[%02d]: " - "CQ-STAT[max:x%x relw:x%x " - "xabt:x%x wq:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, - (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tCQID [%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); - - /* Reset max counter */ - qp->CQ_max_cqe = 0; - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + /* Slow-path ELS response CQ */ + qp = phba->sli4_hba.els_cq; + len = __lpfc_idiag_print_cq(qp, "ELS", pbuffer, len); + /* Reset max counter */ + if (qp) + qp->CQ_max_cqe = 0; + if (len >= max_cnt) + goto too_big; - /* Slow-path ELS WQ */ - qp = phba->sli4_hba.els_wq; - if (qp) { - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tELS WQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocCQID[%02d]: " - " WQ-STAT[oflow:x%x " - "posted:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, - (unsigned long long)qp->q_cnt_4); - len += 
snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tWQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + /* Slow-path ELS WQ */ + qp = phba->sli4_hba.els_wq; + len = __lpfc_idiag_print_wq(qp, "ELS", pbuffer, len); + if (len >= max_cnt) + goto too_big; - if (phba->sli4_hba.hdr_rq && phba->sli4_hba.dat_rq) { - /* Slow-path RQ header */ - qp = phba->sli4_hba.hdr_rq; + qp = phba->sli4_hba.hdr_rq; + len = __lpfc_idiag_print_rqpair(qp, phba->sli4_hba.dat_rq, + "RQpair", pbuffer, len); + if (len >= max_cnt) + goto too_big; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tRQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocCQID[%02d]: " - "RQ-STAT[nopost:x%x nobuf:x%x " - "trunc:x%x rcv:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, - (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tHQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]\n", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); - - /* Slow-path RQ data */ - qp = phba->sli4_hba.dat_rq; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tDQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]\n", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - } - } + goto out; } +fof: if (phba->cfg_fof) { /* FOF EQ */ qp = phba->sli4_hba.fof_eq; - if (!qp) - goto out; - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\nFOF EQ info: " - "EQ-STAT[max:x%x noE:x%x " - "bs:x%x proc:x%llx]\n", - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "EQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); + len = __lpfc_idiag_print_eq(qp, "FOF", pbuffer, len); /* Reset max counter */ - qp->EQ_max_eqe = 0; + if (qp) + qp->EQ_max_eqe = 0; - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); if (len >= max_cnt) goto too_big; - } - - if (phba->cfg_fof) { /* OAS CQ */ qp = phba->sli4_hba.oas_cq; - if (qp) { - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tOAS CQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocEQID[%02d]: " - "CQ STAT[max:x%x relw:x%x " - "xabt:x%x wq:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, qp->q_cnt_2, - qp->q_cnt_3, (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\tCQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, qp->entry_count, - qp->entry_size, qp->host_index, - qp->hba_index); - - /* Reset max counter */ + len = __lpfc_idiag_print_cq(qp, "OAS", pbuffer, len); + /* Reset max counter */ + if (qp) qp->CQ_max_cqe = 0; - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + if (len >= max_cnt) + goto too_big; /* OAS WQ */ qp = phba->sli4_hba.oas_wq; - if (qp) { - len += 
snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tOAS WQ info: "); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "AssocCQID[%02d]: " - "WQ-STAT[oflow:x%x posted:x%llx]\n", - qp->assoc_qid, - qp->q_cnt_1, (unsigned long long)qp->q_cnt_4); - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, - "\t\tWQID[%02d], " - "QE-CNT[%04d], QE-SIZE[%04d], " - "HOST-IDX[%04d], PORT-IDX[%04d]", - qp->queue_id, - qp->entry_count, - qp->entry_size, - qp->host_index, - qp->hba_index); - - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "\n"); - if (len >= max_cnt) - goto too_big; - } + len = __lpfc_idiag_print_wq(qp, "OAS", pbuffer, len); + if (len >= max_cnt) + goto too_big; } -out: + spin_unlock_irq(&phba->hbalock); return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len); too_big: - len += snprintf(pbuffer+len, - LPFC_QUE_INFO_GET_BUF_SIZE-len, "Truncated ...\n"); + len += snprintf(pbuffer + len, + LPFC_QUE_INFO_GET_BUF_SIZE - len, "Truncated ...\n"); +out: spin_unlock_irq(&phba->hbalock); return simple_read_from_buffer(buf, nbytes, ppos, pbuffer, len); } -- GitLab From 1d9d5a9879ad493ee7cf75987df1f365c61fefe5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:29 -0800 Subject: [PATCH 0440/1084] scsi: lpfc: refactor debugfs queue dump routines Create common wq, cq, eq, rq dump functions Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 22 ++-- drivers/scsi/lpfc/lpfc_debugfs.h | 214 +++++++++++++------------------ 2 files changed, 101 insertions(+), 135 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 866361d4d637..93ddda16c632 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -4555,31 +4555,31 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) void lpfc_debug_dump_all_queues(struct lpfc_hba *phba) { - int fcp_wqidx; + int idx; /* * Dump Work Queues (WQs) */ - lpfc_debug_dump_mbx_wq(phba); - lpfc_debug_dump_els_wq(phba); + lpfc_debug_dump_wq(phba, DUMP_MBX, 0); + lpfc_debug_dump_wq(phba, DUMP_ELS, 0); - for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_io_channel; fcp_wqidx++) - lpfc_debug_dump_fcp_wq(phba, fcp_wqidx); + for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) + lpfc_debug_dump_wq(phba, DUMP_FCP, idx); lpfc_debug_dump_hdr_rq(phba); lpfc_debug_dump_dat_rq(phba); /* * Dump Complete Queues (CQs) */ - lpfc_debug_dump_mbx_cq(phba); - lpfc_debug_dump_els_cq(phba); + lpfc_debug_dump_cq(phba, DUMP_MBX, 0); + lpfc_debug_dump_cq(phba, DUMP_ELS, 0); - for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_io_channel; fcp_wqidx++) - lpfc_debug_dump_fcp_cq(phba, fcp_wqidx); + for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) + lpfc_debug_dump_cq(phba, DUMP_FCP, idx); /* * Dump Event Queues (EQs) */ - for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_io_channel; fcp_wqidx++) - lpfc_debug_dump_hba_eq(phba, fcp_wqidx); + for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) + lpfc_debug_dump_hba_eq(phba, idx); } diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 8b2b6a3bfc25..9ae2c4b5fd12 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -42,6 +42,12 @@ /* hbqinfo output buffer size */ #define LPFC_HBQINFO_SIZE 8192 +enum { + DUMP_FCP, + DUMP_MBX, + DUMP_ELS, +}; + /* * For SLI4 iDiag debugfs diagnostics tool */ @@ -358,58 +364,97 @@ 
lpfc_debug_dump_q(struct lpfc_queue *q) } /** - * lpfc_debug_dump_fcp_wq - dump all entries from a fcp work queue + * lpfc_debug_dump_wq - dump all entries from the fcp work queue * @phba: Pointer to HBA context object. - * @fcp_wqidx: Index to a FCP work queue. + * @wqidx: Index to a FCP work queue. * - * This function dumps all entries from a FCP work queue specified by the - * @fcp_wqidx. + * This function dumps all entries from a FCP work queue specified + * by the wqidx. **/ static inline void -lpfc_debug_dump_fcp_wq(struct lpfc_hba *phba, int fcp_wqidx) +lpfc_debug_dump_wq(struct lpfc_hba *phba, int qtype, int wqidx) { - /* sanity check */ - if (fcp_wqidx >= phba->cfg_fcp_io_channel) + struct lpfc_queue *wq; + char *qtypestr; + + if (qtype == DUMP_FCP) { + wq = phba->sli4_hba.fcp_wq[wqidx]; + qtypestr = "FCP"; + } else if (qtype == DUMP_MBX) { + wq = phba->sli4_hba.mbx_wq; + qtypestr = "MBX"; + } else if (qtype == DUMP_ELS) { + wq = phba->sli4_hba.els_wq; + qtypestr = "ELS"; + } else return; - printk(KERN_ERR "FCP WQ: WQ[Idx:%d|Qid:%d]\n", - fcp_wqidx, phba->sli4_hba.fcp_wq[fcp_wqidx]->queue_id); - lpfc_debug_dump_q(phba->sli4_hba.fcp_wq[fcp_wqidx]); + if (qtype == DUMP_FCP) + pr_err("%s WQ: WQ[Idx:%d|Qid:%d]\n", + qtypestr, wqidx, wq->queue_id); + else + pr_err("%s WQ: WQ[Qid:%d]\n", + qtypestr, wq->queue_id); + + lpfc_debug_dump_q(wq); } /** - * lpfc_debug_dump_fcp_cq - dump all entries from a fcp work queue's cmpl queue + * lpfc_debug_dump_cq - dump all entries from a fcp work queue's + * cmpl queue * @phba: Pointer to HBA context object. - * @fcp_wqidx: Index to a FCP work queue. + * @wqidx: Index to a FCP work queue. * - * This function dumps all entries from a FCP complete queue which is - * associated to the FCP work queue specified by the @fcp_wqidx. + * This function dumps all entries from a FCP completion queue + * which is associated to the work queue specified by the @wqidx. **/ static inline void -lpfc_debug_dump_fcp_cq(struct lpfc_hba *phba, int fcp_wqidx) +lpfc_debug_dump_cq(struct lpfc_hba *phba, int qtype, int wqidx) { - int fcp_cqidx, fcp_cqid; - - /* sanity check */ - if (fcp_wqidx >= phba->cfg_fcp_io_channel) + struct lpfc_queue *wq, *cq, *eq; + char *qtypestr; + int eqidx; + + /* fcp wq and cq are 1:1, thus same indexes */ + + if (qtype == DUMP_FCP) { + wq = phba->sli4_hba.fcp_wq[wqidx]; + cq = phba->sli4_hba.fcp_cq[wqidx]; + qtypestr = "FCP"; + } else if (qtype == DUMP_MBX) { + wq = phba->sli4_hba.mbx_wq; + cq = phba->sli4_hba.mbx_cq; + qtypestr = "MBX"; + } else if (qtype == DUMP_ELS) { + wq = phba->sli4_hba.els_wq; + cq = phba->sli4_hba.els_cq; + qtypestr = "ELS"; + } else return; - fcp_cqid = phba->sli4_hba.fcp_wq[fcp_wqidx]->assoc_qid; - for (fcp_cqidx = 0; fcp_cqidx < phba->cfg_fcp_io_channel; fcp_cqidx++) - if (phba->sli4_hba.fcp_cq[fcp_cqidx]->queue_id == fcp_cqid) + for (eqidx = 0; eqidx < phba->cfg_fcp_io_channel; eqidx++) { + eq = phba->sli4_hba.hba_eq[eqidx]; + if (cq->assoc_qid == eq->queue_id) break; - if (phba->intr_type == MSIX) { - if (fcp_cqidx >= phba->cfg_fcp_io_channel) - return; - } else { - if (fcp_cqidx > 0) - return; + } + if (eqidx == phba->cfg_fcp_io_channel) { + pr_err("Couldn't find EQ for CQ. 
Using EQ[0]\n"); + eqidx = 0; + eq = phba->sli4_hba.hba_eq[0]; } - printk(KERN_ERR "FCP CQ: WQ[Idx:%d|Qid%d]->CQ[Idx%d|Qid%d]:\n", - fcp_wqidx, phba->sli4_hba.fcp_wq[fcp_wqidx]->queue_id, - fcp_cqidx, fcp_cqid); - lpfc_debug_dump_q(phba->sli4_hba.fcp_cq[fcp_cqidx]); + if (qtype == DUMP_FCP) + pr_err("%s CQ: WQ[Idx:%d|Qid%d]->CQ[Idx%d|Qid%d]" + "->EQ[Idx:%d|Qid:%d]:\n", + qtypestr, wqidx, wq->queue_id, wqidx, cq->queue_id, + eqidx, eq->queue_id); + else + pr_err("%s CQ: WQ[Qid:%d]->CQ[Qid:%d]" + "->EQ[Idx:%d|Qid:%d]:\n", + qtypestr, wq->queue_id, cq->queue_id, + eqidx, eq->queue_id); + + lpfc_debug_dump_q(cq); } /** @@ -421,64 +466,15 @@ lpfc_debug_dump_fcp_cq(struct lpfc_hba *phba, int fcp_wqidx) * associated to the FCP work queue specified by the @fcp_wqidx. **/ static inline void -lpfc_debug_dump_hba_eq(struct lpfc_hba *phba, int fcp_wqidx) +lpfc_debug_dump_hba_eq(struct lpfc_hba *phba, int qidx) { - struct lpfc_queue *qdesc; - int fcp_eqidx, fcp_eqid; - int fcp_cqidx, fcp_cqid; - - /* sanity check */ - if (fcp_wqidx >= phba->cfg_fcp_io_channel) - return; - fcp_cqid = phba->sli4_hba.fcp_wq[fcp_wqidx]->assoc_qid; - for (fcp_cqidx = 0; fcp_cqidx < phba->cfg_fcp_io_channel; fcp_cqidx++) - if (phba->sli4_hba.fcp_cq[fcp_cqidx]->queue_id == fcp_cqid) - break; - if (phba->intr_type == MSIX) { - if (fcp_cqidx >= phba->cfg_fcp_io_channel) - return; - } else { - if (fcp_cqidx > 0) - return; - } + struct lpfc_queue *qp; - fcp_eqidx = fcp_cqidx; - fcp_eqid = phba->sli4_hba.hba_eq[fcp_eqidx]->queue_id; - qdesc = phba->sli4_hba.hba_eq[fcp_eqidx]; + qp = phba->sli4_hba.hba_eq[qidx]; - printk(KERN_ERR "FCP EQ: WQ[Idx:%d|Qid:%d]->CQ[Idx:%d|Qid:%d]->" - "EQ[Idx:%d|Qid:%d]\n", - fcp_wqidx, phba->sli4_hba.fcp_wq[fcp_wqidx]->queue_id, - fcp_cqidx, fcp_cqid, fcp_eqidx, fcp_eqid); - lpfc_debug_dump_q(qdesc); -} + pr_err("EQ[Idx:%d|Qid:%d]\n", qidx, qp->queue_id); -/** - * lpfc_debug_dump_els_wq - dump all entries from the els work queue - * @phba: Pointer to HBA context object. - * - * This function dumps all entries from the ELS work queue. - **/ -static inline void -lpfc_debug_dump_els_wq(struct lpfc_hba *phba) -{ - printk(KERN_ERR "ELS WQ: WQ[Qid:%d]:\n", - phba->sli4_hba.els_wq->queue_id); - lpfc_debug_dump_q(phba->sli4_hba.els_wq); -} - -/** - * lpfc_debug_dump_mbx_wq - dump all entries from the mbox work queue - * @phba: Pointer to HBA context object. - * - * This function dumps all entries from the MBOX work queue. - **/ -static inline void -lpfc_debug_dump_mbx_wq(struct lpfc_hba *phba) -{ - printk(KERN_ERR "MBX WQ: WQ[Qid:%d]\n", - phba->sli4_hba.mbx_wq->queue_id); - lpfc_debug_dump_q(phba->sli4_hba.mbx_wq); + lpfc_debug_dump_q(qp); } /** @@ -509,36 +505,6 @@ lpfc_debug_dump_hdr_rq(struct lpfc_hba *phba) lpfc_debug_dump_q(phba->sli4_hba.hdr_rq); } -/** - * lpfc_debug_dump_els_cq - dump all entries from the els complete queue - * @phba: Pointer to HBA context object. - * - * This function dumps all entries from the els complete queue. - **/ -static inline void -lpfc_debug_dump_els_cq(struct lpfc_hba *phba) -{ - printk(KERN_ERR "ELS CQ: WQ[Qid:%d]->CQ[Qid:%d]\n", - phba->sli4_hba.els_wq->queue_id, - phba->sli4_hba.els_cq->queue_id); - lpfc_debug_dump_q(phba->sli4_hba.els_cq); -} - -/** - * lpfc_debug_dump_mbx_cq - dump all entries from the mbox complete queue - * @phba: Pointer to HBA context object. - * - * This function dumps all entries from the mbox complete queue. 
- **/ -static inline void -lpfc_debug_dump_mbx_cq(struct lpfc_hba *phba) -{ - printk(KERN_ERR "MBX CQ: WQ[Qid:%d]->CQ[Qid:%d]\n", - phba->sli4_hba.mbx_wq->queue_id, - phba->sli4_hba.mbx_cq->queue_id); - lpfc_debug_dump_q(phba->sli4_hba.mbx_cq); -} - /** * lpfc_debug_dump_wq_by_id - dump all entries from a work queue by queue id * @phba: Pointer to HBA context object. @@ -556,14 +522,15 @@ lpfc_debug_dump_wq_by_id(struct lpfc_hba *phba, int qid) if (phba->sli4_hba.fcp_wq[wq_idx]->queue_id == qid) break; if (wq_idx < phba->cfg_fcp_io_channel) { - printk(KERN_ERR "FCP WQ[Idx:%d|Qid:%d]\n", wq_idx, qid); + pr_err("FCP WQ[Idx:%d|Qid:%d]\n", wq_idx, qid); lpfc_debug_dump_q(phba->sli4_hba.fcp_wq[wq_idx]); return; } if (phba->sli4_hba.els_wq->queue_id == qid) { - printk(KERN_ERR "ELS WQ[Qid:%d]\n", qid); + pr_err("ELS WQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.els_wq); + return; } } @@ -617,27 +584,26 @@ lpfc_debug_dump_rq_by_id(struct lpfc_hba *phba, int qid) static inline void lpfc_debug_dump_cq_by_id(struct lpfc_hba *phba, int qid) { - int cq_idx = 0; + int cq_idx; - do { + for (cq_idx = 0; cq_idx < phba->cfg_fcp_io_channel; cq_idx++) if (phba->sli4_hba.fcp_cq[cq_idx]->queue_id == qid) break; - } while (++cq_idx < phba->cfg_fcp_io_channel); if (cq_idx < phba->cfg_fcp_io_channel) { - printk(KERN_ERR "FCP CQ[Idx:%d|Qid:%d]\n", cq_idx, qid); + pr_err("FCP CQ[Idx:%d|Qid:%d]\n", cq_idx, qid); lpfc_debug_dump_q(phba->sli4_hba.fcp_cq[cq_idx]); return; } if (phba->sli4_hba.els_cq->queue_id == qid) { - printk(KERN_ERR "ELS CQ[Qid:%d]\n", qid); + pr_err("ELS CQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.els_cq); return; } if (phba->sli4_hba.mbx_cq->queue_id == qid) { - printk(KERN_ERR "MBX CQ[Qid:%d]\n", qid); + pr_err("MBX CQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.mbx_cq); } } -- GitLab From 895427bd012ce5814fc9888c7c0ee9de44761833 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:30 -0800 Subject: [PATCH 0441/1084] scsi: lpfc: NVME Initiator: Base modifications NVME Initiator: Base modifications This patch adds base modifications for NVME initiator support. The base modifications consist of: - Formal split of SLI3 rings from SLI-4 WQs (sometimes referred to as rings as well) as implementation now widely varies between the two. - Addition of configuration modes: SCSI initiator only; NVME initiator only; NVME target only; and SCSI and NVME initiator. The configuration mode drives overall adapter configuration, offloads enabled, and resource splits. NVME support is only available on SLI-4 devices and newer fw. - Implements the following based on configuration mode: - Exchange resources are split by protocol; Obviously, if only 1 mode, then no split occurs. Default is 50/50. module attribute allows tuning. - Pools and config parameters are separated per-protocol - Each protocol has it's own set of queues, but share interrupt vectors. SCSI: SLI3 devices have few queues and the original style of queue allocation remains. SLI4 devices piggy back on an "io-channel" concept that eventually needs to merge with scsi-mq/blk-mq support (it is underway). For now, the paradigm continues as it existed prior. io channel allocates N msix and N WQs (N=4 default) and either round robins or uses cpu # modulo N for scheduling. A bunch of module parameters allow the configuration to be tuned. NVME (initiator): Allocates an msix per cpu (or whatever pci_alloc_irq_vectors gets) Allocates a WQ per cpu, and maps the WQs to msix on a WQ # modulo msix vector count basis. 
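(A minimal sketch of that mapping, using example counts rather than the driver's actual configuration: with one WQ per CPU and however many MSI-X vectors were actually granted, WQ n is serviced by vector n modulo the vector count.)

/* Illustrative only; the counts below are example values, not lpfc defaults. */
#include <stdio.h>

int main(void)
{
	int nwqs = 8;		/* e.g. one NVME WQ per present CPU */
	int nvectors = 4;	/* e.g. what was granted for MSI-X */
	int wq;

	for (wq = 0; wq < nwqs; wq++)
		printf("WQ %d -> MSI-X vector %d\n", wq, wq % nvectors);
	return 0;
}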
Module parameters exist to cap/control the config if desired. - Each protocol has its own buffer and dma pools. I apologize for the size of the patch. Signed-off-by: Dick Kennedy Signed-off-by: James Smart ---- Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 80 +- drivers/scsi/lpfc/lpfc_attr.c | 374 ++++- drivers/scsi/lpfc/lpfc_bsg.c | 27 +- drivers/scsi/lpfc/lpfc_crtn.h | 27 +- drivers/scsi/lpfc/lpfc_debugfs.c | 178 ++- drivers/scsi/lpfc/lpfc_debugfs.h | 72 +- drivers/scsi/lpfc/lpfc_els.c | 38 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 157 +- drivers/scsi/lpfc/lpfc_hw.h | 3 +- drivers/scsi/lpfc/lpfc_hw4.h | 63 +- drivers/scsi/lpfc/lpfc_init.c | 2141 +++++++++++++++------------- drivers/scsi/lpfc/lpfc_logmsg.h | 4 + drivers/scsi/lpfc/lpfc_mbox.c | 23 +- drivers/scsi/lpfc/lpfc_mem.c | 62 +- drivers/scsi/lpfc/lpfc_nportdisc.c | 7 +- drivers/scsi/lpfc/lpfc_nvme.h | 88 ++ drivers/scsi/lpfc/lpfc_scsi.c | 101 +- drivers/scsi/lpfc/lpfc_scsi.h | 6 + drivers/scsi/lpfc/lpfc_sli.c | 1429 ++++++++++++++----- drivers/scsi/lpfc/lpfc_sli.h | 30 +- drivers/scsi/lpfc/lpfc_sli4.h | 75 +- drivers/scsi/lpfc/lpfc_vport.c | 2 + 22 files changed, 3353 insertions(+), 1634 deletions(-) create mode 100644 drivers/scsi/lpfc/lpfc_nvme.h diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 8e0d6f43878d..77ad757ca231 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -20,6 +20,7 @@ *******************************************************************/ #include +#include #if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_SCSI_LPFC_DEBUG_FS) #define CONFIG_SCSI_LPFC_DEBUG_FS @@ -53,6 +54,7 @@ struct lpfc_sli2_slim; #define LPFC_MAX_SG_SEG_CNT 4096 /* sg element count per scsi cmnd */ #define LPFC_MAX_SGL_SEG_CNT 512 /* SGL element count per scsi cmnd */ #define LPFC_MAX_BPL_SEG_CNT 4096 /* BPL element count per scsi cmnd */ +#define LPFC_MIN_NVME_SEG_CNT 254 #define LPFC_MAX_SGE_SIZE 0x80000000 /* Maximum data allowed in a SGE */ #define LPFC_IOCB_LIST_CNT 2250 /* list of IOCBs for fast-path usage. */ @@ -114,6 +116,13 @@ enum lpfc_polling_flags { DISABLE_FCP_RING_INT = 0x2 }; +struct perf_prof { + uint16_t cmd_cpu[40]; + uint16_t rsp_cpu[40]; + uint16_t qh_cpu[40]; + uint16_t wqidx[40]; +}; + /* Provide DMA memory definitions the driver uses per port instance. */ struct lpfc_dmabuf { struct list_head list; @@ -131,10 +140,24 @@ struct lpfc_dma_pool { struct hbq_dmabuf { struct lpfc_dmabuf hbuf; struct lpfc_dmabuf dbuf; - uint32_t size; + uint16_t total_size; + uint16_t bytes_recv; uint32_t tag; struct lpfc_cq_event cq_event; unsigned long time_stamp; + void *context; +}; + +struct rqb_dmabuf { + struct lpfc_dmabuf hbuf; + struct lpfc_dmabuf dbuf; + uint16_t total_size; + uint16_t bytes_recv; + void *context; + struct lpfc_iocbq *iocbq; + struct lpfc_sglq *sglq; + struct lpfc_queue *hrq; /* ptr to associated Header RQ */ + struct lpfc_queue *drq; /* ptr to associated Data RQ */ }; /* Priority bit. Set value to exceed low water mark in lpfc_mem. */ @@ -442,6 +465,11 @@ struct lpfc_vport { uint16_t fdmi_num_disc; uint32_t fdmi_hba_mask; uint32_t fdmi_port_mask; + + /* There is a single nvme instance per vport. 
*/ + struct nvme_fc_local_port *localport; + uint8_t nvmei_support; /* driver supports NVME Initiator */ + uint32_t last_fcp_wqidx; }; struct hbq_s { @@ -459,10 +487,9 @@ struct hbq_s { struct hbq_dmabuf *); }; -#define LPFC_MAX_HBQS 4 /* this matches the position in the lpfc_hbq_defs array */ #define LPFC_ELS_HBQ 0 -#define LPFC_EXTRA_HBQ 1 +#define LPFC_MAX_HBQS 1 enum hba_temp_state { HBA_NORMAL_TEMP, @@ -652,6 +679,8 @@ struct lpfc_hba { * Firmware supports Forced Link Speed * capability */ +#define HBA_NVME_IOQ_FLUSH 0x80000 /* NVME IO queues flushed. */ + uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; @@ -700,6 +729,8 @@ struct lpfc_hba { uint8_t wwpn[8]; uint32_t RandomData[7]; uint8_t fcp_embed_io; + uint8_t nvme_support; /* Firmware supports NVME */ + uint8_t nvmet_support; /* driver supports NVMET */ uint8_t mds_diags_support; /* HBA Config Parameters */ @@ -725,6 +756,9 @@ struct lpfc_hba { uint32_t cfg_fcp_imax; uint32_t cfg_fcp_cpu_map; uint32_t cfg_fcp_io_channel; + uint32_t cfg_nvme_oas; + uint32_t cfg_nvme_io_channel; + uint32_t cfg_nvme_enable_fb; uint32_t cfg_total_seg_cnt; uint32_t cfg_sg_seg_cnt; uint32_t cfg_sg_dma_buf_size; @@ -770,6 +804,12 @@ struct lpfc_hba { #define LPFC_FDMI_SUPPORT 1 /* FDMI supported? */ uint32_t cfg_enable_SmartSAN; uint32_t cfg_enable_mds_diags; + uint32_t cfg_enable_fc4_type; + uint32_t cfg_xri_split; +#define LPFC_ENABLE_FCP 1 +#define LPFC_ENABLE_NVME 2 +#define LPFC_ENABLE_BOTH 3 + uint32_t io_channel_irqs; /* number of irqs for io channels */ lpfc_vpd_t vpd; /* vital product data */ struct pci_dev *pcidev; @@ -784,11 +824,11 @@ struct lpfc_hba { unsigned long data_flags; uint32_t hbq_in_use; /* HBQs in use flag */ - struct list_head rb_pend_list; /* Received buffers to be processed */ uint32_t hbq_count; /* Count of configured HBQs */ struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indicies */ - atomic_t fcp_qidx; /* next work queue to post work to */ + atomic_t fcp_qidx; /* next FCP WQ (RR Policy) */ + atomic_t nvme_qidx; /* next NVME WQ (RR Policy) */ phys_addr_t pci_bar0_map; /* Physical address for PCI BAR0 */ phys_addr_t pci_bar1_map; /* Physical address for PCI BAR1 */ @@ -843,9 +883,17 @@ struct lpfc_hba { /* * stat counters */ - uint64_t fc4InputRequests; - uint64_t fc4OutputRequests; - uint64_t fc4ControlRequests; + uint64_t fc4ScsiInputRequests; + uint64_t fc4ScsiOutputRequests; + uint64_t fc4ScsiControlRequests; + uint64_t fc4ScsiIoCmpls; + uint64_t fc4NvmeInputRequests; + uint64_t fc4NvmeOutputRequests; + uint64_t fc4NvmeControlRequests; + uint64_t fc4NvmeIoCmpls; + uint64_t fc4NvmeLsRequests; + uint64_t fc4NvmeLsCmpls; + uint64_t bg_guard_err_cnt; uint64_t bg_apptag_err_cnt; uint64_t bg_reftag_err_cnt; @@ -856,17 +904,23 @@ struct lpfc_hba { struct list_head lpfc_scsi_buf_list_get; struct list_head lpfc_scsi_buf_list_put; uint32_t total_scsi_bufs; + spinlock_t nvme_buf_list_get_lock; /* NVME buf alloc list lock */ + spinlock_t nvme_buf_list_put_lock; /* NVME buf free list lock */ + struct list_head lpfc_nvme_buf_list_get; + struct list_head lpfc_nvme_buf_list_put; + uint32_t total_nvme_bufs; struct list_head lpfc_iocb_list; uint32_t total_iocbq_bufs; struct list_head active_rrq_list; spinlock_t hbalock; /* pci_mem_pools */ - struct pci_pool *lpfc_scsi_dma_buf_pool; + struct pci_pool *lpfc_sg_dma_buf_pool; struct pci_pool *lpfc_mbuf_pool; struct pci_pool *lpfc_hrb_pool; /* header receive buffer pool */ struct pci_pool *lpfc_drb_pool; /* data receive buffer pool */ 
struct pci_pool *lpfc_hbq_pool; /* SLI3 hbq buffer pool */ + struct pci_pool *txrdy_payload_pool; struct lpfc_dma_pool lpfc_mbuf_safety_pool; mempool_t *mbox_mem_pool; @@ -1092,3 +1146,11 @@ lpfc_sli_read_hs(struct lpfc_hba *phba) return 0; } + +static inline struct lpfc_sli_ring * +lpfc_phba_elsring(struct lpfc_hba *phba) +{ + if (phba->sli_rev == LPFC_SLI_REV4) + return phba->sli4_hba.els_wq->pring; + return &phba->sli.sli3_ring[LPFC_ELS_RING]; +} diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index ce41dea47682..b91b4bb1062d 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -35,14 +35,17 @@ #include #include +#include + #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_version.h" #include "lpfc_compat.h" @@ -129,6 +132,124 @@ lpfc_enable_fip_show(struct device *dev, struct device_attribute *attr, return snprintf(buf, PAGE_SIZE, "0\n"); } +static ssize_t +lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct lpfc_vport *vport = shost_priv(shost); + struct lpfc_hba *phba = vport->phba; + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport; + struct nvme_fc_remote_port *nrport; + char *statep; + int len = 0; + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { + len += snprintf(buf, PAGE_SIZE, "NVME Disabled\n"); + return len; + } + + localport = vport->localport; + if (!localport) { + len = snprintf(buf, PAGE_SIZE, + "NVME Initiator x%llx is not allocated\n", + wwn_to_u64(vport->fc_portname.u.wwn)); + return len; + } + len = snprintf(buf, PAGE_SIZE, "NVME Initiator Enabled\n"); + + spin_lock_irq(shost->host_lock); + lport = (struct lpfc_nvme_lport *)localport->private; + + /* Port state is only one of two values for now. */ + if (localport->port_id) + statep = "ONLINE"; + else + statep = "UNKNOWN "; + + len += snprintf(buf + len, PAGE_SIZE - len, + "%s%d WWPN x%llx WWNN x%llx DID x%06x %s\n", + "NVME LPORT lpfc", + phba->brd_no, + wwn_to_u64(vport->fc_portname.u.wwn), + wwn_to_u64(vport->fc_nodename.u.wwn), + localport->port_id, statep); + + list_for_each_entry(rport, &lport->rport_list, list) { + /* local short-hand pointer. */ + nrport = rport->remoteport; + + /* Port state is only one of two values for now. */ + switch (nrport->port_state) { + case FC_OBJSTATE_ONLINE: + statep = "ONLINE"; + break; + case FC_OBJSTATE_UNKNOWN: + statep = "UNKNOWN "; + break; + default: + statep = "UNSUPPORTED"; + break; + } + + /* Tab in to show lport ownership. 
*/ + len += snprintf(buf + len, PAGE_SIZE - len, + "NVME RPORT "); + if (phba->brd_no >= 10) + len += snprintf(buf + len, PAGE_SIZE - len, " "); + + len += snprintf(buf + len, PAGE_SIZE - len, "WWPN x%llx ", + nrport->port_name); + len += snprintf(buf + len, PAGE_SIZE - len, "WWNN x%llx ", + nrport->node_name); + len += snprintf(buf + len, PAGE_SIZE - len, "DID x%06x ", + nrport->port_id); + + switch (nrport->port_role) { + case FC_PORT_ROLE_NVME_INITIATOR: + len += snprintf(buf + len, PAGE_SIZE - len, + "INITIATOR "); + break; + case FC_PORT_ROLE_NVME_TARGET: + len += snprintf(buf + len, PAGE_SIZE - len, + "TARGET "); + break; + case FC_PORT_ROLE_NVME_DISCOVERY: + len += snprintf(buf + len, PAGE_SIZE - len, + "DISCOVERY "); + break; + default: + len += snprintf(buf + len, PAGE_SIZE - len, + "UNKNOWN_ROLE x%x", + nrport->port_role); + break; + } + len += snprintf(buf + len, PAGE_SIZE - len, "%s ", statep); + /* Terminate the string. */ + len += snprintf(buf + len, PAGE_SIZE - len, "\n"); + } + spin_unlock_irq(shost->host_lock); + + len += snprintf(buf + len, PAGE_SIZE, "\nNVME Statistics\n"); + len += snprintf(buf+len, PAGE_SIZE-len, + "LS: Xmt %016llx Cmpl %016llx\n", + phba->fc4NvmeLsRequests, + phba->fc4NvmeLsCmpls); + + len += snprintf(buf+len, PAGE_SIZE-len, + "FCP: Rd %016llx Wr %016llx IO %016llx\n", + phba->fc4NvmeInputRequests, + phba->fc4NvmeOutputRequests, + phba->fc4NvmeControlRequests); + + len += snprintf(buf+len, PAGE_SIZE-len, + " Cmpl %016llx\n", phba->fc4NvmeIoCmpls); + + return len; +} + static ssize_t lpfc_bg_info_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -675,6 +796,28 @@ lpfc_issue_lip(struct Scsi_Host *shost) return 0; } +int +lpfc_emptyq_wait(struct lpfc_hba *phba, struct list_head *q, spinlock_t *lock) +{ + int cnt = 0; + + spin_lock_irq(lock); + while (!list_empty(q)) { + spin_unlock_irq(lock); + msleep(20); + if (cnt++ > 250) { /* 5 secs */ + lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, + "0466 %s %s\n", + "Outstanding IO when ", + "bringing Adapter offline\n"); + return 0; + } + spin_lock_irq(lock); + } + spin_unlock_irq(lock); + return 1; +} + /** * lpfc_do_offline - Issues a mailbox command to bring the link down * @phba: lpfc_hba pointer. @@ -694,10 +837,10 @@ static int lpfc_do_offline(struct lpfc_hba *phba, uint32_t type) { struct completion online_compl; + struct lpfc_queue *qp = NULL; struct lpfc_sli_ring *pring; struct lpfc_sli *psli; int status = 0; - int cnt = 0; int i; int rc; @@ -717,20 +860,24 @@ lpfc_do_offline(struct lpfc_hba *phba, uint32_t type) /* Wait a little for things to settle down, but not * long enough for dev loss timeout to expire. 
*/ - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - while (!list_empty(&pring->txcmplq)) { - msleep(10); - if (cnt++ > 500) { /* 5 secs */ - lpfc_printf_log(phba, - KERN_WARNING, LOG_INIT, - "0466 Outstanding IO when " - "bringing Adapter offline\n"); - break; - } + if (phba->sli_rev != LPFC_SLI_REV4) { + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; + if (!lpfc_emptyq_wait(phba, &pring->txcmplq, + &phba->hbalock)) + goto out; + } + } else { + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) + continue; + if (!lpfc_emptyq_wait(phba, &pring->txcmplq, + &pring->ring_lock)) + goto out; } } - +out: init_completion(&online_compl); rc = lpfc_workq_post_event(phba, &status, &online_compl, type); if (rc == 0) @@ -1945,6 +2092,7 @@ lpfc_##attr##_store(struct device *dev, struct device_attribute *attr, \ } +static DEVICE_ATTR(nvme_info, 0444, lpfc_nvme_info_show, NULL); static DEVICE_ATTR(bg_info, S_IRUGO, lpfc_bg_info_show, NULL); static DEVICE_ATTR(bg_guard_err, S_IRUGO, lpfc_bg_guard_err_show, NULL); static DEVICE_ATTR(bg_apptag_err, S_IRUGO, lpfc_bg_apptag_err_show, NULL); @@ -2816,9 +2964,9 @@ lpfc_txq_hw_show(struct device *dev, struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_hba *phba = ((struct lpfc_vport *) shost->hostdata)->phba; + struct lpfc_sli_ring *pring = lpfc_phba_elsring(phba); - return snprintf(buf, PAGE_SIZE, "%d\n", - phba->sli.ring[LPFC_ELS_RING].txq_max); + return snprintf(buf, PAGE_SIZE, "%d\n", pring->txq_max); } static DEVICE_ATTR(txq_hw, S_IRUGO, @@ -2829,9 +2977,9 @@ lpfc_txcmplq_hw_show(struct device *dev, struct device_attribute *attr, { struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_hba *phba = ((struct lpfc_vport *) shost->hostdata)->phba; + struct lpfc_sli_ring *pring = lpfc_phba_elsring(phba); - return snprintf(buf, PAGE_SIZE, "%d\n", - phba->sli.ring[LPFC_ELS_RING].txcmplq_max); + return snprintf(buf, PAGE_SIZE, "%d\n", pring->txcmplq_max); } static DEVICE_ATTR(txcmplq_hw, S_IRUGO, @@ -3029,6 +3177,31 @@ lpfc_vport_param_store(devloss_tmo) static DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR, lpfc_devloss_tmo_show, lpfc_devloss_tmo_store); +/* + * lpfc_enable_fc4_type: Defines what FC4 types are supported. + * Supported Values: 1 - register just FCP + * 3 - register both FCP and NVME + * Supported values are [1,3]. Default value is 3 + */ +LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, + LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, + "Define fc4 type to register with fabric."); + +/* + * lpfc_xri_split: Defines the division of XRI resources between SCSI and NVME + * This parameter is only used if: + * lpfc_enable_fc4_type is 3 - register both FCP and NVME + * + * ELS/CT always get 10% of XRIs, up to a maximum of 250 + * The remaining XRIs get split up based on lpfc_xri_split per port: + * + * Supported Values are in percentages + * the xri_split value is the percentage the SCSI port will get. The remaining + * percentage will go to NVME. + */ +LPFC_ATTR_R(xri_split, 50, 10, 90, + "Division of XRI resources between SCSI and NVME"); + /* # lpfc_log_verbose: Only turn this flag on if you are willing to risk being # deluged with LOTS of information. @@ -4143,13 +4316,14 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr, /* * Value range for the HBA is [5000,5000000] * The value for each EQ depends on how many EQs are configured. 
+ * Allow value == 0 */ - if (val < LPFC_MIN_IMAX || val > LPFC_MAX_IMAX) + if (val && (val < LPFC_MIN_IMAX || val > LPFC_MAX_IMAX)) return -EINVAL; phba->cfg_fcp_imax = (uint32_t)val; - for (i = 0; i < phba->cfg_fcp_io_channel; i += LPFC_MAX_EQ_DELAY) - lpfc_modify_fcp_eq_delay(phba, i); + for (i = 0; i < phba->io_channel_irqs; i++) + lpfc_modify_hba_eq_delay(phba, i); return strlen(buf); } @@ -4187,7 +4361,8 @@ lpfc_fcp_imax_init(struct lpfc_hba *phba, int val) return 0; } - if (val >= LPFC_MIN_IMAX && val <= LPFC_MAX_IMAX) { + if ((val >= LPFC_MIN_IMAX && val <= LPFC_MAX_IMAX) || + (val == 0)) { phba->cfg_fcp_imax = val; return 0; } @@ -4376,6 +4551,17 @@ LPFC_VPORT_ATTR_RW(use_adisc, 0, 0, 1, LPFC_VPORT_ATTR_RW(first_burst_size, 0, 0, 65536, "First burst size for Targets that support first burst"); +/* +* lpfc_nvme_enable_fb: Enable NVME first burst on I and T functions. +* For the Initiator (I), enabling this parameter means that an NVME +* PRLI response with FBA enabled and an FB_SIZE set to a nonzero value +* will be processed by the initiator for subsequent NVME FCP IO. +* Parameter supported on physical port only - no NPIV support. +* Value range is [0,1]. Default value is 0 (disabled). +*/ +LPFC_ATTR_RW(nvme_enable_fb, 0, 0, 1, + "Enable First Burst feature on I and T functions."); + /* # lpfc_max_scsicmpl_time: Use scsi command completion time to control I/O queue # depth. Default value is 0. When the value of this parameter is zero the @@ -4423,17 +4609,25 @@ static DEVICE_ATTR(lpfc_max_scsicmpl_time, S_IRUGO | S_IWUSR, LPFC_ATTR_R(ack0, 0, 0, 1, "Enable ACK0 support"); /* -# lpfc_fcp_io_sched: Determine scheduling algrithmn for issuing FCP cmds -# range is [0,1]. Default value is 0. -# For [0], FCP commands are issued to Work Queues ina round robin fashion. -# For [1], FCP commands are issued to a Work Queue associated with the -# current CPU. -# It would be set to 1 by the driver if it's able to set up cpu affinity -# for FCP I/Os through Work Queue associated with the current CPU. Otherwise, -# roundrobin scheduling of FCP I/Os through WQs will be used. -*/ -LPFC_ATTR_RW(fcp_io_sched, 0, 0, 1, "Determine scheduling algorithm for " - "issuing commands [0] - Round Robin, [1] - Current CPU"); + * lpfc_io_sched: Determine scheduling algrithmn for issuing FCP cmds + * range is [0,1]. Default value is 0. + * For [0], FCP commands are issued to Work Queues ina round robin fashion. + * For [1], FCP commands are issued to a Work Queue associated with the + * current CPU. + * + * LPFC_FCP_SCHED_ROUND_ROBIN == 0 + * LPFC_FCP_SCHED_BY_CPU == 1 + * + * The driver dynamically sets this to 1 (BY_CPU) if it's able to set up cpu + * affinity for FCP/NVME I/Os through Work Queues associated with the current + * CPU. Otherwise, the default 0 (Round Robin) scheduling of FCP/NVME I/Os + * through WQs will be used. + */ +LPFC_ATTR_RW(fcp_io_sched, LPFC_FCP_SCHED_ROUND_ROBIN, + LPFC_FCP_SCHED_ROUND_ROBIN, + LPFC_FCP_SCHED_BY_CPU, + "Determine scheduling algorithm for " + "issuing commands [0] - Round Robin, [1] - Current CPU"); /* # lpfc_fcp2_no_tgt_reset: Determine bus reset behavior @@ -4560,14 +4754,53 @@ LPFC_ATTR_R(use_msi, 2, 0, 2, "Use Message Signaled Interrupts (1) or " "MSI-X (2), if possible"); /* -# lpfc_fcp_io_channel: Set the number of FCP EQ/CQ/WQ IO channels -# -# Value range is [1,7]. Default value is 4. 
-*/ -LPFC_ATTR_R(fcp_io_channel, LPFC_FCP_IO_CHAN_DEF, LPFC_FCP_IO_CHAN_MIN, - LPFC_FCP_IO_CHAN_MAX, + * lpfc_nvme_oas: Use the oas bit when sending NVME IOs + * + * 0 = NVME OAS disabled + * 1 = NVME OAS enabled + * + * Value range is [0,1]. Default value is 0. + */ +LPFC_ATTR_RW(nvme_oas, 0, 0, 1, + "Use OAS bit on NVME IOs"); + +/* + * lpfc_fcp_io_channel: Set the number of FCP IO channels the driver + * will advertise it supports to the SCSI layer. This also will map to + * the number of WQs the driver will create. + * + * 0 = Configure the number of io channels to the number of active CPUs. + * 1,32 = Manually specify how many io channels to use. + * + * Value range is [0,32]. Default value is 4. + */ +LPFC_ATTR_R(fcp_io_channel, + LPFC_FCP_IO_CHAN_DEF, + LPFC_HBA_IO_CHAN_MIN, LPFC_HBA_IO_CHAN_MAX, "Set the number of FCP I/O channels"); +/* + * lpfc_nvme_io_channel: Set the number of IO hardware queues the driver + * will advertise it supports to the NVME layer. This also will map to + * the number of WQs the driver will create. + * + * This module parameter is valid when lpfc_enable_fc4_type is set + * to support NVME. + * + * The NVME Layer will try to create this many, plus 1 administrative + * hardware queue. The administrative queue will always map to WQ 0 + * A hardware IO queue maps (qidx) to a specific driver WQ. + * + * 0 = Configure the number of io channels to the number of active CPUs. + * 1,32 = Manually specify how many io channels to use. + * + * Value range is [0,32]. Default value is 0. + */ +LPFC_ATTR_R(nvme_io_channel, + LPFC_NVME_IO_CHAN_DEF, + LPFC_HBA_IO_CHAN_MIN, LPFC_HBA_IO_CHAN_MAX, + "Set the number of NVME I/O channels"); + /* # lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware. # 0 = HBA resets disabled @@ -4692,6 +4925,7 @@ LPFC_ATTR_R(sg_seg_cnt, LPFC_DEFAULT_SG_SEG_CNT, LPFC_DEFAULT_SG_SEG_CNT, LPFC_ATTR_R(enable_mds_diags, 0, 0, 1, "Enable MDS Diagnostics"); struct device_attribute *lpfc_hba_attrs[] = { + &dev_attr_nvme_info, &dev_attr_bg_info, &dev_attr_bg_guard_err, &dev_attr_bg_apptag_err, @@ -4718,6 +4952,8 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_peer_port_login, &dev_attr_lpfc_nodev_tmo, &dev_attr_lpfc_devloss_tmo, + &dev_attr_lpfc_enable_fc4_type, + &dev_attr_lpfc_xri_split, &dev_attr_lpfc_fcp_class, &dev_attr_lpfc_use_adisc, &dev_attr_lpfc_first_burst_size, @@ -4752,9 +4988,12 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_poll_tmo, &dev_attr_lpfc_task_mgmt_tmo, &dev_attr_lpfc_use_msi, + &dev_attr_lpfc_nvme_oas, &dev_attr_lpfc_fcp_imax, &dev_attr_lpfc_fcp_cpu_map, &dev_attr_lpfc_fcp_io_channel, + &dev_attr_lpfc_nvme_io_channel, + &dev_attr_lpfc_nvme_enable_fb, &dev_attr_lpfc_enable_bg, &dev_attr_lpfc_soft_wwnn, &dev_attr_lpfc_soft_wwpn, @@ -5764,9 +6003,9 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_fdmi_on_init(phba, lpfc_fdmi_on); lpfc_enable_SmartSAN_init(phba, lpfc_enable_SmartSAN); lpfc_use_msi_init(phba, lpfc_use_msi); + lpfc_nvme_oas_init(phba, lpfc_nvme_oas); lpfc_fcp_imax_init(phba, lpfc_fcp_imax); lpfc_fcp_cpu_map_init(phba, lpfc_fcp_cpu_map); - lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel); lpfc_enable_hba_reset_init(phba, lpfc_enable_hba_reset); lpfc_enable_hba_heartbeat_init(phba, lpfc_enable_hba_heartbeat); @@ -5789,8 +6028,43 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) else phba->cfg_poll = lpfc_poll; + lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type); + + /* Initialize first burst. Target vs Initiator are different. 
*/ + lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb); + lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel); + lpfc_nvme_io_channel_init(phba, lpfc_nvme_io_channel); + + if (phba->sli_rev != LPFC_SLI_REV4) { + /* NVME only supported on SLI4 */ + phba->nvmet_support = 0; + phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP; + } else { + /* We MUST have FCP support */ + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + phba->cfg_enable_fc4_type |= LPFC_ENABLE_FCP; + } + + /* A value of 0 means use the number of CPUs found in the system */ + if (phba->cfg_nvme_io_channel == 0) + phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu; + if (phba->cfg_fcp_io_channel == 0) + phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu; + + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) + phba->cfg_nvme_io_channel = 0; + + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME) + phba->cfg_fcp_io_channel = 0; + + if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) + phba->io_channel_irqs = phba->cfg_fcp_io_channel; + else + phba->io_channel_irqs = phba->cfg_nvme_io_channel; + phba->cfg_soft_wwnn = 0L; phba->cfg_soft_wwpn = 0L; + lpfc_xri_split_init(phba, lpfc_xri_split); lpfc_sg_seg_cnt_init(phba, lpfc_sg_seg_cnt); lpfc_hba_queue_depth_init(phba, lpfc_hba_queue_depth); lpfc_hba_log_verbose_init(phba, lpfc_log_verbose); @@ -5806,6 +6080,26 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) return; } +/** + * lpfc_nvme_mod_param_dep - Adjust module parameter value based on + * dependencies between protocols and roles. + * @phba: lpfc_hba pointer. + **/ +void +lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) +{ + phba->nvmet_support = 0; + if (phba->cfg_nvme_io_channel > phba->sli4_hba.num_present_cpu) + phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu; + if (phba->cfg_fcp_io_channel > phba->sli4_hba.num_present_cpu) + phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu; + + if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) + phba->io_channel_irqs = phba->cfg_fcp_io_channel; + else + phba->io_channel_irqs = phba->cfg_nvme_io_channel; +} + /** * lpfc_get_vport_cfgparam - Used during port create, init the vport structure * @vport: lpfc_vport pointer. 
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 1ee131f124b9..55a2270cadfc 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -1704,6 +1704,7 @@ lpfc_bsg_diag_mode_enter(struct lpfc_hba *phba) struct lpfc_vport **vports; struct Scsi_Host *shost; struct lpfc_sli *psli; + struct lpfc_queue *qp = NULL; struct lpfc_sli_ring *pring; int i = 0; @@ -1711,9 +1712,6 @@ lpfc_bsg_diag_mode_enter(struct lpfc_hba *phba) if (!psli) return -ENODEV; - pring = &psli->ring[LPFC_FCP_RING]; - if (!pring) - return -ENODEV; if ((phba->link_state == LPFC_HBA_ERROR) || (psli->sli_flag & LPFC_BLOCK_MGMT_IO) || @@ -1732,10 +1730,18 @@ lpfc_bsg_diag_mode_enter(struct lpfc_hba *phba) scsi_block_requests(shost); } - while (!list_empty(&pring->txcmplq)) { - if (i++ > 500) /* wait up to 5 seconds */ + if (phba->sli_rev != LPFC_SLI_REV4) { + pring = &psli->sli3_ring[LPFC_FCP_RING]; + lpfc_emptyq_wait(phba, &pring->txcmplq, &phba->hbalock); + return 0; + } + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring || (pring->ringno != LPFC_FCP_RING)) + continue; + if (!lpfc_emptyq_wait(phba, &pring->txcmplq, + &pring->ring_lock)) break; - msleep(10); } return 0; } @@ -2875,8 +2881,7 @@ diag_cmd_data_alloc(struct lpfc_hba *phba, static int lpfcdiag_loop_post_rxbufs(struct lpfc_hba *phba, uint16_t rxxri, size_t len) { - struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct lpfc_iocbq *cmdiocbq; IOCB_t *cmd = NULL; struct list_head head, *curr, *next; @@ -2890,6 +2895,8 @@ static int lpfcdiag_loop_post_rxbufs(struct lpfc_hba *phba, uint16_t rxxri, int iocb_stat; int i = 0; + pring = lpfc_phba_elsring(phba); + cmdiocbq = lpfc_sli_get_iocbq(phba); rxbmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); if (rxbmp != NULL) { @@ -5403,13 +5410,15 @@ lpfc_bsg_timeout(struct bsg_job *job) struct lpfc_vport *vport = shost_priv(fc_bsg_to_shost(job)); struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *cmdiocb; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct bsg_job_data *dd_data; unsigned long flags; int rc = 0; LIST_HEAD(completions); struct lpfc_iocbq *check_iocb, *next_iocb; + pring = lpfc_phba_elsring(phba); + /* if job's driver data is NULL, the command completed or is in the * the process of completing. In this case, return status to request * so the timeout is retried. 
This avoids double completion issues diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 309643a2c55c..2e6345ebd6c5 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -21,6 +21,7 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *); struct fc_rport; +struct fc_frame_header; void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli_read_link_ste(struct lpfc_hba *); void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t); @@ -167,6 +168,8 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *); void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); +int lpfc_issue_gidft(struct lpfc_vport *vport); +int lpfc_get_gidft_type(struct lpfc_vport *vport, struct lpfc_iocbq *iocbq); int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t); int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int, uint32_t); void lpfc_fdmi_num_disc_check(struct lpfc_vport *); @@ -186,6 +189,8 @@ void lpfc_unblock_mgmt_io(struct lpfc_hba *); void lpfc_offline_prep(struct lpfc_hba *, int); void lpfc_offline(struct lpfc_hba *); void lpfc_reset_hba(struct lpfc_hba *); +int lpfc_emptyq_wait(struct lpfc_hba *phba, struct list_head *hd, + spinlock_t *slock); int lpfc_fof_queue_create(struct lpfc_hba *); int lpfc_fof_queue_setup(struct lpfc_hba *); @@ -193,7 +198,11 @@ int lpfc_fof_queue_destroy(struct lpfc_hba *); irqreturn_t lpfc_sli4_fof_intr_handler(int, void *); int lpfc_sli_setup(struct lpfc_hba *); -int lpfc_sli_queue_setup(struct lpfc_hba *); +int lpfc_sli4_setup(struct lpfc_hba *phba); +void lpfc_sli_queue_init(struct lpfc_hba *phba); +void lpfc_sli4_queue_init(struct lpfc_hba *phba); +struct lpfc_sli_ring *lpfc_sli4_calc_ring(struct lpfc_hba *phba, + struct lpfc_iocbq *iocbq); void lpfc_handle_eratt(struct lpfc_hba *); void lpfc_handle_latt(struct lpfc_hba *); @@ -233,6 +242,11 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *); void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *); void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *, uint16_t); +int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, + struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe); +int lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq, + struct lpfc_queue *dq, int count); +int lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hq); void lpfc_unregister_fcf(struct lpfc_hba *); void lpfc_unregister_fcf_rescan(struct lpfc_hba *); void lpfc_unregister_unused_fcf(struct lpfc_hba *); @@ -287,6 +301,9 @@ void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *, LPFC_MBOXQ_t *); int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *, uint32_t); +int lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t rnum, + struct lpfc_iocbq *iocbq); +struct lpfc_sglq *__lpfc_clear_active_sglq(struct lpfc_hba *phba, uint16_t xri); void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t); void lpfc_sli_bemem_bcopy(void *, void *, uint32_t); void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *); @@ -356,6 +373,7 @@ extern struct device_attribute *lpfc_hba_attrs[]; extern struct device_attribute *lpfc_vport_attrs[]; extern struct scsi_host_template lpfc_template; extern struct scsi_host_template lpfc_template_s3; +extern struct scsi_host_template lpfc_template_nvme; extern struct 
scsi_host_template lpfc_vport_template; extern struct fc_function_template lpfc_transport_functions; extern struct fc_function_template lpfc_vport_transport_functions; @@ -471,7 +489,9 @@ int lpfc_issue_unreg_vfi(struct lpfc_vport *); int lpfc_selective_reset(struct lpfc_hba *); int lpfc_sli4_read_config(struct lpfc_hba *); void lpfc_sli4_node_prep(struct lpfc_hba *); -int lpfc_sli4_xri_sgl_update(struct lpfc_hba *); +int lpfc_sli4_els_sgl_update(struct lpfc_hba *phba); +int lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba); +int lpfc_sli4_nvme_sgl_update(struct lpfc_hba *phba); void lpfc_free_sgl_list(struct lpfc_hba *, struct list_head *); uint32_t lpfc_sli_port_speed_get(struct lpfc_hba *); int lpfc_sli4_request_firmware_update(struct lpfc_hba *, uint8_t); @@ -496,3 +516,6 @@ bool lpfc_find_next_oas_lun(struct lpfc_hba *, struct lpfc_name *, uint32_t *, uint32_t *); int lpfc_sli4_dump_page_a0(struct lpfc_hba *phba, struct lpfcMboxq *mbox); void lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb); + +/* NVME interfaces. */ +void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 93ddda16c632..d9bbebe8eb37 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -484,20 +484,23 @@ lpfc_debugfs_dumpHostSlim_data(struct lpfc_hba *phba, char *buf, int size) off += (8 * sizeof(uint32_t)); } - for (i = 0; i < 4; i++) { - pgpp = &phba->port_gp[i]; - pring = &psli->ring[i]; - len += snprintf(buf+len, size-len, - "Ring %d: CMD GetInx:%d (Max:%d Next:%d " - "Local:%d flg:x%x) RSP PutInx:%d Max:%d\n", - i, pgpp->cmdGetInx, pring->sli.sli3.numCiocb, - pring->sli.sli3.next_cmdidx, - pring->sli.sli3.local_getidx, - pring->flag, pgpp->rspPutInx, - pring->sli.sli3.numRiocb); - } - if (phba->sli_rev <= LPFC_SLI_REV3) { + for (i = 0; i < 4; i++) { + pgpp = &phba->port_gp[i]; + pring = &psli->sli3_ring[i]; + len += snprintf(buf+len, size-len, + "Ring %d: CMD GetInx:%d " + "(Max:%d Next:%d " + "Local:%d flg:x%x) " + "RSP PutInx:%d Max:%d\n", + i, pgpp->cmdGetInx, + pring->sli.sli3.numCiocb, + pring->sli.sli3.next_cmdidx, + pring->sli.sli3.local_getidx, + pring->flag, pgpp->rspPutInx, + pring->sli.sli3.numRiocb); + } + word0 = readl(phba->HAregaddr); word1 = readl(phba->CAregaddr); word2 = readl(phba->HSregaddr); @@ -535,6 +538,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); + len += snprintf(buf+len, size-len, "\nFCP Nodelist Entries ...\n"); spin_lock_irq(shost->host_lock); list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { if (!cnt) { @@ -2011,6 +2015,14 @@ lpfc_idiag_wqs_for_cq(struct lpfc_hba *phba, char *wqtype, char *pbuffer, if (*len >= max_cnt) return 1; } + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) { + qp = phba->sli4_hba.nvme_wq[qidx]; + if (qp->assoc_qid != cq_id) + continue; + *len = __lpfc_idiag_print_wq(qp, wqtype, pbuffer, *len); + if (*len >= max_cnt) + return 1; + } return 0; } @@ -2096,6 +2108,25 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, return 1; } + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) { + qp = phba->sli4_hba.nvme_cq[qidx]; + if (qp->assoc_qid != eq_id) + continue; + + *len = __lpfc_idiag_print_cq(qp, "NVME", pbuffer, *len); + + /* Reset max counter */ + qp->CQ_max_cqe = 0; + + if (*len >= max_cnt) + return 1; + + rc = lpfc_idiag_wqs_for_cq(phba, "NVME", pbuffer, len, + max_cnt, qp->queue_id); + if (rc) + return 1; 
+ } + return 0; } @@ -2162,21 +2193,21 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, spin_lock_irq(&phba->hbalock); /* Fast-path event queue */ - if (phba->sli4_hba.hba_eq && phba->cfg_fcp_io_channel) { + if (phba->sli4_hba.hba_eq && phba->io_channel_irqs) { x = phba->lpfc_idiag_last_eq; - if (phba->cfg_fof && (x >= phba->cfg_fcp_io_channel)) { + if (phba->cfg_fof && (x >= phba->io_channel_irqs)) { phba->lpfc_idiag_last_eq = 0; goto fof; } phba->lpfc_idiag_last_eq++; - if (phba->lpfc_idiag_last_eq >= phba->cfg_fcp_io_channel) + if (phba->lpfc_idiag_last_eq >= phba->io_channel_irqs) if (phba->cfg_fof == 0) phba->lpfc_idiag_last_eq = 0; len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "EQ %d out of %d HBA EQs\n", - x, phba->cfg_fcp_io_channel); + x, phba->io_channel_irqs); /* Fast-path EQ */ qp = phba->sli4_hba.hba_eq[x]; @@ -2191,6 +2222,7 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, if (len >= max_cnt) goto too_big; + /* will dump both fcp and nvme cqs/wqs for the eq */ rc = lpfc_idiag_cqs_for_eq(phba, pbuffer, &len, max_cnt, qp->queue_id); if (rc) @@ -2227,6 +2259,23 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, if (len >= max_cnt) goto too_big; + /* Slow-path NVME LS response CQ */ + qp = phba->sli4_hba.nvmels_cq; + len = __lpfc_idiag_print_cq(qp, "NVME LS", + pbuffer, len); + /* Reset max counter */ + if (qp) + qp->CQ_max_cqe = 0; + if (len >= max_cnt) + goto too_big; + + /* Slow-path NVME LS WQ */ + qp = phba->sli4_hba.nvmels_wq; + len = __lpfc_idiag_print_wq(qp, "NVME LS", + pbuffer, len); + if (len >= max_cnt) + goto too_big; + qp = phba->sli4_hba.hdr_rq; len = __lpfc_idiag_print_rqpair(qp, phba->sli4_hba.dat_rq, "RQpair", pbuffer, len); @@ -2447,7 +2496,7 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private; uint32_t qidx, quetp, queid, index, count, offset, value; uint32_t *pentry; - struct lpfc_queue *pque; + struct lpfc_queue *pque, *qp; int rc; /* This is a user write operation */ @@ -2483,19 +2532,15 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, case LPFC_IDIAG_EQ: /* HBA event queue */ if (phba->sli4_hba.hba_eq) { - for (qidx = 0; qidx < phba->cfg_fcp_io_channel; - qidx++) { - if (phba->sli4_hba.hba_eq[qidx] && - phba->sli4_hba.hba_eq[qidx]->queue_id == - queid) { + for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) { + qp = phba->sli4_hba.hba_eq[qidx]; + if (qp && qp->queue_id == queid) { /* Sanity check */ - rc = lpfc_idiag_que_param_check( - phba->sli4_hba.hba_eq[qidx], + rc = lpfc_idiag_que_param_check(qp, index, count); if (rc) goto error_out; - idiag.ptr_private = - phba->sli4_hba.hba_eq[qidx]; + idiag.ptr_private = qp; goto pass_check; } } @@ -2525,24 +2570,32 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.els_cq; goto pass_check; } + /* NVME LS complete queue */ + if (phba->sli4_hba.nvmels_cq && + phba->sli4_hba.nvmels_cq->queue_id == queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvmels_cq, index, count); + if (rc) + goto error_out; + idiag.ptr_private = phba->sli4_hba.nvmels_cq; + goto pass_check; + } /* FCP complete queue */ if (phba->sli4_hba.fcp_cq) { - qidx = 0; - do { - if (phba->sli4_hba.fcp_cq[qidx] && - phba->sli4_hba.fcp_cq[qidx]->queue_id == - queid) { + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; + qidx++) { + qp = phba->sli4_hba.fcp_cq[qidx]; + 
if (qp && qp->queue_id == queid) { /* Sanity check */ rc = lpfc_idiag_que_param_check( - phba->sli4_hba.fcp_cq[qidx], - index, count); + qp, index, count); if (rc) goto error_out; - idiag.ptr_private = - phba->sli4_hba.fcp_cq[qidx]; + idiag.ptr_private = qp; goto pass_check; } - } while (++qidx < phba->cfg_fcp_io_channel); + } } goto error_out; break; @@ -2572,22 +2625,45 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.els_wq; goto pass_check; } + /* NVME LS work queue */ + if (phba->sli4_hba.nvmels_wq && + phba->sli4_hba.nvmels_wq->queue_id == queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvmels_wq, index, count); + if (rc) + goto error_out; + idiag.ptr_private = phba->sli4_hba.nvmels_wq; + goto pass_check; + } /* FCP work queue */ if (phba->sli4_hba.fcp_wq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; - qidx++) { - if (!phba->sli4_hba.fcp_wq[qidx]) - continue; - if (phba->sli4_hba.fcp_wq[qidx]->queue_id == - queid) { + qidx++) { + qp = phba->sli4_hba.fcp_wq[qidx]; + if (qp && qp->queue_id == queid) { /* Sanity check */ rc = lpfc_idiag_que_param_check( - phba->sli4_hba.fcp_wq[qidx], - index, count); + qp, index, count); + if (rc) + goto error_out; + idiag.ptr_private = qp; + goto pass_check; + } + } + } + /* NVME work queue */ + if (phba->sli4_hba.nvme_wq) { + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; + qidx++) { + qp = phba->sli4_hba.nvme_wq[qidx]; + if (qp && qp->queue_id == queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + qp, index, count); if (rc) goto error_out; - idiag.ptr_private = - phba->sli4_hba.fcp_wq[qidx]; + idiag.ptr_private = qp; goto pass_check; } } @@ -4562,10 +4638,14 @@ lpfc_debug_dump_all_queues(struct lpfc_hba *phba) */ lpfc_debug_dump_wq(phba, DUMP_MBX, 0); lpfc_debug_dump_wq(phba, DUMP_ELS, 0); + lpfc_debug_dump_wq(phba, DUMP_NVMELS, 0); for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) lpfc_debug_dump_wq(phba, DUMP_FCP, idx); + for (idx = 0; idx < phba->cfg_nvme_io_channel; idx++) + lpfc_debug_dump_wq(phba, DUMP_NVME, idx); + lpfc_debug_dump_hdr_rq(phba); lpfc_debug_dump_dat_rq(phba); /* @@ -4573,13 +4653,17 @@ lpfc_debug_dump_all_queues(struct lpfc_hba *phba) */ lpfc_debug_dump_cq(phba, DUMP_MBX, 0); lpfc_debug_dump_cq(phba, DUMP_ELS, 0); + lpfc_debug_dump_cq(phba, DUMP_NVMELS, 0); for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) lpfc_debug_dump_cq(phba, DUMP_FCP, idx); + for (idx = 0; idx < phba->cfg_nvme_io_channel; idx++) + lpfc_debug_dump_cq(phba, DUMP_NVME, idx); + /* * Dump Event Queues (EQs) */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) + for (idx = 0; idx < phba->io_channel_irqs; idx++) lpfc_debug_dump_hba_eq(phba, idx); } diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 9ae2c4b5fd12..98814d651239 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -44,8 +44,10 @@ enum { DUMP_FCP, + DUMP_NVME, DUMP_MBX, DUMP_ELS, + DUMP_NVMELS, }; /* @@ -364,11 +366,11 @@ lpfc_debug_dump_q(struct lpfc_queue *q) } /** - * lpfc_debug_dump_wq - dump all entries from the fcp work queue + * lpfc_debug_dump_wq - dump all entries from the fcp or nvme work queue * @phba: Pointer to HBA context object. - * @wqidx: Index to a FCP work queue. + * @wqidx: Index to a FCP or NVME work queue. * - * This function dumps all entries from a FCP work queue specified + * This function dumps all entries from a FCP or NVME work queue specified * by the wqidx. 
**/ static inline void @@ -380,16 +382,22 @@ lpfc_debug_dump_wq(struct lpfc_hba *phba, int qtype, int wqidx) if (qtype == DUMP_FCP) { wq = phba->sli4_hba.fcp_wq[wqidx]; qtypestr = "FCP"; + } else if (qtype == DUMP_NVME) { + wq = phba->sli4_hba.nvme_wq[wqidx]; + qtypestr = "NVME"; } else if (qtype == DUMP_MBX) { wq = phba->sli4_hba.mbx_wq; qtypestr = "MBX"; } else if (qtype == DUMP_ELS) { wq = phba->sli4_hba.els_wq; qtypestr = "ELS"; + } else if (qtype == DUMP_NVMELS) { + wq = phba->sli4_hba.nvmels_wq; + qtypestr = "NVMELS"; } else return; - if (qtype == DUMP_FCP) + if (qtype == DUMP_FCP || qtype == DUMP_NVME) pr_err("%s WQ: WQ[Idx:%d|Qid:%d]\n", qtypestr, wqidx, wq->queue_id); else @@ -400,12 +408,12 @@ lpfc_debug_dump_wq(struct lpfc_hba *phba, int qtype, int wqidx) } /** - * lpfc_debug_dump_cq - dump all entries from a fcp work queue's + * lpfc_debug_dump_cq - dump all entries from a fcp or nvme work queue's * cmpl queue * @phba: Pointer to HBA context object. * @wqidx: Index to a FCP work queue. * - * This function dumps all entries from a FCP completion queue + * This function dumps all entries from a FCP or NVME completion queue * which is associated to the work queue specified by the @wqidx. **/ static inline void @@ -415,12 +423,16 @@ lpfc_debug_dump_cq(struct lpfc_hba *phba, int qtype, int wqidx) char *qtypestr; int eqidx; - /* fcp wq and cq are 1:1, thus same indexes */ + /* fcp/nvme wq and cq are 1:1, thus same indexes */ if (qtype == DUMP_FCP) { wq = phba->sli4_hba.fcp_wq[wqidx]; cq = phba->sli4_hba.fcp_cq[wqidx]; qtypestr = "FCP"; + } else if (qtype == DUMP_NVME) { + wq = phba->sli4_hba.nvme_wq[wqidx]; + cq = phba->sli4_hba.nvme_cq[wqidx]; + qtypestr = "NVME"; } else if (qtype == DUMP_MBX) { wq = phba->sli4_hba.mbx_wq; cq = phba->sli4_hba.mbx_cq; @@ -429,21 +441,25 @@ lpfc_debug_dump_cq(struct lpfc_hba *phba, int qtype, int wqidx) wq = phba->sli4_hba.els_wq; cq = phba->sli4_hba.els_cq; qtypestr = "ELS"; + } else if (qtype == DUMP_NVMELS) { + wq = phba->sli4_hba.nvmels_wq; + cq = phba->sli4_hba.nvmels_cq; + qtypestr = "NVMELS"; } else return; - for (eqidx = 0; eqidx < phba->cfg_fcp_io_channel; eqidx++) { + for (eqidx = 0; eqidx < phba->io_channel_irqs; eqidx++) { eq = phba->sli4_hba.hba_eq[eqidx]; if (cq->assoc_qid == eq->queue_id) break; } - if (eqidx == phba->cfg_fcp_io_channel) { + if (eqidx == phba->io_channel_irqs) { pr_err("Couldn't find EQ for CQ. 
Using EQ[0]\n"); eqidx = 0; eq = phba->sli4_hba.hba_eq[0]; } - if (qtype == DUMP_FCP) + if (qtype == DUMP_FCP || qtype == DUMP_NVME) pr_err("%s CQ: WQ[Idx:%d|Qid%d]->CQ[Idx%d|Qid%d]" "->EQ[Idx:%d|Qid:%d]:\n", qtypestr, wqidx, wq->queue_id, wqidx, cq->queue_id, @@ -527,11 +543,25 @@ lpfc_debug_dump_wq_by_id(struct lpfc_hba *phba, int qid) return; } + for (wq_idx = 0; wq_idx < phba->cfg_nvme_io_channel; wq_idx++) + if (phba->sli4_hba.nvme_wq[wq_idx]->queue_id == qid) + break; + if (wq_idx < phba->cfg_nvme_io_channel) { + pr_err("NVME WQ[Idx:%d|Qid:%d]\n", wq_idx, qid); + lpfc_debug_dump_q(phba->sli4_hba.nvme_wq[wq_idx]); + return; + } + if (phba->sli4_hba.els_wq->queue_id == qid) { pr_err("ELS WQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.els_wq); return; } + + if (phba->sli4_hba.nvmels_wq->queue_id == qid) { + pr_err("NVME LS WQ[Qid:%d]\n", qid); + lpfc_debug_dump_q(phba->sli4_hba.nvmels_wq); + } } /** @@ -596,12 +626,28 @@ lpfc_debug_dump_cq_by_id(struct lpfc_hba *phba, int qid) return; } + for (cq_idx = 0; cq_idx < phba->cfg_nvme_io_channel; cq_idx++) + if (phba->sli4_hba.nvme_cq[cq_idx]->queue_id == qid) + break; + + if (cq_idx < phba->cfg_nvme_io_channel) { + pr_err("NVME CQ[Idx:%d|Qid:%d]\n", cq_idx, qid); + lpfc_debug_dump_q(phba->sli4_hba.nvme_cq[cq_idx]); + return; + } + if (phba->sli4_hba.els_cq->queue_id == qid) { pr_err("ELS CQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.els_cq); return; } + if (phba->sli4_hba.nvmels_cq->queue_id == qid) { + pr_err("NVME LS CQ[Qid:%d]\n", qid); + lpfc_debug_dump_q(phba->sli4_hba.nvmels_cq); + return; + } + if (phba->sli4_hba.mbx_cq->queue_id == qid) { pr_err("MBX CQ[Qid:%d]\n", qid); lpfc_debug_dump_q(phba->sli4_hba.mbx_cq); @@ -621,17 +667,15 @@ lpfc_debug_dump_eq_by_id(struct lpfc_hba *phba, int qid) { int eq_idx; - for (eq_idx = 0; eq_idx < phba->cfg_fcp_io_channel; eq_idx++) { + for (eq_idx = 0; eq_idx < phba->io_channel_irqs; eq_idx++) if (phba->sli4_hba.hba_eq[eq_idx]->queue_id == qid) break; - } - if (eq_idx < phba->cfg_fcp_io_channel) { + if (eq_idx < phba->io_channel_irqs) { printk(KERN_ERR "FCP EQ[Idx:%d|Qid:%d]\n", eq_idx, qid); lpfc_debug_dump_q(phba->sli4_hba.hba_eq[eq_idx]); return; } - } void lpfc_debug_dump_all_queues(struct lpfc_hba *); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 3a1f1a2a2b55..7be235457540 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1323,7 +1323,7 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba) "0201 Abort outstanding I/O on NPort x%x\n", Fabric_DID); - pring = &phba->sli.ring[LPFC_ELS_RING]; + pring = lpfc_phba_elsring(phba); /* * Check the txcmplq for an iocb that matches the nport the driver is @@ -7155,7 +7155,8 @@ lpfc_els_timeout_handler(struct lpfc_vport *vport) timeout = (uint32_t)(phba->fc_ratov << 1); - pring = &phba->sli.ring[LPFC_ELS_RING]; + pring = lpfc_phba_elsring(phba); + if ((phba->pport->load_flag & FC_UNLOADING)) return; spin_lock_irq(&phba->hbalock); @@ -7224,7 +7225,7 @@ lpfc_els_timeout_handler(struct lpfc_vport *vport) spin_unlock_irq(&phba->hbalock); } - if (!list_empty(&phba->sli.ring[LPFC_ELS_RING].txcmplq)) + if (!list_empty(&pring->txcmplq)) if (!(phba->pport->load_flag & FC_UNLOADING)) mod_timer(&vport->els_tmofunc, jiffies + msecs_to_jiffies(1000 * timeout)); @@ -7255,7 +7256,7 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) { LIST_HEAD(abort_list); struct lpfc_hba *phba = vport->phba; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct lpfc_iocbq 
*tmp_iocb, *piocb; IOCB_t *cmd = NULL; @@ -7267,6 +7268,7 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) * a working list and release the locks before calling the abort. */ spin_lock_irq(&phba->hbalock); + pring = lpfc_phba_elsring(phba); if (phba->sli_rev == LPFC_SLI_REV4) spin_lock(&pring->ring_lock); @@ -9013,7 +9015,9 @@ void lpfc_fabric_abort_nport(struct lpfc_nodelist *ndlp) LIST_HEAD(completions); struct lpfc_hba *phba = ndlp->phba; struct lpfc_iocbq *tmp_iocb, *piocb; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; + + pring = lpfc_phba_elsring(phba); spin_lock_irq(&phba->hbalock); list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list, @@ -9069,13 +9073,13 @@ lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *vport) unsigned long iflag = 0; spin_lock_irqsave(&phba->hbalock, iflag); - spin_lock(&phba->sli4_hba.abts_sgl_list_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); list_for_each_entry_safe(sglq_entry, sglq_next, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) { if (sglq_entry->ndlp && sglq_entry->ndlp->vport == vport) sglq_entry->ndlp = NULL; } - spin_unlock(&phba->sli4_hba.abts_sgl_list_lock); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); return; } @@ -9099,22 +9103,22 @@ lpfc_sli4_els_xri_aborted(struct lpfc_hba *phba, struct lpfc_sglq *sglq_entry = NULL, *sglq_next = NULL; unsigned long iflag = 0; struct lpfc_nodelist *ndlp; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; + + pring = lpfc_phba_elsring(phba); spin_lock_irqsave(&phba->hbalock, iflag); - spin_lock(&phba->sli4_hba.abts_sgl_list_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); list_for_each_entry_safe(sglq_entry, sglq_next, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) { if (sglq_entry->sli4_xritag == xri) { list_del(&sglq_entry->list); ndlp = sglq_entry->ndlp; sglq_entry->ndlp = NULL; - spin_lock(&pring->ring_lock); list_add_tail(&sglq_entry->list, - &phba->sli4_hba.lpfc_sgl_list); + &phba->sli4_hba.lpfc_els_sgl_list); sglq_entry->state = SGL_FREED; - spin_unlock(&pring->ring_lock); - spin_unlock(&phba->sli4_hba.abts_sgl_list_lock); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); lpfc_set_rrq_active(phba, ndlp, sglq_entry->sli4_lxritag, @@ -9126,21 +9130,21 @@ lpfc_sli4_els_xri_aborted(struct lpfc_hba *phba, return; } } - spin_unlock(&phba->sli4_hba.abts_sgl_list_lock); + spin_unlock(&phba->sli4_hba.sgl_list_lock); lxri = lpfc_sli4_xri_inrange(phba, xri); if (lxri == NO_XRI) { spin_unlock_irqrestore(&phba->hbalock, iflag); return; } - spin_lock(&pring->ring_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); sglq_entry = __lpfc_get_active_sglq(phba, lxri); if (!sglq_entry || (sglq_entry->sli4_xritag != xri)) { - spin_unlock(&pring->ring_lock); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); return; } sglq_entry->state = SGL_XRI_ABORTED; - spin_unlock(&pring->ring_lock); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); return; } diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 92627df35a58..ede831d1f467 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -93,7 +93,7 @@ lpfc_terminate_rport_io(struct fc_rport *rport) if (ndlp->nlp_sid != NLP_NO_SID) { lpfc_sli_abort_iocb(ndlp->vport, - &phba->sli.ring[phba->sli.fcp_ring], + 
&phba->sli.sli3_ring[LPFC_FCP_RING], ndlp->nlp_sid, 0, LPFC_CTX_TGT); } } @@ -247,8 +247,8 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) if (ndlp->nlp_sid != NLP_NO_SID) { /* flush the target */ lpfc_sli_abort_iocb(vport, - &phba->sli.ring[phba->sli.fcp_ring], - ndlp->nlp_sid, 0, LPFC_CTX_TGT); + &phba->sli.sli3_ring[LPFC_FCP_RING], + ndlp->nlp_sid, 0, LPFC_CTX_TGT); } put_node = rdata->pnode != NULL; rdata->pnode = NULL; @@ -283,7 +283,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) if (ndlp->nlp_sid != NLP_NO_SID) { warn_on = 1; - lpfc_sli_abort_iocb(vport, &phba->sli.ring[phba->sli.fcp_ring], + lpfc_sli_abort_iocb(vport, &phba->sli.sli3_ring[LPFC_FCP_RING], ndlp->nlp_sid, 0, LPFC_CTX_TGT); } @@ -495,11 +495,12 @@ lpfc_send_fastpath_evt(struct lpfc_hba *phba, return; } - fc_host_post_vendor_event(shost, - fc_get_event_number(), - evt_data_size, - evt_data, - LPFC_NL_VENDOR_ID); + if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME) + fc_host_post_vendor_event(shost, + fc_get_event_number(), + evt_data_size, + evt_data, + LPFC_NL_VENDOR_ID); lpfc_free_fast_evt(phba, fast_evt_data); return; @@ -682,7 +683,7 @@ lpfc_work_done(struct lpfc_hba *phba) } lpfc_destroy_vport_work_array(phba, vports); - pring = &phba->sli.ring[LPFC_ELS_RING]; + pring = lpfc_phba_elsring(phba); status = (ha_copy & (HA_RXMASK << (4*LPFC_ELS_RING))); status >>= (4*LPFC_ELS_RING); if ((status & HA_RXMASK) || @@ -894,11 +895,16 @@ lpfc_linkdown(struct lpfc_hba *phba) spin_unlock_irq(shost->host_lock); } vports = lpfc_create_vport_work_array(phba); - if (vports != NULL) + if (vports != NULL) { for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { /* Issue a LINK DOWN event to all nodes */ lpfc_linkdown_port(vports[i]); + + vports[i]->fc_myDID = 0; + + /* todo: init: revise localport nvme attributes */ } + } lpfc_destroy_vport_work_array(phba, vports); /* Clean up any firmware default rpi's */ mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -914,7 +920,6 @@ lpfc_linkdown(struct lpfc_hba *phba) /* Setup myDID for link up if we are in pt2pt mode */ if (phba->pport->fc_flag & FC_PT2PT) { - phba->pport->fc_myDID = 0; mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mb) { lpfc_config_link(phba, mb); @@ -929,7 +934,6 @@ lpfc_linkdown(struct lpfc_hba *phba) phba->pport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI); spin_unlock_irq(shost->host_lock); } - return 0; } @@ -1016,7 +1020,7 @@ lpfc_linkup(struct lpfc_hba *phba) * This routine handles processing a CLEAR_LA mailbox * command upon completion. It is setup in the LPFC_MBOXQ * as the completion routine when the command is - * handed off to the SLI layer. + * handed off to the SLI layer. SLI3 only. */ static void lpfc_mbx_cmpl_clear_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) @@ -1028,9 +1032,8 @@ lpfc_mbx_cmpl_clear_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) uint32_t control; /* Since we don't do discovery right now, turn these off here */ - psli->ring[psli->extra_ring].flag &= ~LPFC_STOP_IOCB_EVENT; - psli->ring[psli->fcp_ring].flag &= ~LPFC_STOP_IOCB_EVENT; - psli->ring[psli->next_ring].flag &= ~LPFC_STOP_IOCB_EVENT; + psli->sli3_ring[LPFC_EXTRA_RING].flag &= ~LPFC_STOP_IOCB_EVENT; + psli->sli3_ring[LPFC_FCP_RING].flag &= ~LPFC_STOP_IOCB_EVENT; /* Check for error */ if ((mb->mbxStatus) && (mb->mbxStatus != 0x1601)) { @@ -3277,7 +3280,7 @@ lpfc_mbx_issue_link_down(struct lpfc_hba *phba) * This routine handles processing a READ_TOPOLOGY mailbox * command upon completion. 
It is setup in the LPFC_MBOXQ * as the completion routine when the command is - * handed off to the SLI layer. + * handed off to the SLI layer. SLI4 only. */ void lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) @@ -3285,11 +3288,14 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_vport *vport = pmb->vport; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_mbx_read_top *la; + struct lpfc_sli_ring *pring; MAILBOX_t *mb = &pmb->u.mb; struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); /* Unblock ELS traffic */ - phba->sli.ring[LPFC_ELS_RING].flag &= ~LPFC_STOP_IOCB_EVENT; + pring = lpfc_phba_elsring(phba); + pring->flag &= ~LPFC_STOP_IOCB_EVENT; + /* Check for error */ if (mb->mbxStatus) { lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT, @@ -3458,6 +3464,14 @@ lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~NLP_IGNR_REG_CMPL; spin_unlock_irq(shost->host_lock); + + /* + * We cannot leave the RPI registered because + * if we go thru discovery again for this ndlp + * a subsequent REG_RPI will fail. + */ + ndlp->nlp_flag |= NLP_RPI_REGISTERED; + lpfc_unreg_rpi(vport, ndlp); } /* Call state machine */ @@ -3903,6 +3917,9 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct fc_rport_identifiers rport_ids; struct lpfc_hba *phba = vport->phba; + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME) + return; + /* Remote port has reappeared. Re-register w/ FC transport */ rport_ids.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn); rport_ids.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn); @@ -4395,7 +4412,6 @@ lpfc_check_sli_ndlp(struct lpfc_hba *phba, struct lpfc_iocbq *iocb, struct lpfc_nodelist *ndlp) { - struct lpfc_sli *psli = &phba->sli; IOCB_t *icmd = &iocb->iocb; struct lpfc_vport *vport = ndlp->vport; @@ -4414,9 +4430,7 @@ lpfc_check_sli_ndlp(struct lpfc_hba *phba, if (iocb->context1 == (uint8_t *) ndlp) return 1; } - } else if (pring->ringno == psli->extra_ring) { - - } else if (pring->ringno == psli->fcp_ring) { + } else if (pring->ringno == LPFC_FCP_RING) { /* Skip match check if waiting to relogin to FCP target */ if ((ndlp->nlp_type & NLP_FCP_TARGET) && (ndlp->nlp_flag & NLP_DELAY_TMO)) { @@ -4429,6 +4443,54 @@ lpfc_check_sli_ndlp(struct lpfc_hba *phba, return 0; } +static void +__lpfc_dequeue_nport_iocbs(struct lpfc_hba *phba, + struct lpfc_nodelist *ndlp, struct lpfc_sli_ring *pring, + struct list_head *dequeue_list) +{ + struct lpfc_iocbq *iocb, *next_iocb; + + list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { + /* Check to see if iocb matches the nport */ + if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) + /* match, dequeue */ + list_move_tail(&iocb->list, dequeue_list); + } +} + +static void +lpfc_sli3_dequeue_nport_iocbs(struct lpfc_hba *phba, + struct lpfc_nodelist *ndlp, struct list_head *dequeue_list) +{ + struct lpfc_sli *psli = &phba->sli; + uint32_t i; + + spin_lock_irq(&phba->hbalock); + for (i = 0; i < psli->num_rings; i++) + __lpfc_dequeue_nport_iocbs(phba, ndlp, &psli->sli3_ring[i], + dequeue_list); + spin_unlock_irq(&phba->hbalock); +} + +static void +lpfc_sli4_dequeue_nport_iocbs(struct lpfc_hba *phba, + struct lpfc_nodelist *ndlp, struct list_head *dequeue_list) +{ + struct lpfc_sli_ring *pring; + struct lpfc_queue *qp = NULL; + + spin_lock_irq(&phba->hbalock); + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) + 
continue; + spin_lock_irq(&pring->ring_lock); + __lpfc_dequeue_nport_iocbs(phba, ndlp, pring, dequeue_list); + spin_unlock_irq(&pring->ring_lock); + } + spin_unlock_irq(&phba->hbalock); +} + /* * Free resources / clean up outstanding I/Os * associated with nlp_rpi in the LPFC_NODELIST entry. @@ -4437,10 +4499,6 @@ static int lpfc_no_rpi(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) { LIST_HEAD(completions); - struct lpfc_sli *psli; - struct lpfc_sli_ring *pring; - struct lpfc_iocbq *iocb, *next_iocb; - uint32_t i; lpfc_fabric_abort_nport(ndlp); @@ -4448,29 +4506,11 @@ lpfc_no_rpi(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) * Everything that matches on txcmplq will be returned * by firmware with a no rpi error. */ - psli = &phba->sli; if (ndlp->nlp_flag & NLP_RPI_REGISTERED) { - /* Now process each ring */ - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - - spin_lock_irq(&phba->hbalock); - list_for_each_entry_safe(iocb, next_iocb, &pring->txq, - list) { - /* - * Check to see if iocb matches the nport we are - * looking for - */ - if ((lpfc_check_sli_ndlp(phba, pring, iocb, - ndlp))) { - /* It matches, so deque and call compl - with an error */ - list_move_tail(&iocb->list, - &completions); - } - } - spin_unlock_irq(&phba->hbalock); - } + if (phba->sli_rev != LPFC_SLI_REV4) + lpfc_sli3_dequeue_nport_iocbs(phba, ndlp, &completions); + else + lpfc_sli4_dequeue_nport_iocbs(phba, ndlp, &completions); } /* Cancel all the IOCBs from the completions list */ @@ -5039,14 +5079,14 @@ lpfc_disc_list_loopmap(struct lpfc_vport *vport) return; } +/* SLI3 only */ void lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport) { LPFC_MBOXQ_t *mbox; struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *extra_ring = &psli->ring[psli->extra_ring]; - struct lpfc_sli_ring *fcp_ring = &psli->ring[psli->fcp_ring]; - struct lpfc_sli_ring *next_ring = &psli->ring[psli->next_ring]; + struct lpfc_sli_ring *extra_ring = &psli->sli3_ring[LPFC_EXTRA_RING]; + struct lpfc_sli_ring *fcp_ring = &psli->sli3_ring[LPFC_FCP_RING]; int rc; /* @@ -5070,7 +5110,6 @@ lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport) lpfc_disc_flush_list(vport); extra_ring->flag &= ~LPFC_STOP_IOCB_EVENT; fcp_ring->flag &= ~LPFC_STOP_IOCB_EVENT; - next_ring->flag &= ~LPFC_STOP_IOCB_EVENT; phba->link_state = LPFC_HBA_ERROR; } } @@ -5206,7 +5245,7 @@ lpfc_free_tx(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) struct lpfc_sli_ring *pring; psli = &phba->sli; - pring = &psli->ring[LPFC_ELS_RING]; + pring = lpfc_phba_elsring(phba); /* Error matching iocb on txq or txcmplq * First check the txq. 
@@ -5522,12 +5561,14 @@ lpfc_disc_timeout_handler(struct lpfc_vport *vport) if (clrlaerr) { lpfc_disc_flush_list(vport); - psli->ring[(psli->extra_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; - psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; + if (phba->sli_rev != LPFC_SLI_REV4) { + psli->sli3_ring[(LPFC_EXTRA_RING)].flag &= + ~LPFC_STOP_IOCB_EVENT; + psli->sli3_ring[LPFC_FCP_RING].flag &= + ~LPFC_STOP_IOCB_EVENT; + } vport->port_state = LPFC_VPORT_READY; } - return; } diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 3b970d370600..28247c99b4f2 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -44,8 +44,6 @@ #define LPFC_FCP_RING 0 /* ring 0 for FCP initiator commands */ #define LPFC_EXTRA_RING 1 /* ring 1 for other protocols */ #define LPFC_ELS_RING 2 /* ring 2 for ELS commands */ -#define LPFC_FCP_NEXT_RING 3 -#define LPFC_FCP_OAS_RING 3 #define SLI2_IOCB_CMD_R0_ENTRIES 172 /* SLI-2 FCP command ring entries */ #define SLI2_IOCB_RSP_R0_ENTRIES 134 /* SLI-2 FCP response ring entries */ @@ -1791,6 +1789,7 @@ typedef struct { /* FireFly BIU registers */ #define MBX_INIT_VFI 0xA3 #define MBX_INIT_VPI 0xA4 #define MBX_ACCESS_VDATA 0xA5 +#define MBX_REG_FCFI_MRQ 0xAF #define MBX_AUTH_PORT 0xF8 #define MBX_SECURITY_MGMT 0xF9 diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 964a1fdb076b..c3277c5312c9 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -108,6 +108,7 @@ struct lpfc_sli_intf { #define LPFC_MAX_MQ_PAGE 8 #define LPFC_MAX_WQ_PAGE_V0 4 #define LPFC_MAX_WQ_PAGE 8 +#define LPFC_MAX_RQ_PAGE 8 #define LPFC_MAX_CQ_PAGE 4 #define LPFC_MAX_EQ_PAGE 8 @@ -198,7 +199,7 @@ struct lpfc_sli_intf { /* Configuration of Interrupts / sec for entire HBA port */ #define LPFC_MIN_IMAX 5000 #define LPFC_MAX_IMAX 5000000 -#define LPFC_DEF_IMAX 50000 +#define LPFC_DEF_IMAX 150000 #define LPFC_MIN_CPU_MAP 0 #define LPFC_MAX_CPU_MAP 2 @@ -348,6 +349,7 @@ struct lpfc_cqe { #define CQE_CODE_RECEIVE 0x4 #define CQE_CODE_XRI_ABORTED 0x5 #define CQE_CODE_RECEIVE_V1 0x9 +#define CQE_CODE_NVME_ERSP 0xd /* * Define mask value for xri_aborted and wcqe completed CQE extended status. 
@@ -367,6 +369,9 @@ struct lpfc_wcqe_complete { #define lpfc_wcqe_c_hw_status_SHIFT 0 #define lpfc_wcqe_c_hw_status_MASK 0x000000FF #define lpfc_wcqe_c_hw_status_WORD word0 +#define lpfc_wcqe_c_ersp0_SHIFT 0 +#define lpfc_wcqe_c_ersp0_MASK 0x0000FFFF +#define lpfc_wcqe_c_ersp0_WORD word0 uint32_t total_data_placed; uint32_t parameter; #define lpfc_wcqe_c_bg_edir_SHIFT 5 @@ -400,6 +405,9 @@ struct lpfc_wcqe_complete { #define lpfc_wcqe_c_code_SHIFT lpfc_cqe_code_SHIFT #define lpfc_wcqe_c_code_MASK lpfc_cqe_code_MASK #define lpfc_wcqe_c_code_WORD lpfc_cqe_code_WORD +#define lpfc_wcqe_c_sqhead_SHIFT 0 +#define lpfc_wcqe_c_sqhead_MASK 0x0000FFFF +#define lpfc_wcqe_c_sqhead_WORD word3 }; /* completion queue entry for wqe release */ @@ -2841,12 +2849,18 @@ struct lpfc_sli4_parameters { #define cfg_mqv_WORD word6 uint32_t word7; uint32_t word8; +#define cfg_wqpcnt_SHIFT 0 +#define cfg_wqpcnt_MASK 0x0000000f +#define cfg_wqpcnt_WORD word8 #define cfg_wqsize_SHIFT 8 #define cfg_wqsize_MASK 0x0000000f #define cfg_wqsize_WORD word8 #define cfg_wqv_SHIFT 14 #define cfg_wqv_MASK 0x00000003 #define cfg_wqv_WORD word8 +#define cfg_wqpsize_SHIFT 16 +#define cfg_wqpsize_MASK 0x000000ff +#define cfg_wqpsize_WORD word8 uint32_t word9; uint32_t word10; #define cfg_rqv_SHIFT 14 @@ -2897,6 +2911,12 @@ struct lpfc_sli4_parameters { #define cfg_mds_diags_SHIFT 1 #define cfg_mds_diags_MASK 0x00000001 #define cfg_mds_diags_WORD word19 +#define cfg_nvme_SHIFT 3 +#define cfg_nvme_MASK 0x00000001 +#define cfg_nvme_WORD word19 +#define cfg_xib_SHIFT 4 +#define cfg_xib_MASK 0x00000001 +#define cfg_xib_WORD word19 }; #define LPFC_SET_UE_RECOVERY 0x10 @@ -3659,6 +3679,9 @@ struct wqe_common { #define wqe_ebde_cnt_SHIFT 0 #define wqe_ebde_cnt_MASK 0x0000000f #define wqe_ebde_cnt_WORD word10 +#define wqe_nvme_SHIFT 4 +#define wqe_nvme_MASK 0x00000001 +#define wqe_nvme_WORD word10 #define wqe_oas_SHIFT 6 #define wqe_oas_MASK 0x00000001 #define wqe_oas_WORD word10 @@ -4017,11 +4040,39 @@ struct lpfc_grp_hdr { uint8_t revision[32]; }; -#define FCP_COMMAND 0x0 -#define FCP_COMMAND_DATA_OUT 0x1 -#define ELS_COMMAND_NON_FIP 0xC -#define ELS_COMMAND_FIP 0xD -#define OTHER_COMMAND 0x8 +/* Defines for WQE command type */ +#define FCP_COMMAND 0x0 +#define NVME_READ_CMD 0x0 +#define FCP_COMMAND_DATA_OUT 0x1 +#define NVME_WRITE_CMD 0x1 +#define FCP_COMMAND_TRECEIVE 0x2 +#define FCP_COMMAND_TRSP 0x3 +#define FCP_COMMAND_TSEND 0x7 +#define OTHER_COMMAND 0x8 +#define ELS_COMMAND_NON_FIP 0xC +#define ELS_COMMAND_FIP 0xD + +#define LPFC_NVME_EMBED_CMD 0x0 +#define LPFC_NVME_EMBED_WRITE 0x1 +#define LPFC_NVME_EMBED_READ 0x2 + +/* WQE Commands */ +#define CMD_ABORT_XRI_WQE 0x0F +#define CMD_XMIT_SEQUENCE64_WQE 0x82 +#define CMD_XMIT_BCAST64_WQE 0x84 +#define CMD_ELS_REQUEST64_WQE 0x8A +#define CMD_XMIT_ELS_RSP64_WQE 0x95 +#define CMD_XMIT_BLS_RSP64_WQE 0x97 +#define CMD_FCP_IWRITE64_WQE 0x98 +#define CMD_FCP_IREAD64_WQE 0x9A +#define CMD_FCP_ICMND64_WQE 0x9C +#define CMD_FCP_TSEND64_WQE 0x9F +#define CMD_FCP_TRECEIVE64_WQE 0xA1 +#define CMD_FCP_TRSP64_WQE 0xA3 +#define CMD_GEN_REQUEST64_WQE 0xC2 + +#define CMD_WQE_MASK 0xff + #define LPFC_FW_DUMP 1 #define LPFC_FW_RESET 2 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index ec23388ac4f5..57087ba4834f 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -46,8 +47,9 @@ #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include 
"lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -499,12 +501,10 @@ lpfc_config_port_post(struct lpfc_hba *phba) phba->link_state = LPFC_LINK_DOWN; /* Only process IOCBs on ELS ring till hba_state is READY */ - if (psli->ring[psli->extra_ring].sli.sli3.cmdringaddr) - psli->ring[psli->extra_ring].flag |= LPFC_STOP_IOCB_EVENT; - if (psli->ring[psli->fcp_ring].sli.sli3.cmdringaddr) - psli->ring[psli->fcp_ring].flag |= LPFC_STOP_IOCB_EVENT; - if (psli->ring[psli->next_ring].sli.sli3.cmdringaddr) - psli->ring[psli->next_ring].flag |= LPFC_STOP_IOCB_EVENT; + if (psli->sli3_ring[LPFC_EXTRA_RING].sli.sli3.cmdringaddr) + psli->sli3_ring[LPFC_EXTRA_RING].flag |= LPFC_STOP_IOCB_EVENT; + if (psli->sli3_ring[LPFC_FCP_RING].sli.sli3.cmdringaddr) + psli->sli3_ring[LPFC_FCP_RING].flag |= LPFC_STOP_IOCB_EVENT; /* Post receive buffers for desired rings */ if (phba->sli_rev != 3) @@ -892,7 +892,7 @@ lpfc_hba_free_post_buf(struct lpfc_hba *phba) lpfc_sli_hbqbuf_free_all(phba); else { /* Cleanup preposted buffers on the ELS ring */ - pring = &psli->ring[LPFC_ELS_RING]; + pring = &psli->sli3_ring[LPFC_ELS_RING]; spin_lock_irq(&phba->hbalock); list_splice_init(&pring->postbufq, &buflist); spin_unlock_irq(&phba->hbalock); @@ -925,32 +925,43 @@ static void lpfc_hba_clean_txcmplq(struct lpfc_hba *phba) { struct lpfc_sli *psli = &phba->sli; + struct lpfc_queue *qp = NULL; struct lpfc_sli_ring *pring; LIST_HEAD(completions); int i; - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - if (phba->sli_rev >= LPFC_SLI_REV4) - spin_lock_irq(&pring->ring_lock); - else + if (phba->sli_rev != LPFC_SLI_REV4) { + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; spin_lock_irq(&phba->hbalock); - /* At this point in time the HBA is either reset or DOA. Either - * way, nothing should be on txcmplq as it will NEVER complete. - */ - list_splice_init(&pring->txcmplq, &completions); - pring->txcmplq_cnt = 0; - - if (phba->sli_rev >= LPFC_SLI_REV4) - spin_unlock_irq(&pring->ring_lock); - else + /* At this point in time the HBA is either reset or DOA + * Nothing should be on txcmplq as it will + * NEVER complete. 
+ */ + list_splice_init(&pring->txcmplq, &completions); + pring->txcmplq_cnt = 0; spin_unlock_irq(&phba->hbalock); + lpfc_sli_abort_iocb_ring(phba, pring); + } /* Cancel all the IOCBs from the completions list */ - lpfc_sli_cancel_iocbs(phba, &completions, IOSTAT_LOCAL_REJECT, - IOERR_SLI_ABORTED); + lpfc_sli_cancel_iocbs(phba, &completions, + IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); + return; + } + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) + continue; + spin_lock_irq(&pring->ring_lock); + list_splice_init(&pring->txcmplq, &completions); + pring->txcmplq_cnt = 0; + spin_unlock_irq(&pring->ring_lock); lpfc_sli_abort_iocb_ring(phba, pring); } + /* Cancel all the IOCBs from the completions list */ + lpfc_sli_cancel_iocbs(phba, &completions, + IOSTAT_LOCAL_REJECT, IOERR_SLI_ABORTED); } /** @@ -989,43 +1000,51 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) { struct lpfc_scsi_buf *psb, *psb_next; LIST_HEAD(aborts); + LIST_HEAD(nvme_aborts); unsigned long iflag = 0; struct lpfc_sglq *sglq_entry = NULL; - struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring; - lpfc_hba_free_post_buf(phba); + + lpfc_sli_hbqbuf_free_all(phba); lpfc_hba_clean_txcmplq(phba); - pring = &psli->ring[LPFC_ELS_RING]; /* At this point in time the HBA is either reset or DOA. Either * way, nothing should be on lpfc_abts_els_sgl_list, it needs to be - * on the lpfc_sgl_list so that it can either be freed if the + * on the lpfc_els_sgl_list so that it can either be freed if the * driver is unloading or reposted if the driver is restarting * the port. */ - spin_lock_irq(&phba->hbalock); /* required for lpfc_sgl_list and */ + spin_lock_irq(&phba->hbalock); /* required for lpfc_els_sgl_list and */ /* scsl_buf_list */ - /* abts_sgl_list_lock required because worker thread uses this + /* sgl_list_lock required because worker thread uses this * list. */ - spin_lock(&phba->sli4_hba.abts_sgl_list_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); list_for_each_entry(sglq_entry, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) sglq_entry->state = SGL_FREED; - spin_lock(&pring->ring_lock); list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list, - &phba->sli4_hba.lpfc_sgl_list); - spin_unlock(&pring->ring_lock); - spin_unlock(&phba->sli4_hba.abts_sgl_list_lock); + &phba->sli4_hba.lpfc_els_sgl_list); + + spin_unlock(&phba->sli4_hba.sgl_list_lock); /* abts_scsi_buf_list_lock required because worker thread uses this * list. 
*/ - spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock); - list_splice_init(&phba->sli4_hba.lpfc_abts_scsi_buf_list, - &aborts); - spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_abts_scsi_buf_list, + &aborts); + spin_unlock(&phba->sli4_hba.abts_scsi_buf_list_lock); + } + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_abts_nvme_buf_list, + &nvme_aborts); + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + } + spin_unlock_irq(&phba->hbalock); list_for_each_entry_safe(psb, psb_next, &aborts, list) { @@ -1036,6 +1055,14 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_splice(&aborts, &phba->lpfc_scsi_buf_list_put); spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag); + list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) { + psb->pCmd = NULL; + psb->status = IOSTAT_SUCCESS; + } + spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); + list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put); + spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); + lpfc_sli4_free_sp_events(phba); return 0; } @@ -1829,7 +1856,7 @@ lpfc_handle_eratt(struct lpfc_hba *phba) * @phba: pointer to lpfc hba data structure. * * This routine is invoked from the worker thread to handle a HBA host - * attention link event. + * attention link event. SLI3 only. **/ void lpfc_handle_latt(struct lpfc_hba *phba) @@ -1867,7 +1894,7 @@ lpfc_handle_latt(struct lpfc_hba *phba) pmb->mbox_cmpl = lpfc_mbx_cmpl_read_topology; pmb->vport = vport; /* Block ELS IOCBs until we have processed this mbox command */ - phba->sli.ring[LPFC_ELS_RING].flag |= LPFC_STOP_IOCB_EVENT; + phba->sli.sli3_ring[LPFC_ELS_RING].flag |= LPFC_STOP_IOCB_EVENT; rc = lpfc_sli_issue_mbox (phba, pmb, MBX_NOWAIT); if (rc == MBX_NOT_FINISHED) { rc = 4; @@ -1883,7 +1910,7 @@ lpfc_handle_latt(struct lpfc_hba *phba) return; lpfc_handle_latt_free_mbuf: - phba->sli.ring[LPFC_ELS_RING].flag &= ~LPFC_STOP_IOCB_EVENT; + phba->sli.sli3_ring[LPFC_ELS_RING].flag &= ~LPFC_STOP_IOCB_EVENT; lpfc_mbuf_free(phba, mp->virt, mp->phys); lpfc_handle_latt_free_mp: kfree(mp); @@ -2441,7 +2468,7 @@ lpfc_post_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, int cnt) * * This routine posts initial receive IOCB buffers to the ELS ring. The * current number of initial IOCB buffers specified by LPFC_BUF_RING0 is - * set to 64 IOCBs. + * set to 64 IOCBs. SLI3 only. 
* * Return codes * 0 - success (currently always success) @@ -2452,7 +2479,7 @@ lpfc_post_rcv_buf(struct lpfc_hba *phba) struct lpfc_sli *psli = &phba->sli; /* Ring 0, ELS / CT buffers */ - lpfc_post_buffer(phba, &psli->ring[LPFC_ELS_RING], LPFC_BUF_RING0); + lpfc_post_buffer(phba, &psli->sli3_ring[LPFC_ELS_RING], LPFC_BUF_RING0); /* Ring 2 - FCP no buffers needed */ return 0; @@ -2894,11 +2921,6 @@ lpfc_online(struct lpfc_hba *phba) lpfc_block_mgmt_io(phba, LPFC_MBX_WAIT); - if (!lpfc_sli_queue_setup(phba)) { - lpfc_unblock_mgmt_io(phba); - return 1; - } - if (phba->sli_rev == LPFC_SLI_REV4) { if (lpfc_sli4_hba_setup(phba)) { /* Initialize SLI4 HBA */ lpfc_unblock_mgmt_io(phba); @@ -2909,6 +2931,7 @@ lpfc_online(struct lpfc_hba *phba) vpis_cleared = true; spin_unlock_irq(&phba->hbalock); } else { + lpfc_sli_queue_init(phba); if (lpfc_sli_hba_setup(phba)) { /* Initialize SLI2/SLI3 HBA */ lpfc_unblock_mgmt_io(phba); return 1; @@ -3100,6 +3123,9 @@ lpfc_scsi_free(struct lpfc_hba *phba) struct lpfc_scsi_buf *sb, *sb_next; struct lpfc_iocbq *io, *io_next; + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return; + spin_lock_irq(&phba->hbalock); /* Release all the lpfc_scsi_bufs maintained by this host. */ @@ -3108,7 +3134,7 @@ lpfc_scsi_free(struct lpfc_hba *phba) list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list_put, list) { list_del(&sb->list); - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, sb->data, + pci_pool_free(phba->lpfc_sg_dma_buf_pool, sb->data, sb->dma_handle); kfree(sb); phba->total_scsi_bufs--; @@ -3119,7 +3145,7 @@ lpfc_scsi_free(struct lpfc_hba *phba) list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list_get, list) { list_del(&sb->list); - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, sb->data, + pci_pool_free(phba->lpfc_sg_dma_buf_pool, sb->data, sb->dma_handle); kfree(sb); phba->total_scsi_bufs--; @@ -3135,9 +3161,59 @@ lpfc_scsi_free(struct lpfc_hba *phba) spin_unlock_irq(&phba->hbalock); } +/** + * lpfc_nvme_free - Free all the NVME buffers and IOCBs from driver lists + * @phba: pointer to lpfc hba data structure. + * + * This routine is to free all the NVME buffers and IOCBs from the driver + * list back to kernel. It is called from lpfc_pci_remove_one to free + * the internal resources before the device is removed from the system. + **/ +static void +lpfc_nvme_free(struct lpfc_hba *phba) +{ + struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next; + struct lpfc_iocbq *io, *io_next; + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return; + + spin_lock_irq(&phba->hbalock); + + /* Release all the lpfc_nvme_bufs maintained by this host. */ + spin_lock(&phba->nvme_buf_list_put_lock); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &phba->lpfc_nvme_buf_list_put, list) { + list_del(&lpfc_ncmd->list); + pci_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data, + lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + phba->total_nvme_bufs--; + } + spin_unlock(&phba->nvme_buf_list_put_lock); + + spin_lock(&phba->nvme_buf_list_get_lock); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &phba->lpfc_nvme_buf_list_get, list) { + list_del(&lpfc_ncmd->list); + pci_pool_free(phba->lpfc_sg_dma_buf_pool, lpfc_ncmd->data, + lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + phba->total_nvme_bufs--; + } + spin_unlock(&phba->nvme_buf_list_get_lock); + /* Release all the lpfc_iocbq entries maintained by this host. 
*/ + list_for_each_entry_safe(io, io_next, &phba->lpfc_iocb_list, list) { + list_del(&io->list); + kfree(io); + phba->total_iocbq_bufs--; + } + + spin_unlock_irq(&phba->hbalock); +} /** - * lpfc_sli4_xri_sgl_update - update xri-sgl sizing and mapping + * lpfc_sli4_els_sgl_update - update ELS xri-sgl sizing and mapping * @phba: pointer to lpfc hba data structure. * * This routine first calculates the sizes of the current els and allocated @@ -3149,20 +3225,18 @@ lpfc_scsi_free(struct lpfc_hba *phba) * 0 - successful (for now, it always returns 0) **/ int -lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) +lpfc_sli4_els_sgl_update(struct lpfc_hba *phba) { struct lpfc_sglq *sglq_entry = NULL, *sglq_entry_next = NULL; - struct lpfc_scsi_buf *psb = NULL, *psb_next = NULL; - uint16_t i, lxri, xri_cnt, els_xri_cnt, scsi_xri_cnt; + uint16_t i, lxri, xri_cnt, els_xri_cnt; LIST_HEAD(els_sgl_list); - LIST_HEAD(scsi_sgl_list); int rc; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; /* * update on pci function's els xri-sgl list */ els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); + if (els_xri_cnt > phba->sli4_hba.els_xri_cnt) { /* els xri-sgl expanded */ xri_cnt = els_xri_cnt - phba->sli4_hba.els_xri_cnt; @@ -3198,9 +3272,10 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) list_add_tail(&sglq_entry->list, &els_sgl_list); } spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&els_sgl_list, &phba->sli4_hba.lpfc_sgl_list); - spin_unlock(&pring->ring_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&els_sgl_list, + &phba->sli4_hba.lpfc_els_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irq(&phba->hbalock); } else if (els_xri_cnt < phba->sli4_hba.els_xri_cnt) { /* els xri-sgl shrinked */ @@ -3210,24 +3285,22 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) "%d to %d\n", phba->sli4_hba.els_xri_cnt, els_xri_cnt); spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&phba->sli4_hba.lpfc_sgl_list, &els_sgl_list); - spin_unlock(&pring->ring_lock); - spin_unlock_irq(&phba->hbalock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_els_sgl_list, + &els_sgl_list); /* release extra els sgls from list */ for (i = 0; i < xri_cnt; i++) { list_remove_head(&els_sgl_list, sglq_entry, struct lpfc_sglq, list); if (sglq_entry) { - lpfc_mbuf_free(phba, sglq_entry->virt, - sglq_entry->phys); + __lpfc_mbuf_free(phba, sglq_entry->virt, + sglq_entry->phys); kfree(sglq_entry); } } - spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&els_sgl_list, &phba->sli4_hba.lpfc_sgl_list); - spin_unlock(&pring->ring_lock); + list_splice_init(&els_sgl_list, + &phba->sli4_hba.lpfc_els_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irq(&phba->hbalock); } else lpfc_printf_log(phba, KERN_INFO, LOG_SLI, @@ -3239,7 +3312,7 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) sglq_entry = NULL; sglq_entry_next = NULL; list_for_each_entry_safe(sglq_entry, sglq_entry_next, - &phba->sli4_hba.lpfc_sgl_list, list) { + &phba->sli4_hba.lpfc_els_sgl_list, list) { lxri = lpfc_sli4_next_xritag(phba); if (lxri == NO_XRI) { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, @@ -3251,21 +3324,53 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) sglq_entry->sli4_lxritag = lxri; sglq_entry->sli4_xritag = phba->sli4_hba.xri_ids[lxri]; } + return 0; + +out_free_mem: + lpfc_free_els_sgl_list(phba); + return rc; +} + +/** + * lpfc_sli4_scsi_sgl_update - update xri-sgl sizing 
and mapping + * @phba: pointer to lpfc hba data structure. + * + * This routine first calculates the sizes of the current els and allocated + * scsi sgl lists, and then goes through all sgls to updates the physical + * XRIs assigned due to port function reset. During port initialization, the + * current els and allocated scsi sgl lists are 0s. + * + * Return codes + * 0 - successful (for now, it always returns 0) + **/ +int +lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba) +{ + struct lpfc_scsi_buf *psb, *psb_next; + uint16_t i, lxri, els_xri_cnt, scsi_xri_cnt; + LIST_HEAD(scsi_sgl_list); + int rc; /* - * update on pci function's allocated scsi xri-sgl list + * update on pci function's els xri-sgl list */ + els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); phba->total_scsi_bufs = 0; + /* + * update on pci function's allocated scsi xri-sgl list + */ /* maximum number of xris available for scsi buffers */ phba->sli4_hba.scsi_xri_max = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "2401 Current allocated SCSI xri-sgl count:%d, " - "maximum SCSI xri count:%d\n", - phba->sli4_hba.scsi_xri_cnt, - phba->sli4_hba.scsi_xri_max); + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return 0; + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + phba->sli4_hba.scsi_xri_max = /* Split them up */ + (phba->sli4_hba.scsi_xri_max * + phba->cfg_xri_split) / 100; spin_lock_irq(&phba->scsi_buf_list_get_lock); spin_lock(&phba->scsi_buf_list_put_lock); @@ -3283,7 +3388,7 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) list_remove_head(&scsi_sgl_list, psb, struct lpfc_scsi_buf, list); if (psb) { - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, + pci_pool_free(phba->lpfc_sg_dma_buf_pool, psb->data, psb->dma_handle); kfree(psb); } @@ -3314,15 +3419,112 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put); spin_unlock(&phba->scsi_buf_list_put_lock); spin_unlock_irq(&phba->scsi_buf_list_get_lock); - return 0; out_free_mem: - lpfc_free_els_sgl_list(phba); lpfc_scsi_free(phba); return rc; } +/** + * lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping + * @phba: pointer to lpfc hba data structure. + * + * This routine first calculates the sizes of the current els and allocated + * scsi sgl lists, and then goes through all sgls to updates the physical + * XRIs assigned due to port function reset. During port initialization, the + * current els and allocated scsi sgl lists are 0s. 
+ * + * Return codes + * 0 - successful (for now, it always returns 0) + **/ +int +lpfc_sli4_nvme_sgl_update(struct lpfc_hba *phba) +{ + struct lpfc_nvme_buf *lpfc_ncmd = NULL, *lpfc_ncmd_next = NULL; + uint16_t i, lxri, els_xri_cnt; + uint16_t nvme_xri_cnt, nvme_xri_max; + LIST_HEAD(nvme_sgl_list); + int rc; + + phba->total_nvme_bufs = 0; + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return 0; + /* + * update on pci function's allocated nvme xri-sgl list + */ + + /* maximum number of xris available for nvme buffers */ + els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); + nvme_xri_max = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; + phba->sli4_hba.nvme_xri_max = nvme_xri_max; + phba->sli4_hba.nvme_xri_max -= phba->sli4_hba.scsi_xri_max; + + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6074 Current allocated NVME xri-sgl count:%d, " + "maximum NVME xri count:%d\n", + phba->sli4_hba.nvme_xri_cnt, + phba->sli4_hba.nvme_xri_max); + + spin_lock_irq(&phba->nvme_buf_list_get_lock); + spin_lock(&phba->nvme_buf_list_put_lock); + list_splice_init(&phba->lpfc_nvme_buf_list_get, &nvme_sgl_list); + list_splice(&phba->lpfc_nvme_buf_list_put, &nvme_sgl_list); + spin_unlock(&phba->nvme_buf_list_put_lock); + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + + if (phba->sli4_hba.nvme_xri_cnt > phba->sli4_hba.nvme_xri_max) { + /* max nvme xri shrunk below the allocated nvme buffers */ + spin_lock_irq(&phba->nvme_buf_list_get_lock); + nvme_xri_cnt = phba->sli4_hba.nvme_xri_cnt - + phba->sli4_hba.nvme_xri_max; + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + /* release the extra allocated nvme buffers */ + for (i = 0; i < nvme_xri_cnt; i++) { + list_remove_head(&nvme_sgl_list, lpfc_ncmd, + struct lpfc_nvme_buf, list); + if (lpfc_ncmd) { + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + lpfc_ncmd->data, + lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + } + } + spin_lock_irq(&phba->nvme_buf_list_get_lock); + phba->sli4_hba.nvme_xri_cnt -= nvme_xri_cnt; + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + } + + /* update xris associated to remaining allocated nvme buffers */ + lpfc_ncmd = NULL; + lpfc_ncmd_next = NULL; + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &nvme_sgl_list, list) { + lxri = lpfc_sli4_next_xritag(phba); + if (lxri == NO_XRI) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6075 Failed to allocate xri for " + "nvme buffer\n"); + rc = -ENOMEM; + goto out_free_mem; + } + lpfc_ncmd->cur_iocbq.sli4_lxritag = lxri; + lpfc_ncmd->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri]; + } + spin_lock_irq(&phba->nvme_buf_list_get_lock); + spin_lock(&phba->nvme_buf_list_put_lock); + list_splice_init(&nvme_sgl_list, &phba->lpfc_nvme_buf_list_get); + INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_put); + spin_unlock(&phba->nvme_buf_list_put_lock); + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + return 0; + +out_free_mem: + lpfc_nvme_free(phba); + return rc; +} + /** * lpfc_create_port - Create an FC port * @phba: pointer to lpfc hba data structure. 
@@ -3343,18 +3545,23 @@ struct lpfc_vport * lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) { struct lpfc_vport *vport; - struct Scsi_Host *shost; + struct Scsi_Host *shost = NULL; int error = 0; - if (dev != &phba->pcidev->dev) { - shost = scsi_host_alloc(&lpfc_vport_template, - sizeof(struct lpfc_vport)); - } else { - if (phba->sli_rev == LPFC_SLI_REV4) - shost = scsi_host_alloc(&lpfc_template, - sizeof(struct lpfc_vport)); - else - shost = scsi_host_alloc(&lpfc_template_s3, + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + if (dev != &phba->pcidev->dev) { + shost = scsi_host_alloc(&lpfc_vport_template, + sizeof(struct lpfc_vport)); + } else { + if (phba->sli_rev == LPFC_SLI_REV4) + shost = scsi_host_alloc(&lpfc_template, + sizeof(struct lpfc_vport)); + else + shost = scsi_host_alloc(&lpfc_template_s3, + sizeof(struct lpfc_vport)); + } + } else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + shost = scsi_host_alloc(&lpfc_template_nvme, sizeof(struct lpfc_vport)); } if (!shost) @@ -3365,8 +3572,8 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) vport->load_flag |= FC_LOADING; vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; vport->fc_rscn_flush = 0; - lpfc_get_vport_cfgparam(vport); + shost->unique_id = instance; shost->max_id = LPFC_MAX_TARGET; shost->max_lun = vport->cfg_max_luns; @@ -3944,7 +4151,7 @@ lpfc_sli4_async_link_evt(struct lpfc_hba *phba, lpfc_els_flush_all_cmd(phba); /* Block ELS IOCBs until we have done process link event */ - phba->sli.ring[LPFC_ELS_RING].flag |= LPFC_STOP_IOCB_EVENT; + phba->sli4_hba.els_wq->pring->flag |= LPFC_STOP_IOCB_EVENT; /* Update link event statistics */ phba->sli.slistat.link_event++; @@ -4103,7 +4310,7 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) lpfc_els_flush_all_cmd(phba); /* Block ELS IOCBs until we have done process link event */ - phba->sli.ring[LPFC_ELS_RING].flag |= LPFC_STOP_IOCB_EVENT; + phba->sli4_hba.els_wq->pring->flag |= LPFC_STOP_IOCB_EVENT; /* Update link event statistics */ phba->sli.slistat.link_event++; @@ -5000,40 +5207,78 @@ lpfc_sli_probe_sriov_nr_virtfn(struct lpfc_hba *phba, int nr_vfn) } /** - * lpfc_sli_driver_resource_setup - Setup driver internal resources for SLI3 dev. + * lpfc_setup_driver_resource_phase1 - Phase1 etup driver internal resources. * @phba: pointer to lpfc hba data structure. * - * This routine is invoked to set up the driver internal resources specific to - * support the SLI-3 HBA device it attached to. + * This routine is invoked to set up the driver internal resources before the + * device specific resource setup to support the HBA device it attached to. 
* * Return codes - * 0 - successful - * other values - error + * 0 - successful + * other values - error **/ static int -lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) +lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) { - struct lpfc_sli *psli; - int rc; + struct lpfc_sli *psli = &phba->sli; /* - * Initialize timers used by driver + * Driver resources common to all SLI revisions */ + atomic_set(&phba->fast_event_count, 0); + spin_lock_init(&phba->hbalock); - /* Heartbeat timer */ - init_timer(&phba->hb_tmofunc); - phba->hb_tmofunc.function = lpfc_hb_timeout; - phba->hb_tmofunc.data = (unsigned long)phba; + /* Initialize ndlp management spinlock */ + spin_lock_init(&phba->ndlp_lock); + + INIT_LIST_HEAD(&phba->port_list); + INIT_LIST_HEAD(&phba->work_list); + init_waitqueue_head(&phba->wait_4_mlo_m_q); + + /* Initialize the wait queue head for the kernel thread */ + init_waitqueue_head(&phba->work_waitq); + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "1403 Protocols supported %s %s\n", + ((phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) ? + "SCSI" : " "), + ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) ? + "NVME" : " ")); + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + /* Initialize the scsi buffer list used by driver for scsi IO */ + spin_lock_init(&phba->scsi_buf_list_get_lock); + INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get); + spin_lock_init(&phba->scsi_buf_list_put_lock); + INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put); + } + + if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) && + (phba->nvmet_support == 0)) { + /* Initialize the NVME buffer list used by driver for NVME IO */ + spin_lock_init(&phba->nvme_buf_list_get_lock); + INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_get); + spin_lock_init(&phba->nvme_buf_list_put_lock); + INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_put); + } + + /* Initialize the fabric iocb list */ + INIT_LIST_HEAD(&phba->fabric_iocb_list); + + /* Initialize list to save ELS buffers */ + INIT_LIST_HEAD(&phba->elsbuf); + + /* Initialize FCF connection rec list */ + INIT_LIST_HEAD(&phba->fcf_conn_rec_list); + + /* Initialize OAS configuration list */ + spin_lock_init(&phba->devicelock); + INIT_LIST_HEAD(&phba->luns); - psli = &phba->sli; /* MBOX heartbeat timer */ init_timer(&psli->mbox_tmo); psli->mbox_tmo.function = lpfc_mbox_timeout; psli->mbox_tmo.data = (unsigned long) phba; - /* FCP polling mode timer */ - init_timer(&phba->fcp_poll_timer); - phba->fcp_poll_timer.function = lpfc_poll_timeout; - phba->fcp_poll_timer.data = (unsigned long) phba; /* Fabric block timer */ init_timer(&phba->fabric_block_timer); phba->fabric_block_timer.function = lpfc_fabric_block_timeout; @@ -5042,6 +5287,38 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) init_timer(&phba->eratt_poll); phba->eratt_poll.function = lpfc_poll_eratt; phba->eratt_poll.data = (unsigned long) phba; + /* Heartbeat timer */ + init_timer(&phba->hb_tmofunc); + phba->hb_tmofunc.function = lpfc_hb_timeout; + phba->hb_tmofunc.data = (unsigned long)phba; + + return 0; +} + +/** + * lpfc_sli_driver_resource_setup - Setup driver internal resources for SLI3 dev + * @phba: pointer to lpfc hba data structure. + * + * This routine is invoked to set up the driver internal resources specific to + * support the SLI-3 HBA device it attached to. 
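The refactor splits initialization into a phase-1 routine shared by every SLI revision (locks, lists, the common timers) and a revision-specific routine layered on top of it, which is why the heartbeat, mailbox, fabric-block and error-attention timers move out of the SLI-3/SLI-4 paths. A compressed sketch of that layering, with invented types:

struct hba { int common_ready; int sli_rev; };

static int setup_phase1(struct hba *h)
{
	/* resources common to all SLI revisions: locks, lists, timers */
	h->common_ready = 1;
	return 0;
}

static int setup_sli3(struct hba *h)
{
	if (setup_phase1(h))
		return -1;
	/* SLI-3 only: sli3_ring array, FCP polling timer, ... */
	return 0;
}

static int setup_sli4(struct hba *h)
{
	if (setup_phase1(h))
		return -1;
	/* SLI-4 only: EQ/CQ/WQ sizing, RPI headers, ... */
	return 0;
}

int main(void)
{
	struct hba h = { .sli_rev = 4 };
	return h.sli_rev == 4 ? setup_sli4(&h) : setup_sli3(&h);
}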
+ * + * Return codes + * 0 - successful + * other values - error + **/ +static int +lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) +{ + int rc; + + /* + * Initialize timers used by driver + */ + + /* FCP polling mode timer */ + init_timer(&phba->fcp_poll_timer); + phba->fcp_poll_timer.function = lpfc_poll_timeout; + phba->fcp_poll_timer.data = (unsigned long) phba; /* Host attention work mask setup */ phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT); @@ -5049,6 +5326,12 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) /* Get all the module params for configuring this host */ lpfc_get_cfgparam(phba); + /* Set up phase-1 common device driver resources */ + + rc = lpfc_setup_driver_resource_phase1(phba); + if (rc) + return -ENODEV; + if (phba->pcidev->device == PCI_DEVICE_ID_HORNET) { phba->menlo_flag |= HBA_MENLO_SUPPORT; /* check for menlo minimum sg count */ @@ -5056,10 +5339,10 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) phba->cfg_sg_seg_cnt = LPFC_DEFAULT_MENLO_SG_SEG_CNT; } - if (!phba->sli.ring) - phba->sli.ring = kzalloc(LPFC_SLI3_MAX_RING * + if (!phba->sli.sli3_ring) + phba->sli.sli3_ring = kzalloc(LPFC_SLI3_MAX_RING * sizeof(struct lpfc_sli_ring), GFP_KERNEL); - if (!phba->sli.ring) + if (!phba->sli.sli3_ring) return -ENOMEM; /* @@ -5118,7 +5401,7 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) * Initialize the SLI Layer to run with lpfc HBAs. */ lpfc_sli_setup(phba); - lpfc_sli_queue_setup(phba); + lpfc_sli_queue_init(phba); /* Allocate device driver memory */ if (lpfc_mem_alloc(phba, BPL_ALIGN_SZ)) @@ -5174,18 +5457,25 @@ lpfc_sli_driver_resource_unset(struct lpfc_hba *phba) static int lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) { - struct lpfc_vector_map_info *cpup; - struct lpfc_sli *psli; LPFC_MBOXQ_t *mboxq; - int rc, i, hbq_count, max_buf_size; + int rc, i, max_buf_size; uint8_t pn_page[LPFC_MAX_SUPPORTED_PAGES] = {0}; struct lpfc_mqe *mqe; int longs; int fof_vectors = 0; + phba->sli4_hba.num_online_cpu = num_online_cpus(); + phba->sli4_hba.num_present_cpu = lpfc_present_cpu; + phba->sli4_hba.curr_disp_cpu = 0; + /* Get all the module params for configuring this host */ lpfc_get_cfgparam(phba); + /* Set up phase-1 common device driver resources */ + rc = lpfc_setup_driver_resource_phase1(phba); + if (rc) + return -ENODEV; + /* Before proceed, wait for POST done and device ready */ rc = lpfc_sli4_post_status_check(phba); if (rc) @@ -5195,27 +5485,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) * Initialize timers used by driver */ - /* Heartbeat timer */ - init_timer(&phba->hb_tmofunc); - phba->hb_tmofunc.function = lpfc_hb_timeout; - phba->hb_tmofunc.data = (unsigned long)phba; init_timer(&phba->rrq_tmr); phba->rrq_tmr.function = lpfc_rrq_timeout; phba->rrq_tmr.data = (unsigned long)phba; - psli = &phba->sli; - /* MBOX heartbeat timer */ - init_timer(&psli->mbox_tmo); - psli->mbox_tmo.function = lpfc_mbox_timeout; - psli->mbox_tmo.data = (unsigned long) phba; - /* Fabric block timer */ - init_timer(&phba->fabric_block_timer); - phba->fabric_block_timer.function = lpfc_fabric_block_timeout; - phba->fabric_block_timer.data = (unsigned long) phba; - /* EA polling mode timer */ - init_timer(&phba->eratt_poll); - phba->eratt_poll.function = lpfc_poll_eratt; - phba->eratt_poll.data = (unsigned long) phba; /* FCF rediscover timer */ init_timer(&phba->fcf.redisc_wait); phba->fcf.redisc_wait.function = lpfc_sli4_fcf_redisc_wait_tmo; @@ -5242,14 +5515,9 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* * For 
SLI4, instead of using ring 0 (LPFC_FCP_RING) for FCP commands - * we will associate a new ring, for each FCP fastpath EQ/CQ/WQ tuple. + * we will associate a new ring, for each EQ/CQ/WQ tuple. + * The WQ create will allocate the ring. */ - if (!phba->sli.ring) - phba->sli.ring = kzalloc( - (LPFC_SLI3_MAX_RING + phba->cfg_fcp_io_channel) * - sizeof(struct lpfc_sli_ring), GFP_KERNEL); - if (!phba->sli.ring) - return -ENOMEM; /* * It doesn't matter what family our adapter is in, we are @@ -5261,43 +5529,45 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) phba->cfg_sg_seg_cnt = LPFC_MAX_SGL_SEG_CNT - 2; /* - * Since lpfc_sg_seg_cnt is module parameter, the sg_dma_buf_size - * used to create the sg_dma_buf_pool must be dynamically calculated. + * Since lpfc_sg_seg_cnt is module param, the sg_dma_buf_size + * used to create the sg_dma_buf_pool must be calculated. */ - if (phba->cfg_enable_bg) { /* - * The scsi_buf for a T10-DIF I/O will hold the FCP cmnd, - * the FCP rsp, and a SGE for each. Sice we have no control - * over how many protection data segments the SCSI Layer + * The scsi_buf for a T10-DIF I/O holds the FCP cmnd, + * the FCP rsp, and a SGE. Sice we have no control + * over how many protection segments the SCSI Layer * will hand us (ie: there could be one for every block - * in the IO), we just allocate enough SGEs to accomidate - * our max amount and we need to limit lpfc_sg_seg_cnt to - * minimize the risk of running out. + * in the IO), just allocate enough SGEs to accomidate + * our max amount and we need to limit lpfc_sg_seg_cnt + * to minimize the risk of running out. */ phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) + - sizeof(struct fcp_rsp) + max_buf_size; + sizeof(struct fcp_rsp) + max_buf_size; /* Total SGEs for scsi_sg_list and scsi_sg_prot_list */ phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT; if (phba->cfg_sg_seg_cnt > LPFC_MAX_SG_SLI4_SEG_CNT_DIF) - phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SLI4_SEG_CNT_DIF; + phba->cfg_sg_seg_cnt = + LPFC_MAX_SG_SLI4_SEG_CNT_DIF; } else { /* - * The scsi_buf for a regular I/O will hold the FCP cmnd, + * The scsi_buf for a regular I/O holds the FCP cmnd, * the FCP rsp, a SGE for each, and a SGE for up to * cfg_sg_seg_cnt data segments. */ phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) + - sizeof(struct fcp_rsp) + - ((phba->cfg_sg_seg_cnt + 2) * sizeof(struct sli4_sge)); + sizeof(struct fcp_rsp) + + ((phba->cfg_sg_seg_cnt + 2) * + sizeof(struct sli4_sge)); /* Total SGEs for scsi_sg_list */ phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + 2; + /* - * NOTE: if (phba->cfg_sg_seg_cnt + 2) <= 256 we only need - * to post 1 page for the SGL. + * NOTE: if (phba->cfg_sg_seg_cnt + 2) <= 256 we only + * need to post 1 page for the SGL. */ } @@ -5317,21 +5587,27 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) phba->cfg_total_seg_cnt); /* Initialize buffer queue management fields */ - hbq_count = lpfc_sli_hbq_count(); - for (i = 0; i < hbq_count; ++i) - INIT_LIST_HEAD(&phba->hbqs[i].hbq_buffer_list); - INIT_LIST_HEAD(&phba->rb_pend_list); + INIT_LIST_HEAD(&phba->hbqs[LPFC_ELS_HBQ].hbq_buffer_list); phba->hbqs[LPFC_ELS_HBQ].hbq_alloc_buffer = lpfc_sli4_rb_alloc; phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer = lpfc_sli4_rb_free; /* * Initialize the SLI Layer to run with lpfc SLI4 HBAs. 
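The sg_dma_buf_size comments above reduce to two formulas: a T10-DIF buffer holds the FCP command, the FCP response and a fixed worst-case SGE area, while a regular buffer adds one SGE per data segment plus one each for the command and response. A small runnable sketch of that arithmetic; the structure sizes below are placeholders, not the real lpfc definitions:

#include <stdio.h>

#define FCP_CMND_SZ	32u	/* assumed */
#define FCP_RSP_SZ	32u	/* assumed */
#define SLI4_SGE_SZ	16u	/* assumed */

int main(void)
{
	unsigned int seg_cnt = 64;	  /* stands in for lpfc_sg_seg_cnt */
	unsigned int max_buf_size = 4096; /* assumed worst-case DIF SGE area */

	/* T10-DIF I/O: cmnd + rsp + fixed SGE area */
	unsigned int dif_sz = FCP_CMND_SZ + FCP_RSP_SZ + max_buf_size;

	/* regular I/O: cmnd + rsp + (seg_cnt + 2) SGEs */
	unsigned int reg_sz = FCP_CMND_SZ + FCP_RSP_SZ +
			      (seg_cnt + 2) * SLI4_SGE_SZ;

	printf("dif=%u regular=%u\n", dif_sz, reg_sz);
	return 0;
}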
*/ - /* Initialize the Abort scsi buffer list used by driver */ - spin_lock_init(&phba->sli4_hba.abts_scsi_buf_list_lock); - INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_scsi_buf_list); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + /* Initialize the Abort scsi buffer list used by driver */ + spin_lock_init(&phba->sli4_hba.abts_scsi_buf_list_lock); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_scsi_buf_list); + } + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + /* Initialize the Abort nvme buffer list used by driver */ + spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + } + /* This abort list used by worker thread */ - spin_lock_init(&phba->sli4_hba.abts_sgl_list_lock); + spin_lock_init(&phba->sli4_hba.sgl_list_lock); /* * Initialize driver internal slow-path work queues @@ -5359,10 +5635,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* initialize optic_state to 0xFF */ phba->sli4_hba.lnk_info.optic_state = 0xff; - /* Initialize the driver internal SLI layer lists. */ - lpfc_sli_setup(phba); - lpfc_sli_queue_setup(phba); - /* Allocate device driver memory */ rc = lpfc_mem_alloc(phba, SGL_ALIGN_SZ); if (rc) @@ -5372,8 +5644,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) == LPFC_SLI_INTF_IF_TYPE_2) { rc = lpfc_pci_function_reset(phba); - if (unlikely(rc)) - return -ENODEV; + if (unlikely(rc)) { + rc = -ENODEV; + goto out_free_mem; + } phba->temp_sensor_support = 1; } @@ -5410,6 +5684,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_free_bsmbx; } + phba->nvmet_support = 0; + + lpfc_nvme_mod_param_dep(phba); + /* Get the Supported Pages if PORT_CAPABILITIES is supported by port. 
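One of the quieter fixes in this hunk is the error path after lpfc_mem_alloc(): a failed PCI function reset now sets rc and jumps to out_free_mem instead of returning straight away, so the just-allocated memory is not leaked. A minimal userspace sketch of that goto-unwind pattern (all names invented):

#include <stdlib.h>

static int do_reset(void) { return 0; }	/* stub for the step that may fail */

static int setup(void)
{
	int rc;
	void *mem = malloc(4096);	/* stands in for lpfc_mem_alloc() */

	if (!mem)
		return -1;

	rc = do_reset();
	if (rc) {
		rc = -1;
		goto out_free_mem;	/* unwind rather than return directly */
	}

	free(mem);	/* the driver keeps the allocation; freed here only
			 * so the demo does not leak */
	return 0;

out_free_mem:
	free(mem);
	return rc;
}

int main(void)
{
	return setup();
}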
*/ lpfc_supported_pages(mboxq); rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); @@ -5448,9 +5726,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2999 Unsupported SLI4 Parameters " "Extents and RPI headers enabled.\n"); - goto out_free_bsmbx; } + mempool_free(mboxq, phba->mbox_mem_pool); + goto out_free_bsmbx; } + mempool_free(mboxq, phba->mbox_mem_pool); /* Verify OAS is supported */ @@ -5497,11 +5777,10 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_remove_rpi_hdrs; } - phba->sli4_hba.fcp_eq_hdl = - kzalloc((sizeof(struct lpfc_fcp_eq_hdl) * - (fof_vectors + phba->cfg_fcp_io_channel)), - GFP_KERNEL); - if (!phba->sli4_hba.fcp_eq_hdl) { + phba->sli4_hba.hba_eq_hdl = kcalloc(fof_vectors + phba->io_channel_irqs, + sizeof(struct lpfc_hba_eq_hdl), + GFP_KERNEL); + if (!phba->sli4_hba.hba_eq_hdl) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2572 Failed allocate memory for " "fast-path per-EQ handle array\n"); @@ -5509,41 +5788,31 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_free_fcf_rr_bmask; } - phba->sli4_hba.cpu_map = kzalloc((sizeof(struct lpfc_vector_map_info) * - phba->sli4_hba.num_present_cpu), - GFP_KERNEL); + phba->sli4_hba.cpu_map = kcalloc(phba->sli4_hba.num_present_cpu, + sizeof(struct lpfc_vector_map_info), + GFP_KERNEL); if (!phba->sli4_hba.cpu_map) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3327 Failed allocate memory for msi-x " "interrupt vector mapping\n"); rc = -ENOMEM; - goto out_free_fcp_eq_hdl; + goto out_free_hba_eq_hdl; } if (lpfc_used_cpu == NULL) { - lpfc_used_cpu = kzalloc((sizeof(uint16_t) * lpfc_present_cpu), - GFP_KERNEL); + lpfc_used_cpu = kcalloc(lpfc_present_cpu, sizeof(uint16_t), + GFP_KERNEL); if (!lpfc_used_cpu) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3335 Failed allocate memory for msi-x " "interrupt vector mapping\n"); kfree(phba->sli4_hba.cpu_map); rc = -ENOMEM; - goto out_free_fcp_eq_hdl; + goto out_free_hba_eq_hdl; } for (i = 0; i < lpfc_present_cpu; i++) lpfc_used_cpu[i] = LPFC_VECTOR_MAP_EMPTY; } - /* Initialize io channels for round robin */ - cpup = phba->sli4_hba.cpu_map; - rc = 0; - for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { - cpup->channel_id = rc; - rc++; - if (rc >= phba->cfg_fcp_io_channel) - rc = 0; - } - /* * Enable sr-iov virtual functions if supported and configured * through the module parameter. @@ -5563,8 +5832,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) return 0; -out_free_fcp_eq_hdl: - kfree(phba->sli4_hba.fcp_eq_hdl); +out_free_hba_eq_hdl: + kfree(phba->sli4_hba.hba_eq_hdl); out_free_fcf_rr_bmask: kfree(phba->fcf.fcf_rr_bmask); out_remove_rpi_hdrs: @@ -5599,7 +5868,7 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) phba->sli4_hba.curr_disp_cpu = 0; /* Free memory allocated for fast-path work queue handles */ - kfree(phba->sli4_hba.fcp_eq_hdl); + kfree(phba->sli4_hba.hba_eq_hdl); /* Free the allocated rpi headers. */ lpfc_sli4_remove_rpi_hdrs(phba); @@ -5672,58 +5941,6 @@ lpfc_init_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp) return 0; } -/** - * lpfc_setup_driver_resource_phase1 - Phase1 etup driver internal resources. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to set up the driver internal resources before the - * device specific resource setup to support the HBA device it attached to. 
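The allocations above also switch from kzalloc(count * size) to kcalloc(count, size): the element count and element size are passed separately, so the multiplication is overflow-checked inside the allocator and the memory still comes back zeroed. The userspace analogue is calloc(); plain malloc() below is only there to show the hand-rolled multiply and, unlike kzalloc(), it does not zero the buffer:

#include <stdio.h>
#include <stdlib.h>

struct eq_hdl { int idx; void *phba; };	/* invented stand-in */

int main(void)
{
	size_t nvecs = 8;	/* e.g. fof_vectors + io_channel_irqs */

	/* old style: caller multiplies, overflow is the caller's problem */
	struct eq_hdl *a = malloc(nvecs * sizeof(*a));

	/* new style: allocator checks nvecs * sizeof(*b) and zero-fills */
	struct eq_hdl *b = calloc(nvecs, sizeof(*b));

	printf("%p %p\n", (void *)a, (void *)b);
	free(a);
	free(b);
	return 0;
}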
- * - * Return codes - * 0 - successful - * other values - error - **/ -static int -lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) -{ - /* - * Driver resources common to all SLI revisions - */ - atomic_set(&phba->fast_event_count, 0); - spin_lock_init(&phba->hbalock); - - /* Initialize ndlp management spinlock */ - spin_lock_init(&phba->ndlp_lock); - - INIT_LIST_HEAD(&phba->port_list); - INIT_LIST_HEAD(&phba->work_list); - init_waitqueue_head(&phba->wait_4_mlo_m_q); - - /* Initialize the wait queue head for the kernel thread */ - init_waitqueue_head(&phba->work_waitq); - - /* Initialize the scsi buffer list used by driver for scsi IO */ - spin_lock_init(&phba->scsi_buf_list_get_lock); - INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_get); - spin_lock_init(&phba->scsi_buf_list_put_lock); - INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put); - - /* Initialize the fabric iocb list */ - INIT_LIST_HEAD(&phba->fabric_iocb_list); - - /* Initialize list to save ELS buffers */ - INIT_LIST_HEAD(&phba->elsbuf); - - /* Initialize FCF connection rec list */ - INIT_LIST_HEAD(&phba->fcf_conn_rec_list); - - /* Initialize OAS configuration list */ - spin_lock_init(&phba->devicelock); - INIT_LIST_HEAD(&phba->luns); - - return 0; -} - /** * lpfc_setup_driver_resource_phase2 - Phase2 setup driver internal resources. * @phba: pointer to lpfc hba data structure. @@ -5871,13 +6088,12 @@ static void lpfc_free_els_sgl_list(struct lpfc_hba *phba) { LIST_HEAD(sglq_list); - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; /* Retrieve all els sgls from driver list */ spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&phba->sli4_hba.lpfc_sgl_list, &sglq_list); - spin_unlock(&pring->ring_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_els_sgl_list, &sglq_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irq(&phba->hbalock); /* Now free the sgl list */ @@ -5931,7 +6147,7 @@ static void lpfc_init_sgl_list(struct lpfc_hba *phba) { /* Initialize and populate the sglq list per host/VF. */ - INIT_LIST_HEAD(&phba->sli4_hba.lpfc_sgl_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_els_sgl_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list); /* els xri-sgl book keeping */ @@ -5939,6 +6155,9 @@ lpfc_init_sgl_list(struct lpfc_hba *phba) /* scsi xri-buffer book keeping */ phba->sli4_hba.scsi_xri_cnt = 0; + + /* nvme xri-buffer book keeping */ + phba->sli4_hba.nvme_xri_cnt = 0; } /** @@ -6169,9 +6388,9 @@ lpfc_hba_free(struct lpfc_hba *phba) /* Release the driver assigned board number */ idr_remove(&lpfc_hba_index, phba->brd_no); - /* Free memory allocated with sli rings */ - kfree(phba->sli.ring); - phba->sli.ring = NULL; + /* Free memory allocated with sli3 rings */ + kfree(phba->sli.sli3_ring); + phba->sli.sli3_ring = NULL; kfree(phba); return; @@ -6489,8 +6708,6 @@ lpfc_sli_pci_mem_setup(struct lpfc_hba *phba) memset(phba->hbqslimp.virt, 0, lpfc_sli_hbq_size()); - INIT_LIST_HEAD(&phba->rb_pend_list); - phba->MBslimaddr = phba->slim_memmap_p; phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET; phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET; @@ -7200,11 +7417,11 @@ lpfc_setup_endian_order(struct lpfc_hba *phba) } /** - * lpfc_sli4_queue_verify - Verify and update EQ and CQ counts + * lpfc_sli4_queue_verify - Verify and update EQ counts * @phba: pointer to lpfc hba data structure. * - * This routine is invoked to check the user settable queue counts for EQs and - * CQs. 
after this routine is called the counts will be set to valid values that + * This routine is invoked to check the user settable queue counts for EQs. + * After this routine is called the counts will be set to valid values that * adhere to the constraints of the system's interrupt vectors and the port's * queue resources. * @@ -7215,9 +7432,7 @@ lpfc_setup_endian_order(struct lpfc_hba *phba) static int lpfc_sli4_queue_verify(struct lpfc_hba *phba) { - int cfg_fcp_io_channel; - uint32_t cpu; - uint32_t i = 0; + int io_channel; int fof_vectors = phba->cfg_fof ? 1 : 0; /* @@ -7226,49 +7441,38 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) */ /* Sanity check on HBA EQ parameters */ - cfg_fcp_io_channel = phba->cfg_fcp_io_channel; + io_channel = phba->io_channel_irqs; - /* It doesn't make sense to have more io channels then online CPUs */ - for_each_present_cpu(cpu) { - if (cpu_online(cpu)) - i++; - } - phba->sli4_hba.num_online_cpu = i; - phba->sli4_hba.num_present_cpu = lpfc_present_cpu; - phba->sli4_hba.curr_disp_cpu = 0; - - if (i < cfg_fcp_io_channel) { + if (phba->sli4_hba.num_online_cpu < io_channel) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3188 Reducing IO channels to match number of " "online CPUs: from %d to %d\n", - cfg_fcp_io_channel, i); - cfg_fcp_io_channel = i; + io_channel, phba->sli4_hba.num_online_cpu); + io_channel = phba->sli4_hba.num_online_cpu; } - if (cfg_fcp_io_channel + fof_vectors > - phba->sli4_hba.max_cfg_param.max_eq) { - if (phba->sli4_hba.max_cfg_param.max_eq < - LPFC_FCP_IO_CHAN_MIN) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2574 Not enough EQs (%d) from the " - "pci function for supporting FCP " - "EQs (%d)\n", - phba->sli4_hba.max_cfg_param.max_eq, - phba->cfg_fcp_io_channel); - goto out_error; - } + if (io_channel + fof_vectors > phba->sli4_hba.max_cfg_param.max_eq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2575 Reducing IO channels to match number of " "available EQs: from %d to %d\n", - cfg_fcp_io_channel, + io_channel, phba->sli4_hba.max_cfg_param.max_eq); - cfg_fcp_io_channel = phba->sli4_hba.max_cfg_param.max_eq - - fof_vectors; + io_channel = phba->sli4_hba.max_cfg_param.max_eq - fof_vectors; } - /* The actual number of FCP event queues adopted */ - phba->cfg_fcp_io_channel = cfg_fcp_io_channel; + /* The actual number of FCP / NVME event queues adopted */ + if (io_channel != phba->io_channel_irqs) + phba->io_channel_irqs = io_channel; + if (phba->cfg_fcp_io_channel > io_channel) + phba->cfg_fcp_io_channel = io_channel; + if (phba->cfg_nvme_io_channel > io_channel) + phba->cfg_nvme_io_channel = io_channel; + + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2574 IRQs: %d, IO Channels: fcp %d nvme %d\n", + phba->io_channel_irqs, phba->cfg_fcp_io_channel, + phba->cfg_nvme_io_channel); /* Get EQ depth from module parameter, fake the default for now */ phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B; @@ -7277,10 +7481,67 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) /* Get CQ depth from module parameter, fake the default for now */ phba->sli4_hba.cq_esize = LPFC_CQE_SIZE; phba->sli4_hba.cq_ecount = LPFC_CQE_DEF_COUNT; + return 0; +} + +static int +lpfc_alloc_nvme_wq_cq(struct lpfc_hba *phba, int wqidx) +{ + struct lpfc_queue *qdesc; + int cnt; + qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize, + phba->sli4_hba.cq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0508 Failed allocate fast-path NVME CQ (%d)\n", + wqidx); + return 1; + } + phba->sli4_hba.nvme_cq[wqidx] = qdesc; + + cnt = LPFC_NVME_WQSIZE; + qdesc = 
lpfc_sli4_queue_alloc(phba, LPFC_WQE128_SIZE, cnt); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0509 Failed allocate fast-path NVME WQ (%d)\n", + wqidx); + return 1; + } + phba->sli4_hba.nvme_wq[wqidx] = qdesc; + list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); + return 0; +} + +static int +lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) +{ + struct lpfc_queue *qdesc; + uint32_t wqesize; + + /* Create Fast Path FCP CQs */ + qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize, + phba->sli4_hba.cq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0499 Failed allocate fast-path FCP CQ (%d)\n", wqidx); + return 1; + } + phba->sli4_hba.fcp_cq[wqidx] = qdesc; + + /* Create Fast Path FCP WQs */ + wqesize = (phba->fcp_embed_io) ? + LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; + qdesc = lpfc_sli4_queue_alloc(phba, wqesize, phba->sli4_hba.wq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0503 Failed allocate fast-path FCP WQ (%d)\n", + wqidx); + return 1; + } + phba->sli4_hba.fcp_wq[wqidx] = qdesc; + list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); return 0; -out_error: - return -ENOMEM; } /** @@ -7301,13 +7562,14 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - uint32_t wqesize; - int idx; + int idx, io_channel; /* * Create HBA Record arrays. + * Both NVME and FCP will share that same vectors / EQs */ - if (!phba->cfg_fcp_io_channel) + io_channel = phba->io_channel_irqs; + if (!io_channel) return -ERANGE; phba->sli4_hba.mq_esize = LPFC_MQE_SIZE; @@ -7316,9 +7578,14 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) phba->sli4_hba.wq_ecount = LPFC_WQE_DEF_COUNT; phba->sli4_hba.rq_esize = LPFC_RQE_SIZE; phba->sli4_hba.rq_ecount = LPFC_RQE_DEF_COUNT; + phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B; + phba->sli4_hba.eq_ecount = LPFC_EQE_DEF_COUNT; + phba->sli4_hba.cq_esize = LPFC_CQE_SIZE; + phba->sli4_hba.cq_ecount = LPFC_CQE_DEF_COUNT; - phba->sli4_hba.hba_eq = kzalloc((sizeof(struct lpfc_queue *) * - phba->cfg_fcp_io_channel), GFP_KERNEL); + phba->sli4_hba.hba_eq = kcalloc(io_channel, + sizeof(struct lpfc_queue *), + GFP_KERNEL); if (!phba->sli4_hba.hba_eq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2576 Failed allocate memory for " @@ -7326,44 +7593,83 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) goto out_error; } - phba->sli4_hba.fcp_cq = kzalloc((sizeof(struct lpfc_queue *) * - phba->cfg_fcp_io_channel), GFP_KERNEL); - if (!phba->sli4_hba.fcp_cq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2577 Failed allocate memory for fast-path " - "CQ record array\n"); - goto out_error; + if (phba->cfg_fcp_io_channel) { + phba->sli4_hba.fcp_cq = kcalloc(phba->cfg_fcp_io_channel, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.fcp_cq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2577 Failed allocate memory for " + "fast-path CQ record array\n"); + goto out_error; + } + phba->sli4_hba.fcp_wq = kcalloc(phba->cfg_fcp_io_channel, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.fcp_wq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2578 Failed allocate memory for " + "fast-path FCP WQ record array\n"); + goto out_error; + } + /* + * Since the first EQ can have multiple CQs associated with it, + * this array is used to quickly see if we have a FCP fast-path + * CQ match. 
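The fcp_cq_map array (and, further down, nvme_cq_map) exists purely for fast lookup: it records each fast-path CQ's queue id by io-channel index so the EQ handler can match a completion's CQ id without walking queue structures. A toy version of that lookup, with invented ids:

#include <stdio.h>
#include <stdint.h>

#define NCHAN 4

int main(void)
{
	uint16_t cq_map[NCHAN] = { 37, 42, 51, 66 };	/* queue_id per channel */
	uint16_t cqid = 51;	/* CQ id reported in an event queue entry */
	int idx;

	for (idx = 0; idx < NCHAN; idx++)
		if (cq_map[idx] == cqid)
			break;

	if (idx < NCHAN)
		printf("completion belongs to io channel %d\n", idx);
	return 0;
}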
+ */ + phba->sli4_hba.fcp_cq_map = kcalloc(phba->cfg_fcp_io_channel, + sizeof(uint16_t), + GFP_KERNEL); + if (!phba->sli4_hba.fcp_cq_map) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2545 Failed allocate memory for " + "fast-path CQ map\n"); + goto out_error; + } } - phba->sli4_hba.fcp_wq = kzalloc((sizeof(struct lpfc_queue *) * - phba->cfg_fcp_io_channel), GFP_KERNEL); - if (!phba->sli4_hba.fcp_wq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2578 Failed allocate memory for fast-path " - "WQ record array\n"); - goto out_error; - } + if (phba->cfg_nvme_io_channel) { + phba->sli4_hba.nvme_cq = kcalloc(phba->cfg_nvme_io_channel, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.nvme_cq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6077 Failed allocate memory for " + "fast-path CQ record array\n"); + goto out_error; + } - /* - * Since the first EQ can have multiple CQs associated with it, - * this array is used to quickly see if we have a FCP fast-path - * CQ match. - */ - phba->sli4_hba.fcp_cq_map = kzalloc((sizeof(uint16_t) * - phba->cfg_fcp_io_channel), GFP_KERNEL); - if (!phba->sli4_hba.fcp_cq_map) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2545 Failed allocate memory for fast-path " - "CQ map\n"); - goto out_error; + + phba->sli4_hba.nvme_wq = kcalloc(phba->cfg_nvme_io_channel, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.nvme_wq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "2581 Failed allocate memory for " + "fast-path NVME WQ record array\n"); + goto out_error; + } + + /* + * Since the first EQ can have multiple CQs associated with it, + * this array is used to quickly see if we have a NVME fast-path + * CQ match. + */ + phba->sli4_hba.nvme_cq_map = kcalloc(phba->cfg_nvme_io_channel, + sizeof(uint16_t), + GFP_KERNEL); + if (!phba->sli4_hba.nvme_cq_map) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6078 Failed allocate memory for " + "fast-path CQ map\n"); + goto out_error; + } } - /* - * Create HBA Event Queues (EQs). The cfg_fcp_io_channel specifies - * how many EQs to create. - */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) { + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list); + /* Create HBA Event Queues (EQs) */ + for (idx = 0; idx < io_channel; idx++) { /* Create EQs */ qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize, phba->sli4_hba.eq_ecount); @@ -7373,32 +7679,17 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) goto out_error; } phba->sli4_hba.hba_eq[idx] = qdesc; + } - /* Create Fast Path FCP CQs */ - qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize, - phba->sli4_hba.cq_ecount); - if (!qdesc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0499 Failed allocate fast-path FCP " - "CQ (%d)\n", idx); - goto out_error; - } - phba->sli4_hba.fcp_cq[idx] = qdesc; + /* FCP and NVME io channels are not required to be balanced */ - /* Create Fast Path FCP WQs */ - wqesize = (phba->fcp_embed_io) ? 
- LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; - qdesc = lpfc_sli4_queue_alloc(phba, wqesize, - phba->sli4_hba.wq_ecount); - if (!qdesc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0503 Failed allocate fast-path FCP " - "WQ (%d)\n", idx); + for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) + if (lpfc_alloc_fcp_wq_cq(phba, idx)) goto out_error; - } - phba->sli4_hba.fcp_wq[idx] = qdesc; - } + for (idx = 0; idx < phba->cfg_nvme_io_channel; idx++) + if (lpfc_alloc_nvme_wq_cq(phba, idx)) + goto out_error; /* * Create Slow Path Completion Queues (CQs) @@ -7453,6 +7744,30 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) goto out_error; } phba->sli4_hba.els_wq = qdesc; + list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + /* Create NVME LS Complete Queue */ + qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.cq_esize, + phba->sli4_hba.cq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6079 Failed allocate NVME LS CQ\n"); + goto out_error; + } + phba->sli4_hba.nvmels_cq = qdesc; + + /* Create NVME LS Work Queue */ + qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize, + phba->sli4_hba.wq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6080 Failed allocate NVME LS WQ\n"); + goto out_error; + } + phba->sli4_hba.nvmels_wq = qdesc; + list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); + } /* * Create Receive Queue (RQ) @@ -7488,6 +7803,39 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) return -ENOMEM; } +static inline void +__lpfc_sli4_release_queue(struct lpfc_queue **qp) +{ + if (*qp != NULL) { + lpfc_sli4_queue_free(*qp); + *qp = NULL; + } +} + +static inline void +lpfc_sli4_release_queues(struct lpfc_queue ***qs, int max) +{ + int idx; + + if (*qs == NULL) + return; + + for (idx = 0; idx < max; idx++) + __lpfc_sli4_release_queue(&(*qs)[idx]); + + kfree(*qs); + *qs = NULL; +} + +static inline void +lpfc_sli4_release_queue_map(uint16_t **qmap) +{ + if (*qmap != NULL) { + kfree(*qmap); + *qmap = NULL; + } +} + /** * lpfc_sli4_queue_destroy - Destroy all the SLI4 queues * @phba: pointer to lpfc hba data structure. 
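The three release helpers introduced above all follow the same free-and-NULL discipline: take a pointer to the caller's pointer, free the object, and clear the slot so a later (or repeated) teardown pass is a no-op. A small userspace version of the same pattern, with invented types:

#include <stdlib.h>

struct queue { int id; };

static void release_queue(struct queue **qp)
{
	if (*qp) {
		free(*qp);
		*qp = NULL;
	}
}

static void release_queues(struct queue ***qs, int max)
{
	int i;

	if (*qs == NULL)
		return;
	for (i = 0; i < max; i++)
		release_queue(&(*qs)[i]);
	free(*qs);
	*qs = NULL;
}

int main(void)
{
	struct queue **arr = calloc(4, sizeof(*arr));
	int i;

	if (!arr)
		return 1;
	for (i = 0; i < 4; i++)
		arr[i] = calloc(1, sizeof(**arr));
	release_queues(&arr, 4);
	release_queues(&arr, 4);	/* second pass is harmless */
	return 0;
}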
@@ -7503,91 +7851,188 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) void lpfc_sli4_queue_destroy(struct lpfc_hba *phba) { - int idx; - if (phba->cfg_fof) lpfc_fof_queue_destroy(phba); - if (phba->sli4_hba.hba_eq != NULL) { - /* Release HBA event queue */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) { - if (phba->sli4_hba.hba_eq[idx] != NULL) { - lpfc_sli4_queue_free( - phba->sli4_hba.hba_eq[idx]); - phba->sli4_hba.hba_eq[idx] = NULL; - } + /* Release HBA eqs */ + lpfc_sli4_release_queues(&phba->sli4_hba.hba_eq, phba->io_channel_irqs); + + /* Release FCP cqs */ + lpfc_sli4_release_queues(&phba->sli4_hba.fcp_cq, + phba->cfg_fcp_io_channel); + + /* Release FCP wqs */ + lpfc_sli4_release_queues(&phba->sli4_hba.fcp_wq, + phba->cfg_fcp_io_channel); + + /* Release FCP CQ mapping array */ + lpfc_sli4_release_queue_map(&phba->sli4_hba.fcp_cq_map); + + /* Release NVME cqs */ + lpfc_sli4_release_queues(&phba->sli4_hba.nvme_cq, + phba->cfg_nvme_io_channel); + + /* Release NVME wqs */ + lpfc_sli4_release_queues(&phba->sli4_hba.nvme_wq, + phba->cfg_nvme_io_channel); + + /* Release NVME CQ mapping array */ + lpfc_sli4_release_queue_map(&phba->sli4_hba.nvme_cq_map); + + /* Release mailbox command work queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.mbx_wq); + + /* Release ELS work queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.els_wq); + + /* Release ELS work queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.nvmels_wq); + + /* Release unsolicited receive queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.hdr_rq); + __lpfc_sli4_release_queue(&phba->sli4_hba.dat_rq); + + /* Release ELS complete queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.els_cq); + + /* Release NVME LS complete queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.nvmels_cq); + + /* Release mailbox command complete queue */ + __lpfc_sli4_release_queue(&phba->sli4_hba.mbx_cq); + + /* Everything on this list has been freed */ + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list); +} + +int +lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, + struct lpfc_queue *drq, int count) +{ + int rc, i; + struct lpfc_rqe hrqe; + struct lpfc_rqe drqe; + struct lpfc_rqb *rqbp; + struct rqb_dmabuf *rqb_buffer; + LIST_HEAD(rqb_buf_list); + + rqbp = hrq->rqbp; + for (i = 0; i < count; i++) { + rqb_buffer = (rqbp->rqb_alloc_buffer)(phba); + if (!rqb_buffer) + break; + rqb_buffer->hrq = hrq; + rqb_buffer->drq = drq; + list_add_tail(&rqb_buffer->hbuf.list, &rqb_buf_list); + } + while (!list_empty(&rqb_buf_list)) { + list_remove_head(&rqb_buf_list, rqb_buffer, struct rqb_dmabuf, + hbuf.list); + + hrqe.address_lo = putPaddrLow(rqb_buffer->hbuf.phys); + hrqe.address_hi = putPaddrHigh(rqb_buffer->hbuf.phys); + drqe.address_lo = putPaddrLow(rqb_buffer->dbuf.phys); + drqe.address_hi = putPaddrHigh(rqb_buffer->dbuf.phys); + rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe); + if (rc < 0) { + (rqbp->rqb_free_buffer)(phba, rqb_buffer); + } else { + list_add_tail(&rqb_buffer->hbuf.list, + &rqbp->rqb_buffer_list); + rqbp->buffer_count++; } - kfree(phba->sli4_hba.hba_eq); - phba->sli4_hba.hba_eq = NULL; } + return 1; +} - if (phba->sli4_hba.fcp_cq != NULL) { - /* Release FCP completion queue */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) { - if (phba->sli4_hba.fcp_cq[idx] != NULL) { - lpfc_sli4_queue_free( - phba->sli4_hba.fcp_cq[idx]); - phba->sli4_hba.fcp_cq[idx] = NULL; - } - } - kfree(phba->sli4_hba.fcp_cq); - phba->sli4_hba.fcp_cq = NULL; +int +lpfc_free_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *rq) +{ + 
struct lpfc_rqb *rqbp; + struct lpfc_dmabuf *h_buf; + struct rqb_dmabuf *rqb_buffer; + + rqbp = rq->rqbp; + while (!list_empty(&rqbp->rqb_buffer_list)) { + list_remove_head(&rqbp->rqb_buffer_list, h_buf, + struct lpfc_dmabuf, list); + + rqb_buffer = container_of(h_buf, struct rqb_dmabuf, hbuf); + (rqbp->rqb_free_buffer)(phba, rqb_buffer); + rqbp->buffer_count--; } + return 1; +} - if (phba->sli4_hba.fcp_wq != NULL) { - /* Release FCP work queue */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) { - if (phba->sli4_hba.fcp_wq[idx] != NULL) { - lpfc_sli4_queue_free( - phba->sli4_hba.fcp_wq[idx]); - phba->sli4_hba.fcp_wq[idx] = NULL; - } - } - kfree(phba->sli4_hba.fcp_wq); - phba->sli4_hba.fcp_wq = NULL; +static int +lpfc_create_wq_cq(struct lpfc_hba *phba, struct lpfc_queue *eq, + struct lpfc_queue *cq, struct lpfc_queue *wq, uint16_t *cq_map, + int qidx, uint32_t qtype) +{ + struct lpfc_sli_ring *pring; + int rc; + + if (!eq || !cq || !wq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6085 Fast-path %s (%d) not allocated\n", + ((eq) ? ((cq) ? "WQ" : "CQ") : "EQ"), qidx); + return -ENOMEM; + } + + /* create the Cq first */ + rc = lpfc_cq_create(phba, cq, eq, + (qtype == LPFC_MBOX) ? LPFC_MCQ : LPFC_WCQ, qtype); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6086 Failed setup of CQ (%d), rc = 0x%x\n", + qidx, (uint32_t)rc); + return rc; } - /* Release FCP CQ mapping array */ - if (phba->sli4_hba.fcp_cq_map != NULL) { - kfree(phba->sli4_hba.fcp_cq_map); - phba->sli4_hba.fcp_cq_map = NULL; - } + if (qtype != LPFC_MBOX) { + /* Setup nvme_cq_map for fast lookup */ + if (cq_map) + *cq_map = cq->queue_id; - /* Release mailbox command work queue */ - if (phba->sli4_hba.mbx_wq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.mbx_wq); - phba->sli4_hba.mbx_wq = NULL; - } + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "6087 CQ setup: cq[%d]-id=%d, parent eq[%d]-id=%d\n", + qidx, cq->queue_id, qidx, eq->queue_id); - /* Release ELS work queue */ - if (phba->sli4_hba.els_wq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.els_wq); - phba->sli4_hba.els_wq = NULL; - } + /* create the wq */ + rc = lpfc_wq_create(phba, wq, cq, qtype); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6123 Fail setup fastpath WQ (%d), rc = 0x%x\n", + qidx, (uint32_t)rc); + /* no need to tear down cq - caller will do so */ + return rc; + } - /* Release unsolicited receive queue */ - if (phba->sli4_hba.hdr_rq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.hdr_rq); - phba->sli4_hba.hdr_rq = NULL; - } - if (phba->sli4_hba.dat_rq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.dat_rq); - phba->sli4_hba.dat_rq = NULL; - } + /* Bind this CQ/WQ to the NVME ring */ + pring = wq->pring; + pring->sli.sli4.wqp = (void *)wq; + cq->pring = pring; - /* Release ELS complete queue */ - if (phba->sli4_hba.els_cq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.els_cq); - phba->sli4_hba.els_cq = NULL; - } + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "2593 WQ setup: wq[%d]-id=%d assoc=%d, cq[%d]-id=%d\n", + qidx, wq->queue_id, wq->assoc_qid, qidx, cq->queue_id); + } else { + rc = lpfc_mq_create(phba, wq, cq, LPFC_MBOX); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0539 Failed setup of slow-path MQ: " + "rc = 0x%x\n", rc); + /* no need to tear down cq - caller will do so */ + return rc; + } - /* Release mailbox command complete queue */ - if (phba->sli4_hba.mbx_cq != NULL) { - lpfc_sli4_queue_free(phba->sli4_hba.mbx_cq); - phba->sli4_hba.mbx_cq = NULL; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + 
"2589 MBX MQ setup: wq-id=%d, parent cq-id=%d\n", + phba->sli4_hba.mbx_wq->queue_id, + phba->sli4_hba.mbx_cq->queue_id); } - return; + return 0; } /** @@ -7605,15 +8050,12 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) int lpfc_sli4_queue_setup(struct lpfc_hba *phba) { - struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring; - int rc = -ENOMEM; - int fcp_eqidx, fcp_cqidx, fcp_wqidx; - int fcp_cq_index = 0; uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; LPFC_MBOXQ_t *mboxq; - uint32_t length; + int qidx; + uint32_t length, io_channel; + int rc = -ENOMEM; /* Check for dual-ULP support */ mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -7663,220 +8105,173 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) /* * Set up HBA Event Queues (EQs) */ + io_channel = phba->io_channel_irqs; /* Set up HBA event queue */ - if (phba->cfg_fcp_io_channel && !phba->sli4_hba.hba_eq) { + if (io_channel && !phba->sli4_hba.hba_eq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3147 Fast-path EQs not allocated\n"); rc = -ENOMEM; goto out_error; } - for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_io_channel; fcp_eqidx++) { - if (!phba->sli4_hba.hba_eq[fcp_eqidx]) { + for (qidx = 0; qidx < io_channel; qidx++) { + if (!phba->sli4_hba.hba_eq[qidx]) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0522 Fast-path EQ (%d) not " - "allocated\n", fcp_eqidx); + "allocated\n", qidx); rc = -ENOMEM; - goto out_destroy_hba_eq; + goto out_destroy; } - rc = lpfc_eq_create(phba, phba->sli4_hba.hba_eq[fcp_eqidx], - (phba->cfg_fcp_imax / phba->cfg_fcp_io_channel)); + rc = lpfc_eq_create(phba, phba->sli4_hba.hba_eq[qidx], + phba->cfg_fcp_imax); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0523 Failed setup of fast-path EQ " - "(%d), rc = 0x%x\n", fcp_eqidx, + "(%d), rc = 0x%x\n", qidx, (uint32_t)rc); - goto out_destroy_hba_eq; + goto out_destroy; } lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2584 HBA EQ setup: " - "queue[%d]-id=%d\n", fcp_eqidx, - phba->sli4_hba.hba_eq[fcp_eqidx]->queue_id); - } - - /* Set up fast-path FCP Response Complete Queue */ - if (!phba->sli4_hba.fcp_cq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3148 Fast-path FCP CQ array not " - "allocated\n"); - rc = -ENOMEM; - goto out_destroy_hba_eq; + "2584 HBA EQ setup: queue[%d]-id=%d\n", + qidx, phba->sli4_hba.hba_eq[qidx]->queue_id); } - for (fcp_cqidx = 0; fcp_cqidx < phba->cfg_fcp_io_channel; fcp_cqidx++) { - if (!phba->sli4_hba.fcp_cq[fcp_cqidx]) { + if (phba->cfg_nvme_io_channel) { + if (!phba->sli4_hba.nvme_cq || !phba->sli4_hba.nvme_wq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0526 Fast-path FCP CQ (%d) not " - "allocated\n", fcp_cqidx); + "6084 Fast-path NVME %s array not allocated\n", + (phba->sli4_hba.nvme_cq) ? 
"CQ" : "WQ"); rc = -ENOMEM; - goto out_destroy_fcp_cq; - } - rc = lpfc_cq_create(phba, phba->sli4_hba.fcp_cq[fcp_cqidx], - phba->sli4_hba.hba_eq[fcp_cqidx], LPFC_WCQ, LPFC_FCP); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0527 Failed setup of fast-path FCP " - "CQ (%d), rc = 0x%x\n", fcp_cqidx, - (uint32_t)rc); - goto out_destroy_fcp_cq; + goto out_destroy; } - /* Setup fcp_cq_map for fast lookup */ - phba->sli4_hba.fcp_cq_map[fcp_cqidx] = - phba->sli4_hba.fcp_cq[fcp_cqidx]->queue_id; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2588 FCP CQ setup: cq[%d]-id=%d, " - "parent seq[%d]-id=%d\n", - fcp_cqidx, - phba->sli4_hba.fcp_cq[fcp_cqidx]->queue_id, - fcp_cqidx, - phba->sli4_hba.hba_eq[fcp_cqidx]->queue_id); - } - - /* Set up fast-path FCP Work Queue */ - if (!phba->sli4_hba.fcp_wq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3149 Fast-path FCP WQ array not " - "allocated\n"); - rc = -ENOMEM; - goto out_destroy_fcp_cq; + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) { + rc = lpfc_create_wq_cq(phba, + phba->sli4_hba.hba_eq[ + qidx % io_channel], + phba->sli4_hba.nvme_cq[qidx], + phba->sli4_hba.nvme_wq[qidx], + &phba->sli4_hba.nvme_cq_map[qidx], + qidx, LPFC_NVME); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6123 Failed to setup fastpath " + "NVME WQ/CQ (%d), rc = 0x%x\n", + qidx, (uint32_t)rc); + goto out_destroy; + } + } } - for (fcp_wqidx = 0; fcp_wqidx < phba->cfg_fcp_io_channel; fcp_wqidx++) { - if (!phba->sli4_hba.fcp_wq[fcp_wqidx]) { + if (phba->cfg_fcp_io_channel) { + /* Set up fast-path FCP Response Complete Queue */ + if (!phba->sli4_hba.fcp_cq || !phba->sli4_hba.fcp_wq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0534 Fast-path FCP WQ (%d) not " - "allocated\n", fcp_wqidx); + "3148 Fast-path FCP %s array not allocated\n", + phba->sli4_hba.fcp_cq ? 
"WQ" : "CQ"); rc = -ENOMEM; - goto out_destroy_fcp_wq; - } - rc = lpfc_wq_create(phba, phba->sli4_hba.fcp_wq[fcp_wqidx], - phba->sli4_hba.fcp_cq[fcp_wqidx], - LPFC_FCP); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0535 Failed setup of fast-path FCP " - "WQ (%d), rc = 0x%x\n", fcp_wqidx, - (uint32_t)rc); - goto out_destroy_fcp_wq; + goto out_destroy; } - /* Bind this WQ to the next FCP ring */ - pring = &psli->ring[MAX_SLI3_CONFIGURED_RINGS + fcp_wqidx]; - pring->sli.sli4.wqp = (void *)phba->sli4_hba.fcp_wq[fcp_wqidx]; - phba->sli4_hba.fcp_cq[fcp_wqidx]->pring = pring; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2591 FCP WQ setup: wq[%d]-id=%d, " - "parent cq[%d]-id=%d\n", - fcp_wqidx, - phba->sli4_hba.fcp_wq[fcp_wqidx]->queue_id, - fcp_cq_index, - phba->sli4_hba.fcp_cq[fcp_wqidx]->queue_id); + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) { + rc = lpfc_create_wq_cq(phba, + phba->sli4_hba.hba_eq[ + qidx % io_channel], + phba->sli4_hba.fcp_cq[qidx], + phba->sli4_hba.fcp_wq[qidx], + &phba->sli4_hba.fcp_cq_map[qidx], + qidx, LPFC_FCP); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0535 Failed to setup fastpath " + "FCP WQ/CQ (%d), rc = 0x%x\n", + qidx, (uint32_t)rc); + goto out_destroy; + } + } } + /* - * Set up Complete Queues (CQs) + * Set up Slow Path Complete Queues (CQs) */ - /* Set up slow-path MBOX Complete Queue as the first CQ */ - if (!phba->sli4_hba.mbx_cq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0528 Mailbox CQ not allocated\n"); - rc = -ENOMEM; - goto out_destroy_fcp_wq; - } - rc = lpfc_cq_create(phba, phba->sli4_hba.mbx_cq, - phba->sli4_hba.hba_eq[0], LPFC_MCQ, LPFC_MBOX); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0529 Failed setup of slow-path mailbox CQ: " - "rc = 0x%x\n", (uint32_t)rc); - goto out_destroy_fcp_wq; - } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2585 MBX CQ setup: cq-id=%d, parent eq-id=%d\n", - phba->sli4_hba.mbx_cq->queue_id, - phba->sli4_hba.hba_eq[0]->queue_id); + /* Set up slow-path MBOX CQ/MQ */ - /* Set up slow-path ELS Complete Queue */ - if (!phba->sli4_hba.els_cq) { + if (!phba->sli4_hba.mbx_cq || !phba->sli4_hba.mbx_wq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0530 ELS CQ not allocated\n"); + "0528 %s not allocated\n", + phba->sli4_hba.mbx_cq ? 
+ "Mailbox WQ" : "Mailbox CQ"); rc = -ENOMEM; - goto out_destroy_mbx_cq; + goto out_destroy; } - rc = lpfc_cq_create(phba, phba->sli4_hba.els_cq, - phba->sli4_hba.hba_eq[0], LPFC_WCQ, LPFC_ELS); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0531 Failed setup of slow-path ELS CQ: " - "rc = 0x%x\n", (uint32_t)rc); - goto out_destroy_mbx_cq; - } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2586 ELS CQ setup: cq-id=%d, parent eq-id=%d\n", - phba->sli4_hba.els_cq->queue_id, - phba->sli4_hba.hba_eq[0]->queue_id); - - /* - * Set up all the Work Queues (WQs) - */ - /* Set up Mailbox Command Queue */ - if (!phba->sli4_hba.mbx_wq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0538 Slow-path MQ not allocated\n"); - rc = -ENOMEM; - goto out_destroy_els_cq; - } - rc = lpfc_mq_create(phba, phba->sli4_hba.mbx_wq, - phba->sli4_hba.mbx_cq, LPFC_MBOX); + rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0], + phba->sli4_hba.mbx_cq, + phba->sli4_hba.mbx_wq, + NULL, 0, LPFC_MBOX); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0539 Failed setup of slow-path MQ: " - "rc = 0x%x\n", rc); - goto out_destroy_els_cq; + "0529 Failed setup of mailbox WQ/CQ: rc = 0x%x\n", + (uint32_t)rc); + goto out_destroy; } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2589 MBX MQ setup: wq-id=%d, parent cq-id=%d\n", - phba->sli4_hba.mbx_wq->queue_id, - phba->sli4_hba.mbx_cq->queue_id); - /* Set up slow-path ELS Work Queue */ - if (!phba->sli4_hba.els_wq) { + /* Set up slow-path ELS WQ/CQ */ + if (!phba->sli4_hba.els_cq || !phba->sli4_hba.els_wq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0536 Slow-path ELS WQ not allocated\n"); + "0530 ELS %s not allocated\n", + phba->sli4_hba.els_cq ? "WQ" : "CQ"); rc = -ENOMEM; - goto out_destroy_mbx_wq; + goto out_destroy; } - rc = lpfc_wq_create(phba, phba->sli4_hba.els_wq, - phba->sli4_hba.els_cq, LPFC_ELS); + rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0], + phba->sli4_hba.els_cq, + phba->sli4_hba.els_wq, + NULL, 0, LPFC_ELS); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0537 Failed setup of slow-path ELS WQ: " - "rc = 0x%x\n", (uint32_t)rc); - goto out_destroy_mbx_wq; + "0529 Failed setup of ELS WQ/CQ: rc = 0x%x\n", + (uint32_t)rc); + goto out_destroy; } - - /* Bind this WQ to the ELS ring */ - pring = &psli->ring[LPFC_ELS_RING]; - pring->sli.sli4.wqp = (void *)phba->sli4_hba.els_wq; - phba->sli4_hba.els_cq->pring = pring; - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "2590 ELS WQ setup: wq-id=%d, parent cq-id=%d\n", phba->sli4_hba.els_wq->queue_id, phba->sli4_hba.els_cq->queue_id); - /* - * Create Receive Queue (RQ) - */ + if (phba->cfg_nvme_io_channel) { + /* Set up NVME LS Complete Queue */ + if (!phba->sli4_hba.nvmels_cq || !phba->sli4_hba.nvmels_wq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6091 LS %s not allocated\n", + phba->sli4_hba.nvmels_cq ? 
"WQ" : "CQ"); + rc = -ENOMEM; + goto out_destroy; + } + rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0], + phba->sli4_hba.nvmels_cq, + phba->sli4_hba.nvmels_wq, + NULL, 0, LPFC_NVME_LS); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0529 Failed setup of NVVME LS WQ/CQ: " + "rc = 0x%x\n", (uint32_t)rc); + goto out_destroy; + } + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "6096 ELS WQ setup: wq-id=%d, " + "parent cq-id=%d\n", + phba->sli4_hba.nvmels_wq->queue_id, + phba->sli4_hba.nvmels_cq->queue_id); + } + if (!phba->sli4_hba.hdr_rq || !phba->sli4_hba.dat_rq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0540 Receive Queue not allocated\n"); rc = -ENOMEM; - goto out_destroy_els_wq; + goto out_destroy; } lpfc_rq_adjust_repost(phba, phba->sli4_hba.hdr_rq, LPFC_ELS_HBQ); @@ -7888,7 +8283,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0541 Failed setup of Receive Queue: " "rc = 0x%x\n", (uint32_t)rc); - goto out_destroy_fcp_wq; + goto out_destroy; } lpfc_printf_log(phba, KERN_INFO, LOG_INIT, @@ -7904,7 +8299,7 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0549 Failed setup of FOF Queues: " "rc = 0x%x\n", rc); - goto out_destroy_els_rq; + goto out_destroy; } } @@ -7912,30 +8307,12 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) * Configure EQ delay multipier for interrupt coalescing using * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time. */ - for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_io_channel; - fcp_eqidx += LPFC_MAX_EQ_DELAY) - lpfc_modify_fcp_eq_delay(phba, fcp_eqidx); + for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY) + lpfc_modify_hba_eq_delay(phba, qidx); return 0; -out_destroy_els_rq: - lpfc_rq_destroy(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq); -out_destroy_els_wq: - lpfc_wq_destroy(phba, phba->sli4_hba.els_wq); -out_destroy_mbx_wq: - lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq); -out_destroy_els_cq: - lpfc_cq_destroy(phba, phba->sli4_hba.els_cq); -out_destroy_mbx_cq: - lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq); -out_destroy_fcp_wq: - for (--fcp_wqidx; fcp_wqidx >= 0; fcp_wqidx--) - lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[fcp_wqidx]); -out_destroy_fcp_cq: - for (--fcp_cqidx; fcp_cqidx >= 0; fcp_cqidx--) - lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[fcp_cqidx]); -out_destroy_hba_eq: - for (--fcp_eqidx; fcp_eqidx >= 0; fcp_eqidx--) - lpfc_eq_destroy(phba, phba->sli4_hba.hba_eq[fcp_eqidx]); +out_destroy: + lpfc_sli4_queue_unset(phba); out_error: return rc; } @@ -7955,39 +8332,66 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) void lpfc_sli4_queue_unset(struct lpfc_hba *phba) { - int fcp_qidx; + int qidx; /* Unset the queues created for Flash Optimized Fabric operations */ if (phba->cfg_fof) lpfc_fof_queue_destroy(phba); + /* Unset mailbox command work queue */ - lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq); + if (phba->sli4_hba.mbx_wq) + lpfc_mq_destroy(phba, phba->sli4_hba.mbx_wq); + + /* Unset NVME LS work queue */ + if (phba->sli4_hba.nvmels_wq) + lpfc_wq_destroy(phba, phba->sli4_hba.nvmels_wq); + /* Unset ELS work queue */ - lpfc_wq_destroy(phba, phba->sli4_hba.els_wq); + if (phba->sli4_hba.els_cq) + lpfc_wq_destroy(phba, phba->sli4_hba.els_wq); + /* Unset unsolicited receive queue */ - lpfc_rq_destroy(phba, phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq); + if (phba->sli4_hba.hdr_rq) + lpfc_rq_destroy(phba, phba->sli4_hba.hdr_rq, + phba->sli4_hba.dat_rq); + /* Unset FCP work queue */ - if (phba->sli4_hba.fcp_wq) { - for 
(fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_io_channel; - fcp_qidx++) - lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[fcp_qidx]); + if (phba->sli4_hba.fcp_wq) + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) + lpfc_wq_destroy(phba, phba->sli4_hba.fcp_wq[qidx]); + + /* Unset NVME work queue */ + if (phba->sli4_hba.nvme_wq) { + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) + lpfc_wq_destroy(phba, phba->sli4_hba.nvme_wq[qidx]); } + /* Unset mailbox command complete queue */ - lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq); + if (phba->sli4_hba.mbx_cq) + lpfc_cq_destroy(phba, phba->sli4_hba.mbx_cq); + /* Unset ELS complete queue */ - lpfc_cq_destroy(phba, phba->sli4_hba.els_cq); + if (phba->sli4_hba.els_cq) + lpfc_cq_destroy(phba, phba->sli4_hba.els_cq); + + /* Unset NVME LS complete queue */ + if (phba->sli4_hba.nvmels_cq) + lpfc_cq_destroy(phba, phba->sli4_hba.nvmels_cq); + + /* Unset NVME response complete queue */ + if (phba->sli4_hba.nvme_cq) + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) + lpfc_cq_destroy(phba, phba->sli4_hba.nvme_cq[qidx]); + /* Unset FCP response complete queue */ - if (phba->sli4_hba.fcp_cq) { - for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_io_channel; - fcp_qidx++) - lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[fcp_qidx]); - } + if (phba->sli4_hba.fcp_cq) + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) + lpfc_cq_destroy(phba, phba->sli4_hba.fcp_cq[qidx]); + /* Unset fast-path event queue */ - if (phba->sli4_hba.hba_eq) { - for (fcp_qidx = 0; fcp_qidx < phba->cfg_fcp_io_channel; - fcp_qidx++) - lpfc_eq_destroy(phba, phba->sli4_hba.hba_eq[fcp_qidx]); - } + if (phba->sli4_hba.hba_eq) + for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) + lpfc_eq_destroy(phba, phba->sli4_hba.hba_eq[qidx]); } /** @@ -8697,91 +9101,33 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba) } /** - * lpfc_find_next_cpu - Find next available CPU that matches the phys_id + * lpfc_cpu_affinity_check - Check vector CPU affinity mappings * @phba: pointer to lpfc hba data structure. - * - * Find next available CPU to use for IRQ to CPU affinity. + * @vectors: number of msix vectors allocated. + * + * The routine will figure out the CPU affinity assignment for every + * MSI-X vector allocated for the HBA. The hba_eq_hdl will be updated + * with a pointer to the CPU mask that defines ALL the CPUs this vector + * can be associated with. If the vector can be unquely associated with + * a single CPU, that CPU will be recorded in hba_eq_hdl[index].cpu. + * In addition, the CPU to IO channel mapping will be calculated + * and the phba->sli4_hba.cpu_map array will reflect this. */ -static int -lpfc_find_next_cpu(struct lpfc_hba *phba, uint32_t phys_id) +static void +lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) { struct lpfc_vector_map_info *cpup; + int index = 0; + int vec = 0; int cpu; - - cpup = phba->sli4_hba.cpu_map; - for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { - /* CPU must be online */ - if (cpu_online(cpu)) { - if ((cpup->irq == LPFC_VECTOR_MAP_EMPTY) && - (lpfc_used_cpu[cpu] == LPFC_VECTOR_MAP_EMPTY) && - (cpup->phys_id == phys_id)) { - return cpu; - } - } - cpup++; - } - - /* - * If we get here, we have used ALL CPUs for the specific - * phys_id. Now we need to clear out lpfc_used_cpu and start - * reusing CPUs. 
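The affinity rework in the hunks that follow drops the NUMA-aware search (lpfc_find_next_cpu and friends) in favour of a plain round-robin walk: each present CPU is handed the next IRQ vector and the next io channel, wrapping when either runs out. A toy sketch of that assignment with invented counts:

#include <stdio.h>

#define NCPUS		8
#define NVECTORS	4
#define NCHANNELS	4

int main(void)
{
	int cpu, vec = 0, chan = 0;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		printf("cpu %d -> irq vector %d, io channel %d\n",
		       cpu, vec, chan);
		if (++vec >= NVECTORS)	/* wrap, as the new cpu_map fill does */
			vec = 0;
		if (++chan >= NCHANNELS)
			chan = 0;
	}
	return 0;
}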
- */ - - for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { - if (lpfc_used_cpu[cpu] == phys_id) - lpfc_used_cpu[cpu] = LPFC_VECTOR_MAP_EMPTY; - } - - cpup = phba->sli4_hba.cpu_map; - for (cpu = 0; cpu < phba->sli4_hba.num_present_cpu; cpu++) { - /* CPU must be online */ - if (cpu_online(cpu)) { - if ((cpup->irq == LPFC_VECTOR_MAP_EMPTY) && - (cpup->phys_id == phys_id)) { - return cpu; - } - } - cpup++; - } - return LPFC_VECTOR_MAP_EMPTY; -} - -/** - * lpfc_sli4_set_affinity - Set affinity for HBA IRQ vectors - * @phba: pointer to lpfc hba data structure. - * @vectors: number of HBA vectors - * - * Affinitize MSIX IRQ vectors to CPUs. Try to equally spread vector - * affinization across multple physical CPUs (numa nodes). - * In addition, this routine will assign an IO channel for each CPU - * to use when issuing I/Os. - */ -static int -lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) -{ - int i, idx, saved_chann, used_chann, cpu, phys_id; - int max_phys_id, min_phys_id; - int num_io_channel, first_cpu, chan; - struct lpfc_vector_map_info *cpup; #ifdef CONFIG_X86 struct cpuinfo_x86 *cpuinfo; #endif - uint8_t chann[LPFC_FCP_IO_CHAN_MAX+1]; - - /* If there is no mapping, just return */ - if (!phba->cfg_fcp_cpu_map) - return 1; /* Init cpu_map array */ memset(phba->sli4_hba.cpu_map, 0xff, (sizeof(struct lpfc_vector_map_info) * - phba->sli4_hba.num_present_cpu)); - - max_phys_id = 0; - min_phys_id = 0xff; - phys_id = 0; - num_io_channel = 0; - first_cpu = LPFC_VECTOR_MAP_EMPTY; + phba->sli4_hba.num_present_cpu)); /* Update CPU map with physical id and core id of each CPU */ cpup = phba->sli4_hba.cpu_map; @@ -8795,184 +9141,16 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors) cpup->phys_id = 0; cpup->core_id = 0; #endif - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3328 CPU physid %d coreid %d\n", - cpup->phys_id, cpup->core_id); - - if (cpup->phys_id > max_phys_id) - max_phys_id = cpup->phys_id; - if (cpup->phys_id < min_phys_id) - min_phys_id = cpup->phys_id; + cpup->channel_id = index; /* For now round robin */ + cpup->irq = pci_irq_vector(phba->pcidev, vec); + vec++; + if (vec >= vectors) + vec = 0; + index++; + if (index >= phba->cfg_fcp_io_channel) + index = 0; cpup++; } - - phys_id = min_phys_id; - /* Now associate the HBA vectors with specific CPUs */ - for (idx = 0; idx < vectors; idx++) { - cpup = phba->sli4_hba.cpu_map; - cpu = lpfc_find_next_cpu(phba, phys_id); - if (cpu == LPFC_VECTOR_MAP_EMPTY) { - - /* Try for all phys_id's */ - for (i = 1; i < max_phys_id; i++) { - phys_id++; - if (phys_id > max_phys_id) - phys_id = min_phys_id; - cpu = lpfc_find_next_cpu(phba, phys_id); - if (cpu == LPFC_VECTOR_MAP_EMPTY) - continue; - goto found; - } - - /* Use round robin for scheduling */ - phba->cfg_fcp_io_sched = LPFC_FCP_SCHED_ROUND_ROBIN; - chan = 0; - cpup = phba->sli4_hba.cpu_map; - for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { - cpup->channel_id = chan; - cpup++; - chan++; - if (chan >= phba->cfg_fcp_io_channel) - chan = 0; - } - - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3329 Cannot set affinity:" - "Error mapping vector %d (%d)\n", - idx, vectors); - return 0; - } -found: - cpup += cpu; - if (phba->cfg_fcp_cpu_map == LPFC_DRIVER_CPU_MAP) - lpfc_used_cpu[cpu] = phys_id; - - /* Associate vector with selected CPU */ - cpup->irq = pci_irq_vector(phba->pcidev, idx); - - /* Associate IO channel with selected CPU */ - cpup->channel_id = idx; - num_io_channel++; - - if (first_cpu == LPFC_VECTOR_MAP_EMPTY) - first_cpu = cpu; - - /* Now 
affinitize to the selected CPU */ - i = irq_set_affinity_hint(pci_irq_vector(phba->pcidev, idx), - get_cpu_mask(cpu)); - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3330 Set Affinity: CPU %d channel %d " - "irq %d (%x)\n", - cpu, cpup->channel_id, - pci_irq_vector(phba->pcidev, idx), i); - - /* Spread vector mapping across multple physical CPU nodes */ - phys_id++; - if (phys_id > max_phys_id) - phys_id = min_phys_id; - } - - /* - * Finally fill in the IO channel for any remaining CPUs. - * At this point, all IO channels have been assigned to a specific - * MSIx vector, mapped to a specific CPU. - * Base the remaining IO channel assigned, to IO channels already - * assigned to other CPUs on the same phys_id. - */ - for (i = min_phys_id; i <= max_phys_id; i++) { - /* - * If there are no io channels already mapped to - * this phys_id, just round robin thru the io_channels. - * Setup chann[] for round robin. - */ - for (idx = 0; idx < phba->cfg_fcp_io_channel; idx++) - chann[idx] = idx; - - saved_chann = 0; - used_chann = 0; - - /* - * First build a list of IO channels already assigned - * to this phys_id before reassigning the same IO - * channels to the remaining CPUs. - */ - cpup = phba->sli4_hba.cpu_map; - cpu = first_cpu; - cpup += cpu; - for (idx = 0; idx < phba->sli4_hba.num_present_cpu; - idx++) { - if (cpup->phys_id == i) { - /* - * Save any IO channels that are - * already mapped to this phys_id. - */ - if (cpup->irq != LPFC_VECTOR_MAP_EMPTY) { - if (saved_chann <= - LPFC_FCP_IO_CHAN_MAX) { - chann[saved_chann] = - cpup->channel_id; - saved_chann++; - } - goto out; - } - - /* See if we are using round-robin */ - if (saved_chann == 0) - saved_chann = - phba->cfg_fcp_io_channel; - - /* Associate next IO channel with CPU */ - cpup->channel_id = chann[used_chann]; - num_io_channel++; - used_chann++; - if (used_chann == saved_chann) - used_chann = 0; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3331 Set IO_CHANN " - "CPU %d channel %d\n", - idx, cpup->channel_id); - } -out: - cpu++; - if (cpu >= phba->sli4_hba.num_present_cpu) { - cpup = phba->sli4_hba.cpu_map; - cpu = 0; - } else { - cpup++; - } - } - } - - if (phba->sli4_hba.num_online_cpu != phba->sli4_hba.num_present_cpu) { - cpup = phba->sli4_hba.cpu_map; - for (idx = 0; idx < phba->sli4_hba.num_present_cpu; idx++) { - if (cpup->channel_id == LPFC_VECTOR_MAP_EMPTY) { - cpup->channel_id = 0; - num_io_channel++; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3332 Assign IO_CHANN " - "CPU %d channel %d\n", - idx, cpup->channel_id); - } - cpup++; - } - } - - /* Sanity check */ - if (num_io_channel != phba->sli4_hba.num_present_cpu) - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3333 Set affinity mismatch:" - "%d chann != %d cpus: %d vectors\n", - num_io_channel, phba->sli4_hba.num_present_cpu, - vectors); - - /* Enable using cpu affinity for scheduling */ - phba->cfg_fcp_io_sched = LPFC_FCP_SCHED_BY_CPU; - return 1; } @@ -8993,11 +9171,12 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) int vectors, rc, index; /* Set up MSI-X multi-message vectors */ - vectors = phba->cfg_fcp_io_channel; + vectors = phba->io_channel_irqs; if (phba->cfg_fof) vectors++; - rc = pci_alloc_irq_vectors(phba->pcidev, 2, vectors, PCI_IRQ_MSIX); + rc = pci_alloc_irq_vectors(phba->pcidev, 2, vectors, + PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0484 PCI enable MSI-X failed (%d)\n", rc); @@ -9012,19 +9191,19 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) LPFC_SLI4_HANDLER_NAME_SZ, 
LPFC_DRIVER_HANDLER_NAME"%d", index); - phba->sli4_hba.fcp_eq_hdl[index].idx = index; - phba->sli4_hba.fcp_eq_hdl[index].phba = phba; - atomic_set(&phba->sli4_hba.fcp_eq_hdl[index].fcp_eq_in_use, 1); + phba->sli4_hba.hba_eq_hdl[index].idx = index; + phba->sli4_hba.hba_eq_hdl[index].phba = phba; + atomic_set(&phba->sli4_hba.hba_eq_hdl[index].hba_eq_in_use, 1); if (phba->cfg_fof && (index == (vectors - 1))) rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_fof_intr_handler, 0, (char *)&phba->sli4_hba.handler_name[index], - &phba->sli4_hba.fcp_eq_hdl[index]); + &phba->sli4_hba.hba_eq_hdl[index]); else rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_hba_intr_handler, 0, (char *)&phba->sli4_hba.handler_name[index], - &phba->sli4_hba.fcp_eq_hdl[index]); + &phba->sli4_hba.hba_eq_hdl[index]); if (rc) { lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "0486 MSI-X fast-path (%d) " @@ -9036,26 +9215,29 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) if (phba->cfg_fof) vectors--; - if (vectors != phba->cfg_fcp_io_channel) { + if (vectors != phba->io_channel_irqs) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3238 Reducing IO channels to match number of " "MSI-X vectors, requested %d got %d\n", - phba->cfg_fcp_io_channel, vectors); - phba->cfg_fcp_io_channel = vectors; + phba->io_channel_irqs, vectors); + if (phba->cfg_fcp_io_channel > vectors) + phba->cfg_fcp_io_channel = vectors; + if (phba->cfg_nvme_io_channel > vectors) + phba->cfg_nvme_io_channel = vectors; + if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) + phba->io_channel_irqs = phba->cfg_fcp_io_channel; + else + phba->io_channel_irqs = phba->cfg_nvme_io_channel; } + lpfc_cpu_affinity_check(phba, vectors); - if (!shost_use_blk_mq(lpfc_shost_from_vport(phba->pport))) - lpfc_sli4_set_affinity(phba, vectors); return rc; cfg_fail_out: /* free the irq already requested */ - for (--index; index >= 0; index--) { - int irq = pci_irq_vector(phba->pcidev, index); - - irq_set_affinity_hint(irq, NULL); - free_irq(irq, &phba->sli4_hba.fcp_eq_hdl[index]); - } + for (--index; index >= 0; index--) + free_irq(pci_irq_vector(phba->pcidev, index), + &phba->sli4_hba.hba_eq_hdl[index]); /* Unconfigure MSI-X capability structure */ pci_free_irq_vectors(phba->pcidev); @@ -9102,14 +9284,14 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba) return rc; } - for (index = 0; index < phba->cfg_fcp_io_channel; index++) { - phba->sli4_hba.fcp_eq_hdl[index].idx = index; - phba->sli4_hba.fcp_eq_hdl[index].phba = phba; + for (index = 0; index < phba->io_channel_irqs; index++) { + phba->sli4_hba.hba_eq_hdl[index].idx = index; + phba->sli4_hba.hba_eq_hdl[index].phba = phba; } if (phba->cfg_fof) { - phba->sli4_hba.fcp_eq_hdl[index].idx = index; - phba->sli4_hba.fcp_eq_hdl[index].phba = phba; + phba->sli4_hba.hba_eq_hdl[index].idx = index; + phba->sli4_hba.hba_eq_hdl[index].phba = phba; } return 0; } @@ -9134,7 +9316,7 @@ static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode) { uint32_t intr_mode = LPFC_INTR_ERROR; - int retval, index; + int retval, idx; if (cfg_mode == 2) { /* Preparation before conf_msi mbox cmd */ @@ -9165,21 +9347,23 @@ lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode) retval = request_irq(phba->pcidev->irq, lpfc_sli4_intr_handler, IRQF_SHARED, LPFC_DRIVER_NAME, phba); if (!retval) { + struct lpfc_hba_eq_hdl *eqhdl; + /* Indicate initialization to INTx mode */ phba->intr_type = INTx; intr_mode = 0; - for (index = 0; index < phba->cfg_fcp_io_channel; - index++) { - 
phba->sli4_hba.fcp_eq_hdl[index].idx = index; - phba->sli4_hba.fcp_eq_hdl[index].phba = phba; - atomic_set(&phba->sli4_hba.fcp_eq_hdl[index]. - fcp_eq_in_use, 1); + + for (idx = 0; idx < phba->io_channel_irqs; idx++) { + eqhdl = &phba->sli4_hba.hba_eq_hdl[idx]; + eqhdl->idx = idx; + eqhdl->phba = phba; + atomic_set(&eqhdl->hba_eq_in_use, 1); } if (phba->cfg_fof) { - phba->sli4_hba.fcp_eq_hdl[index].idx = index; - phba->sli4_hba.fcp_eq_hdl[index].phba = phba; - atomic_set(&phba->sli4_hba.fcp_eq_hdl[index]. - fcp_eq_in_use, 1); + eqhdl = &phba->sli4_hba.hba_eq_hdl[idx]; + eqhdl->idx = idx; + eqhdl->phba = phba; + atomic_set(&eqhdl->hba_eq_in_use, 1); } } } @@ -9203,15 +9387,13 @@ lpfc_sli4_disable_intr(struct lpfc_hba *phba) int index; /* Free up MSI-X multi-message vectors */ - for (index = 0; index < phba->cfg_fcp_io_channel; index++) { - int irq = pci_irq_vector(phba->pcidev, index); - - irq_set_affinity_hint(irq, NULL); - free_irq(irq, &phba->sli4_hba.fcp_eq_hdl[index]); - } + for (index = 0; index < phba->io_channel_irqs; index++) + free_irq(pci_irq_vector(phba->pcidev, index), + &phba->sli4_hba.hba_eq_hdl[index]); if (phba->cfg_fof) - free_irq(pci_irq_vector(phba->pcidev, index), phba); + free_irq(pci_irq_vector(phba->pcidev, index), + &phba->sli4_hba.hba_eq_hdl[index]); } else { free_irq(phba->pcidev->irq, phba); } @@ -9273,11 +9455,24 @@ static void lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) { int wait_time = 0; - int fcp_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list); + int nvme_xri_cmpl = 1; + int fcp_xri_cmpl = 1; int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); - while (!fcp_xri_cmpl || !els_xri_cmpl) { + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) + fcp_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + nvme_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + + while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl) { if (wait_time > LPFC_XRI_EXCH_BUSY_WAIT_TMO) { + if (!nvme_xri_cmpl) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6100 NVME XRI exchange busy " + "wait time: %d seconds.\n", + wait_time/1000); if (!fcp_xri_cmpl) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2877 FCP XRI exchange busy " @@ -9294,8 +9489,14 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) msleep(LPFC_XRI_EXCH_BUSY_WAIT_T1); wait_time += LPFC_XRI_EXCH_BUSY_WAIT_T1; } - fcp_xri_cmpl = - list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + nvme_xri_cmpl = list_empty( + &phba->sli4_hba.lpfc_abts_nvme_buf_list); + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) + fcp_xri_cmpl = list_empty( + &phba->sli4_hba.lpfc_abts_scsi_buf_list); + els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); } @@ -9509,10 +9710,31 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) sli4_params->wqsize = bf_get(cfg_wqsize, mbx_sli4_parameters); sli4_params->sgl_pages_max = bf_get(cfg_sgl_page_cnt, mbx_sli4_parameters); + sli4_params->wqpcnt = bf_get(cfg_wqpcnt, mbx_sli4_parameters); sli4_params->sgl_pp_align = bf_get(cfg_sgl_pp_align, mbx_sli4_parameters); phba->sli4_hba.extents_in_use = bf_get(cfg_ext, mbx_sli4_parameters); phba->sli4_hba.rpi_hdrs_in_use = bf_get(cfg_hdrr, mbx_sli4_parameters); + phba->nvme_support = (bf_get(cfg_nvme, mbx_sli4_parameters) && + bf_get(cfg_xib, mbx_sli4_parameters)); + + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) || + !phba->nvme_support) { + 
phba->nvme_support = 0; + phba->nvmet_support = 0; + phba->cfg_nvme_io_channel = 0; + phba->io_channel_irqs = phba->cfg_fcp_io_channel; + lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME, + "6101 Disabling NVME support: " + "Not supported by firmware: %d %d\n", + bf_get(cfg_nvme, mbx_sli4_parameters), + bf_get(cfg_xib, mbx_sli4_parameters)); + + /* If firmware doesn't support NVME, just use SCSI support */ + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return -ENODEV; + phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP; + } /* Make sure that sge_supp_len can be handled by the driver */ if (sli4_params->sge_supp_len > LPFC_MAX_SGE_SIZE) @@ -9587,14 +9809,6 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_disable_pci_dev; } - /* Set up phase-1 common device driver resources */ - error = lpfc_setup_driver_resource_phase1(phba); - if (error) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1403 Failed to set up driver resource.\n"); - goto out_unset_pci_mem_s3; - } - /* Set up SLI-3 specific device driver resources */ error = lpfc_sli_driver_resource_setup(phba); if (error) { @@ -10169,6 +10383,21 @@ lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *phba) return 0; } +/** + * lpfc_sli4_get_iocb_cnt - Calculate the # of total IOCBs to reserve + * @phba: pointer to lpfc hba data structure. + * + * returns the number of ELS/CT + **/ +int +lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba) +{ + int max_xri = lpfc_sli4_get_els_iocb_cnt(phba); + + return max_xri; +} + + /** * lpfc_write_firmware - attempt to write a firmware image to the port * @fw: pointer to firmware image returned from request_firmware. @@ -10333,7 +10562,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct Scsi_Host *shost = NULL; int error; uint32_t cfg_mode, intr_mode; - int adjusted_fcp_io_channel; /* Allocate memory for HBA structure */ phba = lpfc_hba_alloc(pdev); @@ -10358,14 +10586,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_disable_pci_dev; } - /* Set up phase-1 common device driver resources */ - error = lpfc_setup_driver_resource_phase1(phba); - if (error) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1411 Failed to set up driver resource.\n"); - goto out_unset_pci_mem_s4; - } - /* Set up SLI-4 Specific device driver resources */ error = lpfc_sli4_driver_resource_setup(phba); if (error) { @@ -10424,6 +10644,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) /* Put device to a known state before enabling interrupt */ lpfc_stop_port(phba); + /* Configure and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); if (intr_mode == LPFC_INTR_ERROR) { @@ -10433,11 +10654,15 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_free_sysfs_attr; } /* Default to single EQ for non-MSI-X */ - if (phba->intr_type != MSIX) - adjusted_fcp_io_channel = 1; - else - adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; - phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; + if (phba->intr_type != MSIX) { + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) + phba->cfg_fcp_io_channel = 1; + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + phba->cfg_nvme_io_channel = 1; + phba->io_channel_irqs = 1; + } + + /* Set up SLI-4 HBA */ if (lpfc_sli4_hba_setup(phba)) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -10453,6 +10678,8 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) /* Perform post initialization setup 
*/ lpfc_post_init_setup(phba); + /* todo: init: register port with nvme */ + /* check for firmware upgrade or downgrade */ if (phba->cfg_request_firmware_upgrade) lpfc_sli4_request_firmware_update(phba, INT_FW_UPGRADE); @@ -10524,8 +10751,11 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) fc_remove_host(shost); scsi_remove_host(shost); - /* Perform cleanup on the physical port */ + /* Perform ndlp cleanup on the physical port. The nvme localport + * is destroyed after to ensure all rports are io-disabled. + */ lpfc_cleanup(vport); + /* todo: init: unregister port with nvme */ /* * Bring down the SLI Layer. This step disables all interrupts, @@ -10543,6 +10773,7 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) * buffers are released to their corresponding pools here. */ lpfc_scsi_free(phba); + lpfc_nvme_free(phba); lpfc_sli4_driver_resource_unset(phba); @@ -11188,7 +11419,7 @@ lpfc_sli4_oas_verify(struct lpfc_hba *phba) int lpfc_fof_queue_setup(struct lpfc_hba *phba) { - struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; int rc; rc = lpfc_eq_create(phba, phba->sli4_hba.fof_eq, LPFC_MAX_IMAX); @@ -11207,8 +11438,11 @@ lpfc_fof_queue_setup(struct lpfc_hba *phba) if (rc) goto out_oas_wq; - phba->sli4_hba.oas_cq->pring = &psli->ring[LPFC_FCP_OAS_RING]; - phba->sli4_hba.oas_ring = &psli->ring[LPFC_FCP_OAS_RING]; + /* Bind this CQ/WQ to the NVME ring */ + pring = phba->sli4_hba.oas_wq->pring; + pring->sli.sli4.wqp = + (void *)phba->sli4_hba.oas_wq; + phba->sli4_hba.oas_cq->pring = pring; } return 0; @@ -11265,6 +11499,7 @@ lpfc_fof_queue_create(struct lpfc_hba *phba) goto out_error; phba->sli4_hba.oas_wq = qdesc; + list_add_tail(&qdesc->wq_list, &phba->sli4_hba.lpfc_wq_list); } return 0; diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h index 2a4e5d21eab2..3faf7a07bfd4 100644 --- a/drivers/scsi/lpfc/lpfc_logmsg.h +++ b/drivers/scsi/lpfc/lpfc_logmsg.h @@ -38,6 +38,10 @@ #define LOG_FIP 0x00020000 /* FIP events */ #define LOG_FCP_UNDER 0x00040000 /* FCP underruns errors */ #define LOG_SCSI_CMD 0x00080000 /* ALL SCSI commands */ +#define LOG_NVME 0x00100000 /* NVME general events. */ +#define LOG_NVME_DISC 0x00200000 /* NVME Discovery/Connect events. */ +#define LOG_NVME_ABTS 0x00400000 /* NVME ABTS events. */ +#define LOG_NVME_IOERR 0x00800000 /* NVME IO Error events. */ #define LOG_ALL_MSG 0xffffffff /* LOG all messages */ #define lpfc_printf_vlog(vport, level, mask, fmt, arg...) \ diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index b234c50c255f..c20dc25a1e79 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -954,7 +954,7 @@ lpfc_config_pcb_setup(struct lpfc_hba * phba) pcbp->maxRing = (psli->num_rings - 1); for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; + pring = &psli->sli3_ring[i]; pring->sli.sli3.sizeCiocb = phba->sli_rev == 3 ? 
SLI3_IOCB_CMD_SIZE : @@ -1217,7 +1217,7 @@ lpfc_config_ring(struct lpfc_hba * phba, int ring, LPFC_MBOXQ_t * pmb) mb->un.varCfgRing.recvNotify = 1; psli = &phba->sli; - pring = &psli->ring[ring]; + pring = &psli->sli3_ring[ring]; mb->un.varCfgRing.numMask = pring->num_mask; mb->mbxCommand = MBX_CONFIG_RING; mb->mbxOwner = OWN_HOST; @@ -2434,14 +2434,25 @@ lpfc_reg_fcfi(struct lpfc_hba *phba, struct lpfcMboxq *mbox) memset(mbox, 0, sizeof(*mbox)); reg_fcfi = &mbox->u.mqe.un.reg_fcfi; bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_REG_FCFI); - bf_set(lpfc_reg_fcfi_rq_id0, reg_fcfi, phba->sli4_hba.hdr_rq->queue_id); - bf_set(lpfc_reg_fcfi_rq_id1, reg_fcfi, REG_FCF_INVALID_QID); + if (phba->nvmet_support == 0) { + bf_set(lpfc_reg_fcfi_rq_id0, reg_fcfi, + phba->sli4_hba.hdr_rq->queue_id); + /* Match everything - rq_id0 */ + bf_set(lpfc_reg_fcfi_type_match0, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_type_mask0, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_rctl_match0, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_rctl_mask0, reg_fcfi, 0); + + bf_set(lpfc_reg_fcfi_rq_id1, reg_fcfi, REG_FCF_INVALID_QID); + + /* addr mode is bit wise inverted value of fcf addr_mode */ + bf_set(lpfc_reg_fcfi_mam, reg_fcfi, + (~phba->fcf.addr_mode) & 0x3); + } bf_set(lpfc_reg_fcfi_rq_id2, reg_fcfi, REG_FCF_INVALID_QID); bf_set(lpfc_reg_fcfi_rq_id3, reg_fcfi, REG_FCF_INVALID_QID); bf_set(lpfc_reg_fcfi_info_index, reg_fcfi, phba->fcf.current_rec.fcf_indx); - /* reg_fcf addr mode is bit wise inverted value of fcf addr_mode */ - bf_set(lpfc_reg_fcfi_mam, reg_fcfi, (~phba->fcf.addr_mode) & 0x3); if (phba->fcf.current_rec.vlan_id != LPFC_FCOE_NULL_VID) { bf_set(lpfc_reg_fcfi_vv, reg_fcfi, 1); bf_set(lpfc_reg_fcfi_vlan_tag, reg_fcfi, diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 3fa65338d3f5..c65a1ec3d2e4 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -24,10 +24,12 @@ #include #include +#include #include #include +#include -#include +#include #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -35,8 +37,9 @@ #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" @@ -66,7 +69,7 @@ lpfc_mem_alloc_active_rrq_pool_s4(struct lpfc_hba *phba) { * lpfc_mem_alloc - create and allocate all PCI and memory pools * @phba: HBA to allocate pools for * - * Description: Creates and allocates PCI pools lpfc_scsi_dma_buf_pool, + * Description: Creates and allocates PCI pools lpfc_sg_dma_buf_pool, * lpfc_mbuf_pool, lpfc_hrb_pool. Creates and allocates kmalloc-backed mempools * for LPFC_MBOXQ_t and lpfc_nodelist. Also allocates the VPI bitmask. 
* @@ -90,21 +93,23 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align) else i = SLI4_PAGE_SIZE; - phba->lpfc_scsi_dma_buf_pool = - pci_pool_create("lpfc_scsi_dma_buf_pool", - phba->pcidev, - phba->cfg_sg_dma_buf_size, - i, - 0); + phba->lpfc_sg_dma_buf_pool = + pci_pool_create("lpfc_sg_dma_buf_pool", + phba->pcidev, + phba->cfg_sg_dma_buf_size, + i, 0); + if (!phba->lpfc_sg_dma_buf_pool) + goto fail; + } else { - phba->lpfc_scsi_dma_buf_pool = - pci_pool_create("lpfc_scsi_dma_buf_pool", - phba->pcidev, phba->cfg_sg_dma_buf_size, - align, 0); - } + phba->lpfc_sg_dma_buf_pool = + pci_pool_create("lpfc_sg_dma_buf_pool", + phba->pcidev, phba->cfg_sg_dma_buf_size, + align, 0); - if (!phba->lpfc_scsi_dma_buf_pool) - goto fail; + if (!phba->lpfc_sg_dma_buf_pool) + goto fail; + } phba->lpfc_mbuf_pool = pci_pool_create("lpfc_mbuf_pool", phba->pcidev, LPFC_BPL_SIZE, @@ -170,12 +175,15 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align) LPFC_DEVICE_DATA_POOL_SIZE, sizeof(struct lpfc_device_data)); if (!phba->device_data_mem_pool) - goto fail_free_hrb_pool; + goto fail_free_drb_pool; } else { phba->device_data_mem_pool = NULL; } return 0; +fail_free_drb_pool: + pci_pool_destroy(phba->lpfc_drb_pool); + phba->lpfc_drb_pool = NULL; fail_free_hrb_pool: pci_pool_destroy(phba->lpfc_hrb_pool); phba->lpfc_hrb_pool = NULL; @@ -197,8 +205,8 @@ lpfc_mem_alloc(struct lpfc_hba *phba, int align) pci_pool_destroy(phba->lpfc_mbuf_pool); phba->lpfc_mbuf_pool = NULL; fail_free_dma_buf_pool: - pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool); - phba->lpfc_scsi_dma_buf_pool = NULL; + pci_pool_destroy(phba->lpfc_sg_dma_buf_pool); + phba->lpfc_sg_dma_buf_pool = NULL; fail: return -ENOMEM; } @@ -227,6 +235,9 @@ lpfc_mem_free(struct lpfc_hba *phba) if (phba->lpfc_hrb_pool) pci_pool_destroy(phba->lpfc_hrb_pool); phba->lpfc_hrb_pool = NULL; + if (phba->txrdy_payload_pool) + pci_pool_destroy(phba->txrdy_payload_pool); + phba->txrdy_payload_pool = NULL; if (phba->lpfc_hbq_pool) pci_pool_destroy(phba->lpfc_hbq_pool); @@ -258,8 +269,8 @@ lpfc_mem_free(struct lpfc_hba *phba) phba->lpfc_mbuf_pool = NULL; /* Free DMA buffer memory pool */ - pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool); - phba->lpfc_scsi_dma_buf_pool = NULL; + pci_pool_destroy(phba->lpfc_sg_dma_buf_pool); + phba->lpfc_sg_dma_buf_pool = NULL; /* Free Device Data memory pool */ if (phba->device_data_mem_pool) { @@ -282,7 +293,7 @@ lpfc_mem_free(struct lpfc_hba *phba) * @phba: HBA to free memory for * * Description: Free memory from PCI and driver memory pools and also those - * used : lpfc_scsi_dma_buf_pool, lpfc_mbuf_pool, lpfc_hrb_pool. Frees + * used : lpfc_sg_dma_buf_pool, lpfc_mbuf_pool, lpfc_hrb_pool. Frees * kmalloc-backed mempools for LPFC_MBOXQ_t and lpfc_nodelist. Also frees * the VPI bitmask. 
* @@ -458,7 +469,7 @@ lpfc_els_hbq_alloc(struct lpfc_hba *phba) kfree(hbqbp); return NULL; } - hbqbp->size = LPFC_BPL_SIZE; + hbqbp->total_size = LPFC_BPL_SIZE; return hbqbp; } @@ -518,7 +529,7 @@ lpfc_sli4_rb_alloc(struct lpfc_hba *phba) kfree(dma_buf); return NULL; } - dma_buf->size = LPFC_BPL_SIZE; + dma_buf->total_size = LPFC_DATA_BUF_SIZE; return dma_buf; } @@ -540,7 +551,6 @@ lpfc_sli4_rb_free(struct lpfc_hba *phba, struct hbq_dmabuf *dmab) pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys); pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys); kfree(dmab); - return; } /** @@ -565,13 +575,13 @@ lpfc_in_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) return; if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { + hbq_entry = container_of(mp, struct hbq_dmabuf, dbuf); /* Check whether HBQ is still in use */ spin_lock_irqsave(&phba->hbalock, flags); if (!phba->hbq_in_use) { spin_unlock_irqrestore(&phba->hbalock, flags); return; } - hbq_entry = container_of(mp, struct hbq_dmabuf, dbuf); list_del(&hbq_entry->dbuf.list); if (hbq_entry->tag == -1) { (phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer) diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 56a3df4fddb0..835ea9f78219 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -204,10 +204,11 @@ int lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) { LIST_HEAD(abort_list); - struct lpfc_sli *psli = &phba->sli; - struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct lpfc_iocbq *iocb, *next_iocb; + pring = lpfc_phba_elsring(phba); + /* Abort outstanding I/O on NPort */ lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_DISCOVERY, "2819 Abort outstanding I/O on NPort x%x " @@ -2104,7 +2105,7 @@ lpfc_rcv_prlo_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; /* flush the target */ - lpfc_sli_abort_iocb(vport, &phba->sli.ring[phba->sli.fcp_ring], + lpfc_sli_abort_iocb(vport, &phba->sli.sli3_ring[LPFC_FCP_RING], ndlp->nlp_sid, 0, LPFC_CTX_TGT); /* Treat like rcv logo */ diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h new file mode 100644 index 000000000000..2b167933fd26 --- /dev/null +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -0,0 +1,88 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2004-2016 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. 
* + ********************************************************************/ + +#define LPFC_NVME_MIN_SEGS 16 +#define LPFC_NVME_DEFAULT_SEGS 66 /* 256K IOs - 64 + 2 */ +#define LPFC_NVME_MAX_SEGS 510 +#define LPFC_NVMET_MIN_POSTBUF 16 +#define LPFC_NVMET_DEFAULT_POSTBUF 1024 +#define LPFC_NVMET_MAX_POSTBUF 4096 +#define LPFC_NVME_WQSIZE 256 + +#define LPFC_NVME_ERSP_LEN 0x20 + +/* Declare nvme-based local and remote port definitions. */ +struct lpfc_nvme_lport { + struct lpfc_vport *vport; + struct list_head rport_list; + struct completion lport_unreg_done; + /* Add sttats counters here */ +}; + +struct lpfc_nvme_rport { + struct list_head list; + struct lpfc_nvme_lport *lport; + struct nvme_fc_remote_port *remoteport; + struct lpfc_nodelist *ndlp; + struct completion rport_unreg_done; +}; + +struct lpfc_nvme_buf { + struct list_head list; + struct nvmefc_fcp_req *nvmeCmd; + struct lpfc_nvme_rport *nrport; + + uint32_t timeout; + + uint16_t flags; /* TBD convert exch_busy to flags */ +#define LPFC_SBUF_XBUSY 0x1 /* SLI4 hba reported XB on WCQE cmpl */ + uint16_t exch_busy; /* SLI4 hba reported XB on complete WCQE */ + uint16_t status; /* From IOCB Word 7- ulpStatus */ + uint16_t cpu; + uint16_t qidx; + uint16_t sqid; + uint32_t result; /* From IOCB Word 4. */ + + uint32_t seg_cnt; /* Number of scatter-gather segments returned by + * dma_map_sg. The driver needs this for calls + * to dma_unmap_sg. + */ + dma_addr_t nonsg_phys; /* Non scatter-gather physical address. */ + + /* + * data and dma_handle are the kernel virtual and bus address of the + * dma-able buffer containing the fcp_cmd, fcp_rsp and a scatter + * gather bde list that supports the sg_tablesize value. + */ + void *data; + dma_addr_t dma_handle; + + struct sli4_sge *nvme_sgl; + dma_addr_t dma_phys_sgl; + + /* cur_iocbq has phys of the dma-able buffer. + * Iotag is in here + */ + struct lpfc_iocbq cur_iocbq; + + wait_queue_head_t *waitq; + unsigned long start_time; +}; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index bd7b75dbb97e..c327fc7b1a54 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -413,7 +413,7 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc) * struct fcp_cmnd, struct fcp_rsp and the number of bde's * necessary to support the sg_tablesize. */ - psb->data = pci_pool_zalloc(phba->lpfc_scsi_dma_buf_pool, + psb->data = pci_pool_zalloc(phba->lpfc_sg_dma_buf_pool, GFP_KERNEL, &psb->dma_handle); if (!psb->data) { kfree(psb); @@ -424,8 +424,8 @@ lpfc_new_scsi_buf_s3(struct lpfc_vport *vport, int num_to_alloc) /* Allocate iotag for psb->cur_iocbq. 
*/ iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq); if (iotag == 0) { - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, - psb->data, psb->dma_handle); + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + psb->data, psb->dma_handle); kfree(psb); break; } @@ -522,6 +522,8 @@ lpfc_sli4_vport_delete_fcp_xri_aborted(struct lpfc_vport *vport) struct lpfc_scsi_buf *psb, *next_psb; unsigned long iflag = 0; + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return; spin_lock_irqsave(&phba->hbalock, iflag); spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock); list_for_each_entry_safe(psb, next_psb, @@ -554,8 +556,10 @@ lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *phba, int i; struct lpfc_nodelist *ndlp; int rrq_empty = 0; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring = phba->sli4_hba.els_wq->pring; + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return; spin_lock_irqsave(&phba->hbalock, iflag); spin_lock(&phba->sli4_hba.abts_scsi_buf_list_lock); list_for_each_entry_safe(psb, next_psb, @@ -819,7 +823,7 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) * for the struct fcp_cmnd, struct fcp_rsp and the number * of bde's necessary to support the sg_tablesize. */ - psb->data = pci_pool_zalloc(phba->lpfc_scsi_dma_buf_pool, + psb->data = pci_pool_zalloc(phba->lpfc_sg_dma_buf_pool, GFP_KERNEL, &psb->dma_handle); if (!psb->data) { kfree(psb); @@ -832,7 +836,7 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) */ if (phba->cfg_enable_bg && (((unsigned long)(psb->data) & (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) { - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, + pci_pool_free(phba->lpfc_sg_dma_buf_pool, psb->data, psb->dma_handle); kfree(psb); break; @@ -841,8 +845,8 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) lxri = lpfc_sli4_next_xritag(phba); if (lxri == NO_XRI) { - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, - psb->data, psb->dma_handle); + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + psb->data, psb->dma_handle); kfree(psb); break; } @@ -850,8 +854,8 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) /* Allocate iotag for psb->cur_iocbq. 
*/ iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq); if (iotag == 0) { - pci_pool_free(phba->lpfc_scsi_dma_buf_pool, - psb->data, psb->dma_handle); + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + psb->data, psb->dma_handle); kfree(psb); lpfc_printf_log(phba, KERN_ERR, LOG_FCP, "3368 Failed to allocate IOTAG for" @@ -920,7 +924,7 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc) phba->sli4_hba.scsi_xri_cnt++; spin_unlock_irq(&phba->scsi_buf_list_get_lock); } - lpfc_printf_log(phba, KERN_INFO, LOG_BG, + lpfc_printf_log(phba, KERN_INFO, LOG_BG | LOG_FCP, "3021 Allocate %d out of %d requested new SCSI " "buffers\n", bcnt, num_to_alloc); @@ -3925,6 +3929,8 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, struct Scsi_Host *shost; uint32_t logit = LOG_FCP; + phba->fc4ScsiIoCmpls++; + /* Sanity check on return of outstanding command */ cmd = lpfc_cmd->pCmd; if (!cmd) @@ -4242,19 +4248,19 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, vport->cfg_first_burst_size; } fcp_cmnd->fcpCntl3 = WRITE_DATA; - phba->fc4OutputRequests++; + phba->fc4ScsiOutputRequests++; } else { iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR; iocb_cmd->ulpPU = PARM_READ_CHECK; fcp_cmnd->fcpCntl3 = READ_DATA; - phba->fc4InputRequests++; + phba->fc4ScsiInputRequests++; } } else { iocb_cmd->ulpCommand = CMD_FCP_ICMND64_CR; iocb_cmd->un.fcpi.fcpi_parm = 0; iocb_cmd->ulpPU = 0; fcp_cmnd->fcpCntl3 = 0; - phba->fc4ControlRequests++; + phba->fc4ScsiControlRequests++; } if (phba->sli_rev == 3 && !(phba->sli3_options & LPFC_SLI3_BG_ENABLED)) @@ -4468,7 +4474,7 @@ static __inline__ void lpfc_poll_rearm_timer(struct lpfc_hba * phba) unsigned long poll_tmo_expires = (jiffies + msecs_to_jiffies(phba->cfg_poll_tmo)); - if (!list_empty(&phba->sli.ring[LPFC_FCP_RING].txcmplq)) + if (!list_empty(&phba->sli.sli3_ring[LPFC_FCP_RING].txcmplq)) mod_timer(&phba->fcp_poll_timer, poll_tmo_expires); } @@ -4498,7 +4504,7 @@ void lpfc_poll_timeout(unsigned long ptr) if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) { lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ); + &phba->sli.sli3_ring[LPFC_FCP_RING], HA_R0RE_REQ); if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_poll_rearm_timer(phba); @@ -4562,7 +4568,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) if (lpfc_cmd == NULL) { lpfc_rampdown_queue_depth(phba); - lpfc_printf_vlog(vport, KERN_INFO, LOG_MISC, + lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP_ERROR, "0707 driver's buffer pool is empty, " "IO busied\n"); goto out_host_busy; @@ -4637,7 +4643,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) } if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) { lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ); + &phba->sli.sli3_ring[LPFC_FCP_RING], HA_R0RE_REQ); if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_poll_rearm_timer(phba); @@ -4682,7 +4688,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) IOCB_t *cmd, *icmd; int ret = SUCCESS, status = 0; struct lpfc_sli_ring *pring_s4; - int ring_number, ret_val; + int ret_val; unsigned long flags, iflags; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq); @@ -4770,7 +4776,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) icmd->ulpClass = cmd->ulpClass; /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abtsiocb->fcp_wqidx = iocb->fcp_wqidx; + abtsiocb->hba_wqidx = iocb->hba_wqidx; abtsiocb->iocb_flag |= LPFC_USE_FCPWQIDX; if (iocb->iocb_flag & LPFC_IO_FOF) abtsiocb->iocb_flag |= LPFC_IO_FOF; 
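Note on the abort-path hunk above: it preserves the rule spelled out in its own comment, namely that the ABTS WQE must be issued on the same work queue as the WQE it aborts. That is why the abort copies the command's hba_wqidx (previously fcp_wqidx) and keeps the LPFC_USE_FCPWQIDX hint so the WQ selection is honored. A minimal stand-alone C sketch of that pairing follows; the fake_req type, the USE_WQIDX flag and the build_abort() helper are illustrative stand-ins, not the driver's real structures.

    #include <stdio.h>

    /* Illustrative stand-ins for an outstanding request and its abort. */
    struct fake_req {
            int hba_wqidx;          /* hardware WQ the request was posted to */
            unsigned int iocb_flag; /* request flags */
    };

    #define USE_WQIDX 0x1           /* "honor hba_wqidx" hint */

    /*
     * The abort inherits the originating request's WQ index so the ABTS
     * and the command it targets complete on the same CQ/EQ pair.
     */
    static void build_abort(const struct fake_req *cmd, struct fake_req *abt)
    {
            abt->hba_wqidx = cmd->hba_wqidx;
            abt->iocb_flag |= USE_WQIDX;
    }

    int main(void)
    {
            struct fake_req cmd = { .hba_wqidx = 3, .iocb_flag = 0 };
            struct fake_req abt = { 0, 0 };

            build_abort(&cmd, &abt);
            printf("abort follows command to WQ %d\n", abt.hba_wqidx);
            return 0;
    }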
@@ -4783,8 +4789,11 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl; abtsiocb->vport = vport; if (phba->sli_rev == LPFC_SLI_REV4) { - ring_number = MAX_SLI3_CONFIGURED_RINGS + iocb->fcp_wqidx; - pring_s4 = &phba->sli.ring[ring_number]; + pring_s4 = lpfc_sli4_calc_ring(phba, iocb); + if (pring_s4 == NULL) { + ret = FAILED; + goto out_unlock; + } /* Note: both hbalock and ring_lock must be set here */ spin_lock_irqsave(&pring_s4->ring_lock, iflags); ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno, @@ -4806,7 +4815,7 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ); + &phba->sli.sli3_ring[LPFC_FCP_RING], HA_R0RE_REQ); wait_for_cmpl: lpfc_cmd->waitq = &waitq; @@ -5106,7 +5115,7 @@ lpfc_reset_flush_io_context(struct lpfc_vport *vport, uint16_t tgt_id, cnt = lpfc_sli_sum_iocb(vport, tgt_id, lun_id, context); if (cnt) lpfc_sli_abort_taskmgmt(vport, - &phba->sli.ring[phba->sli.fcp_ring], + &phba->sli.sli3_ring[LPFC_FCP_RING], tgt_id, lun_id, context); later = msecs_to_jiffies(2 * vport->cfg_devloss_tmo * 1000) + jiffies; while (time_after(later, jiffies) && cnt) { @@ -5535,7 +5544,7 @@ lpfc_slave_configure(struct scsi_device *sdev) if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) { lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_FCP_RING], HA_R0RE_REQ); + &phba->sli.sli3_ring[LPFC_FCP_RING], HA_R0RE_REQ); if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_poll_rearm_timer(phba); } @@ -5899,6 +5908,48 @@ lpfc_disable_oas_lun(struct lpfc_hba *phba, struct lpfc_name *vport_wwpn, return false; } +static int +lpfc_no_command(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) +{ + return SCSI_MLQUEUE_HOST_BUSY; +} + +static int +lpfc_no_handler(struct scsi_cmnd *cmnd) +{ + return FAILED; +} + +static int +lpfc_no_slave(struct scsi_device *sdev) +{ + return -ENODEV; +} + +struct scsi_host_template lpfc_template_nvme = { + .module = THIS_MODULE, + .name = LPFC_DRIVER_NAME, + .proc_name = LPFC_DRIVER_NAME, + .info = lpfc_info, + .queuecommand = lpfc_no_command, + .eh_abort_handler = lpfc_no_handler, + .eh_device_reset_handler = lpfc_no_handler, + .eh_target_reset_handler = lpfc_no_handler, + .eh_bus_reset_handler = lpfc_no_handler, + .eh_host_reset_handler = lpfc_no_handler, + .slave_alloc = lpfc_no_slave, + .slave_configure = lpfc_no_slave, + .scan_finished = lpfc_scan_finished, + .this_id = -1, + .sg_tablesize = 1, + .cmd_per_lun = 1, + .use_clustering = ENABLE_CLUSTERING, + .shost_attrs = lpfc_hba_attrs, + .max_sectors = 0xFFFF, + .vendor_id = LPFC_NL_VENDOR_ID, + .track_queue_depth = 0, +}; + struct scsi_host_template lpfc_template_s3 = { .module = THIS_MODULE, .name = LPFC_DRIVER_NAME, diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index 4d7062258cf8..def0c0a5b17e 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -135,6 +135,8 @@ struct lpfc_scsi_buf { uint32_t timeout; + uint16_t flags; /* TBD convert exch_busy to flags */ +#define LPFC_SBUF_XBUSY 0x1 /* SLI4 hba reported XB on WCQE cmpl */ uint16_t exch_busy; /* SLI4 hba reported XB on complete WCQE */ uint16_t status; /* From IOCB Word 7- ulpStatus */ uint32_t result; /* From IOCB Word 4. 
*/ @@ -164,6 +166,8 @@ struct lpfc_scsi_buf { * Iotag is in here */ struct lpfc_iocbq cur_iocbq; + uint16_t cpu; + wait_queue_head_t *waitq; unsigned long start_time; @@ -186,5 +190,7 @@ struct lpfc_scsi_buf { #define NO_MORE_OAS_LUN -1 #define NOT_OAS_ENABLED_LUN NO_MORE_OAS_LUN +#define TXRDY_PAYLOAD_LEN 12 + int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 19543142c94c..52fa5c77f3bc 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -34,14 +34,17 @@ #include #include +#include + #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" #include "lpfc_compat.h" @@ -67,14 +70,17 @@ static struct lpfc_iocbq *lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *, struct lpfc_iocbq *); static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *, struct hbq_dmabuf *); -static int lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *, struct lpfc_queue *, +static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_cqe *); -static int lpfc_sli4_post_els_sgl_list(struct lpfc_hba *, struct list_head *, +static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *, int); static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *, struct lpfc_eqe *, uint32_t); static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba); static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba); +static int lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct lpfc_iocbq *cmdiocb); static IOCB_t * lpfc_get_iocb_from_iocbq(struct lpfc_iocbq *iocbq) @@ -456,7 +462,7 @@ lpfc_sli4_cq_release(struct lpfc_queue *q, bool arm) * on @q then this function will return -ENOMEM. * The caller is expected to hold the hbalock when calling this routine. **/ -static int +int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, struct lpfc_rqe *hrqe, struct lpfc_rqe *drqe) { @@ -602,7 +608,7 @@ __lpfc_sli_get_iocbq(struct lpfc_hba *phba) * * Returns sglq ponter = success, NULL = Failure. **/ -static struct lpfc_sglq * +struct lpfc_sglq * __lpfc_clear_active_sglq(struct lpfc_hba *phba, uint16_t xritag) { struct lpfc_sglq *sglq; @@ -902,7 +908,7 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, } /** - * __lpfc_sli_get_sglq - Allocates an iocb object from sgl pool + * __lpfc_sli_get_els_sglq - Allocates an iocb object from sgl pool * @phba: Pointer to HBA context object. * @piocb: Pointer to the iocbq. * @@ -912,9 +918,9 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, * allocated sglq object else it returns NULL. 
**/ static struct lpfc_sglq * -__lpfc_sli_get_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) +__lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) { - struct list_head *lpfc_sgl_list = &phba->sli4_hba.lpfc_sgl_list; + struct list_head *lpfc_els_sgl_list = &phba->sli4_hba.lpfc_els_sgl_list; struct lpfc_sglq *sglq = NULL; struct lpfc_sglq *start_sglq = NULL; struct lpfc_scsi_buf *lpfc_cmd; @@ -938,18 +944,21 @@ __lpfc_sli_get_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) ndlp = piocbq->context1; } - list_remove_head(lpfc_sgl_list, sglq, struct lpfc_sglq, list); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_remove_head(lpfc_els_sgl_list, sglq, struct lpfc_sglq, list); start_sglq = sglq; while (!found) { if (!sglq) return NULL; - if (lpfc_test_rrq_active(phba, ndlp, sglq->sli4_lxritag)) { + if (ndlp && ndlp->active_rrqs_xri_bitmap && + test_bit(sglq->sli4_lxritag, + ndlp->active_rrqs_xri_bitmap)) { /* This xri has an rrq outstanding for this DID. * put it back in the list and get another xri. */ - list_add_tail(&sglq->list, lpfc_sgl_list); + list_add_tail(&sglq->list, lpfc_els_sgl_list); sglq = NULL; - list_remove_head(lpfc_sgl_list, sglq, + list_remove_head(lpfc_els_sgl_list, sglq, struct lpfc_sglq, list); if (sglq == start_sglq) { sglq = NULL; @@ -962,6 +971,7 @@ __lpfc_sli_get_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) phba->sli4_hba.lpfc_sglq_active_list[sglq->sli4_lxritag] = sglq; sglq->state = SGL_ALLOCATED; } + spin_unlock(&phba->sli4_hba.sgl_list_lock); return sglq; } @@ -1002,7 +1012,7 @@ lpfc_sli_get_iocbq(struct lpfc_hba *phba) * this IO was aborted then the sglq entry it put on the * lpfc_abts_els_sgl_list until the CQ_ABORTED_XRI is received. If the * IO has good status or fails for any other reason then the sglq - * entry is added to the free list (lpfc_sgl_list). + * entry is added to the free list (lpfc_els_sgl_list). 
**/ static void __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) @@ -1010,7 +1020,7 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) struct lpfc_sglq *sglq; size_t start_clean = offsetof(struct lpfc_iocbq, iocb); unsigned long iflag = 0; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; lockdep_assert_held(&phba->hbalock); @@ -1021,21 +1031,24 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) if (sglq) { + pring = phba->sli4_hba.els_wq->pring; if ((iocbq->iocb_flag & LPFC_EXCHANGE_BUSY) && (sglq->state != SGL_XRI_ABORTED)) { - spin_lock_irqsave(&phba->sli4_hba.abts_sgl_list_lock, - iflag); + spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, + iflag); list_add(&sglq->list, - &phba->sli4_hba.lpfc_abts_els_sgl_list); + &phba->sli4_hba.lpfc_abts_els_sgl_list); spin_unlock_irqrestore( - &phba->sli4_hba.abts_sgl_list_lock, iflag); + &phba->sli4_hba.sgl_list_lock, iflag); } else { - spin_lock_irqsave(&pring->ring_lock, iflag); + spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, + iflag); sglq->state = SGL_FREED; sglq->ndlp = NULL; list_add_tail(&sglq->list, - &phba->sli4_hba.lpfc_sgl_list); - spin_unlock_irqrestore(&pring->ring_lock, iflag); + &phba->sli4_hba.lpfc_els_sgl_list); + spin_unlock_irqrestore( + &phba->sli4_hba.sgl_list_lock, iflag); /* Check if TXQ queue needs to be serviced */ if (!list_empty(&pring->txq)) @@ -1043,13 +1056,13 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) } } - /* * Clean all volatile data fields, preserve iotag and node struct. */ memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean); iocbq->sli4_lxritag = NO_XRI; iocbq->sli4_xritag = NO_XRI; + iocbq->iocb_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVME_LS); list_add_tail(&iocbq->list, &phba->lpfc_iocb_list); } @@ -1639,7 +1652,7 @@ lpfc_sli_resume_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) if (lpfc_is_link_up(phba) && (!list_empty(&pring->txq)) && - (pring->ringno != phba->sli.fcp_ring || + (pring->ringno != LPFC_FCP_RING || phba->sli.sli_flag & LPFC_PROCESS_LA)) { while ((iocb = lpfc_sli_next_iocb_slot(phba, pring)) && @@ -1718,7 +1731,6 @@ lpfc_sli_hbqbuf_free_all(struct lpfc_hba *phba) struct hbq_dmabuf *hbq_buf; unsigned long flags; int i, hbq_count; - uint32_t hbqno; hbq_count = lpfc_sli_hbq_count(); /* Return all memory used by all HBQs */ @@ -1732,24 +1744,6 @@ lpfc_sli_hbqbuf_free_all(struct lpfc_hba *phba) } phba->hbqs[i].buffer_count = 0; } - /* Return all HBQ buffer that are in-fly */ - list_for_each_entry_safe(dmabuf, next_dmabuf, &phba->rb_pend_list, - list) { - hbq_buf = container_of(dmabuf, struct hbq_dmabuf, dbuf); - list_del(&hbq_buf->dbuf.list); - if (hbq_buf->tag == -1) { - (phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer) - (phba, hbq_buf); - } else { - hbqno = hbq_buf->tag >> 16; - if (hbqno >= LPFC_MAX_HBQS) - (phba->hbqs[LPFC_ELS_HBQ].hbq_free_buffer) - (phba, hbq_buf); - else - (phba->hbqs[hbqno].hbq_free_buffer)(phba, - hbq_buf); - } - } /* Mark the HBQs not in use */ phba->hbq_in_use = 0; @@ -1802,7 +1796,7 @@ lpfc_sli_hbq_to_firmware_s3(struct lpfc_hba *phba, uint32_t hbqno, hbqe->bde.addrHigh = le32_to_cpu(putPaddrHigh(physaddr)); hbqe->bde.addrLow = le32_to_cpu(putPaddrLow(physaddr)); - hbqe->bde.tus.f.bdeSize = hbq_buf->size; + hbqe->bde.tus.f.bdeSize = hbq_buf->total_size; hbqe->bde.tus.f.bdeFlags = 0; hbqe->bde.tus.w = le32_to_cpu(hbqe->bde.tus.w); hbqe->buffer_tag = le32_to_cpu(hbq_buf->tag); @@ -1834,17 
+1828,23 @@ lpfc_sli_hbq_to_firmware_s4(struct lpfc_hba *phba, uint32_t hbqno, int rc; struct lpfc_rqe hrqe; struct lpfc_rqe drqe; + struct lpfc_queue *hrq; + struct lpfc_queue *drq; + + if (hbqno != LPFC_ELS_HBQ) + return 1; + hrq = phba->sli4_hba.hdr_rq; + drq = phba->sli4_hba.dat_rq; lockdep_assert_held(&phba->hbalock); hrqe.address_lo = putPaddrLow(hbq_buf->hbuf.phys); hrqe.address_hi = putPaddrHigh(hbq_buf->hbuf.phys); drqe.address_lo = putPaddrLow(hbq_buf->dbuf.phys); drqe.address_hi = putPaddrHigh(hbq_buf->dbuf.phys); - rc = lpfc_sli4_rq_put(phba->sli4_hba.hdr_rq, phba->sli4_hba.dat_rq, - &hrqe, &drqe); + rc = lpfc_sli4_rq_put(hrq, drq, &hrqe, &drqe); if (rc < 0) return rc; - hbq_buf->tag = rc; + hbq_buf->tag = (rc | (hbqno << 16)); list_add_tail(&hbq_buf->dbuf.list, &phba->hbqs[hbqno].hbq_buffer_list); return 0; } @@ -1861,22 +1861,9 @@ static struct lpfc_hbq_init lpfc_els_hbq = { .add_count = 40, }; -/* HBQ for the extra ring if needed */ -static struct lpfc_hbq_init lpfc_extra_hbq = { - .rn = 1, - .entry_count = 200, - .mask_count = 0, - .profile = 0, - .ring_mask = (1 << LPFC_EXTRA_RING), - .buffer_count = 0, - .init_count = 0, - .add_count = 5, -}; - /* Array of HBQs */ struct lpfc_hbq_init *lpfc_hbq_defs[] = { &lpfc_els_hbq, - &lpfc_extra_hbq, }; /** @@ -2713,7 +2700,7 @@ static struct lpfc_iocbq * lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, uint16_t iotag) { - struct lpfc_iocbq *cmd_iocb; + struct lpfc_iocbq *cmd_iocb = NULL; lockdep_assert_held(&phba->hbalock); if (iotag != 0 && iotag <= phba->sli.last_iotag) { @@ -2727,8 +2714,10 @@ lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba, } lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0372 iotag x%x is out of range: max iotag (x%x)\n", - iotag, phba->sli.last_iotag); + "0372 iotag x%x lookup error: max iotag (x%x) " + "iocb_flag x%x\n", + iotag, phba->sli.last_iotag, + cmd_iocb ? cmd_iocb->iocb_flag : 0xffff); return NULL; } @@ -3597,6 +3586,33 @@ lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) IOERR_SLI_ABORTED); } +/** + * lpfc_sli_abort_wqe_ring - Abort all iocbs in the ring + * @phba: Pointer to HBA context object. + * @pring: Pointer to driver SLI ring object. + * + * This function aborts all iocbs in the given ring and frees all the iocb + * objects in txq. This function issues an abort iocb for all the iocb commands + * in txcmplq. The iocbs in the txcmplq is not guaranteed to complete before + * the return of this function. The caller is not required to hold any locks. + **/ +void +lpfc_sli_abort_wqe_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) +{ + LIST_HEAD(completions); + struct lpfc_iocbq *iocb, *next_iocb; + + if (pring->ringno == LPFC_ELS_RING) + lpfc_fabric_abort_hba(phba); + + spin_lock_irq(&phba->hbalock); + /* Next issue ABTS for everything on the txcmplq */ + list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) + lpfc_sli4_abort_nvme_io(phba, pring, iocb); + spin_unlock_irq(&phba->hbalock); +} + + /** * lpfc_sli_abort_fcp_rings - Abort all iocbs in all FCP rings * @phba: Pointer to HBA context object. 
@@ -3617,15 +3633,40 @@ lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba) /* Look on all the FCP Rings for the iotag */ if (phba->sli_rev >= LPFC_SLI_REV4) { for (i = 0; i < phba->cfg_fcp_io_channel; i++) { - pring = &psli->ring[i + MAX_SLI3_CONFIGURED_RINGS]; + pring = phba->sli4_hba.fcp_wq[i]->pring; lpfc_sli_abort_iocb_ring(phba, pring); } } else { - pring = &psli->ring[psli->fcp_ring]; + pring = &psli->sli3_ring[LPFC_FCP_RING]; lpfc_sli_abort_iocb_ring(phba, pring); } } +/** + * lpfc_sli_abort_nvme_rings - Abort all wqes in all NVME rings + * @phba: Pointer to HBA context object. + * + * This function aborts all wqes in NVME rings. This function issues an + * abort wqe for all the outstanding IO commands in txcmplq. The iocbs in + * the txcmplq is not guaranteed to complete before the return of this + * function. The caller is not required to hold any locks. + **/ +void +lpfc_sli_abort_nvme_rings(struct lpfc_hba *phba) +{ + struct lpfc_sli_ring *pring; + uint32_t i; + + if (phba->sli_rev < LPFC_SLI_REV4) + return; + + /* Abort all IO on each NVME ring. */ + for (i = 0; i < phba->cfg_nvme_io_channel; i++) { + pring = phba->sli4_hba.nvme_wq[i]->pring; + lpfc_sli_abort_wqe_ring(phba, pring); + } +} + /** * lpfc_sli_flush_fcp_rings - flush all iocbs in the fcp ring @@ -3654,7 +3695,7 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba) /* Look on all the FCP Rings for the iotag */ if (phba->sli_rev >= LPFC_SLI_REV4) { for (i = 0; i < phba->cfg_fcp_io_channel; i++) { - pring = &psli->ring[i + MAX_SLI3_CONFIGURED_RINGS]; + pring = phba->sli4_hba.fcp_wq[i]->pring; spin_lock_irq(&pring->ring_lock); /* Retrieve everything on txq */ @@ -3675,7 +3716,7 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba) IOERR_SLI_DOWN); } } else { - pring = &psli->ring[psli->fcp_ring]; + pring = &psli->sli3_ring[LPFC_FCP_RING]; spin_lock_irq(&phba->hbalock); /* Retrieve everything on txq */ @@ -3695,6 +3736,51 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba) } } +/** + * lpfc_sli_flush_nvme_rings - flush all wqes in the nvme rings + * @phba: Pointer to HBA context object. + * + * This function flushes all wqes in the nvme rings and frees all resources + * in the txcmplq. This function does not issue abort wqes for the IO + * commands in txcmplq, they will just be returned with + * IOERR_SLI_DOWN. This function is invoked with EEH when device's PCI + * slot has been permanently disabled. + **/ +void +lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba) +{ + LIST_HEAD(txcmplq); + struct lpfc_sli_ring *pring; + uint32_t i; + + if (phba->sli_rev < LPFC_SLI_REV4) + return; + + /* Hint to other driver operations that a flush is in progress. */ + spin_lock_irq(&phba->hbalock); + phba->hba_flag |= HBA_NVME_IOQ_FLUSH; + spin_unlock_irq(&phba->hbalock); + + /* Cycle through all NVME rings and complete each IO with + * a local driver reason code. This is a flush so no + * abort exchange to FW. + */ + for (i = 0; i < phba->cfg_nvme_io_channel; i++) { + pring = phba->sli4_hba.nvme_wq[i]->pring; + + /* Retrieve everything on the txcmplq */ + spin_lock_irq(&pring->ring_lock); + list_splice_init(&pring->txcmplq, &txcmplq); + pring->txcmplq_cnt = 0; + spin_unlock_irq(&pring->ring_lock); + + /* Flush the txcmpq &&&PAE */ + lpfc_sli_cancel_iocbs(phba, &txcmplq, + IOSTAT_LOCAL_REJECT, + IOERR_SLI_DOWN); + } +} + /** * lpfc_sli_brdready_s3 - Check for sli3 host ready status * @phba: Pointer to HBA context object. 
@@ -4069,7 +4155,7 @@ lpfc_sli_brdreset(struct lpfc_hba *phba) /* Initialize relevant SLI info */ for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; + pring = &psli->sli3_ring[i]; pring->flag = 0; pring->sli.sli3.rspidx = 0; pring->sli.sli3.next_cmdidx = 0; @@ -4498,10 +4584,11 @@ static int lpfc_sli4_rb_setup(struct lpfc_hba *phba) { phba->hbq_in_use = 1; - phba->hbqs[0].entry_count = lpfc_hbq_defs[0]->entry_count; + phba->hbqs[LPFC_ELS_HBQ].entry_count = + lpfc_hbq_defs[LPFC_ELS_HBQ]->entry_count; phba->hbq_count = 1; + lpfc_sli_hbqbuf_init_hbqs(phba, LPFC_ELS_HBQ); /* Initially populate or replenish the HBQs */ - lpfc_sli_hbqbuf_init_hbqs(phba, 0); return 0; } @@ -5107,27 +5194,31 @@ lpfc_sli4_retrieve_pport_name(struct lpfc_hba *phba) static void lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) { - int fcp_eqidx; + int qidx; lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM); lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM); - fcp_eqidx = 0; - if (phba->sli4_hba.fcp_cq) { - do { - lpfc_sli4_cq_release(phba->sli4_hba.fcp_cq[fcp_eqidx], - LPFC_QUEUE_REARM); - } while (++fcp_eqidx < phba->cfg_fcp_io_channel); - } + if (phba->sli4_hba.nvmels_cq) + lpfc_sli4_cq_release(phba->sli4_hba.nvmels_cq, + LPFC_QUEUE_REARM); + + if (phba->sli4_hba.fcp_cq) + for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) + lpfc_sli4_cq_release(phba->sli4_hba.fcp_cq[qidx], + LPFC_QUEUE_REARM); + + if (phba->sli4_hba.nvme_cq) + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) + lpfc_sli4_cq_release(phba->sli4_hba.nvme_cq[qidx], + LPFC_QUEUE_REARM); if (phba->cfg_fof) lpfc_sli4_cq_release(phba->sli4_hba.oas_cq, LPFC_QUEUE_REARM); - if (phba->sli4_hba.hba_eq) { - for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_io_channel; - fcp_eqidx++) - lpfc_sli4_eq_release(phba->sli4_hba.hba_eq[fcp_eqidx], - LPFC_QUEUE_REARM); - } + if (phba->sli4_hba.hba_eq) + for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) + lpfc_sli4_eq_release(phba->sli4_hba.hba_eq[qidx], + LPFC_QUEUE_REARM); if (phba->cfg_fof) lpfc_sli4_eq_release(phba->sli4_hba.fof_eq, LPFC_QUEUE_REARM); @@ -5560,9 +5651,13 @@ lpfc_sli4_alloc_extent(struct lpfc_hba *phba, uint16_t type) rsrc_blks->rsrc_size = rsrc_size; list_add_tail(&rsrc_blks->list, ext_blk_list); rsrc_start = rsrc_id; - if ((type == LPFC_RSC_TYPE_FCOE_XRI) && (j == 0)) + if ((type == LPFC_RSC_TYPE_FCOE_XRI) && (j == 0)) { phba->sli4_hba.scsi_xri_start = rsrc_start + - lpfc_sli4_get_els_iocb_cnt(phba); + lpfc_sli4_get_iocb_cnt(phba); + phba->sli4_hba.nvme_xri_start = + phba->sli4_hba.scsi_xri_start + + phba->sli4_hba.scsi_xri_max; + } while (rsrc_id < (rsrc_start + rsrc_size)) { ids[j] = rsrc_id; @@ -5578,6 +5673,8 @@ lpfc_sli4_alloc_extent(struct lpfc_hba *phba, uint16_t type) return rc; } + + /** * lpfc_sli4_dealloc_extent - Deallocate an SLI4 resource extent. * @phba: Pointer to HBA context object. @@ -6156,42 +6253,45 @@ lpfc_sli4_get_allocated_extnts(struct lpfc_hba *phba, uint16_t type, } /** - * lpfc_sli4_repost_els_sgl_list - Repsot the els buffers sgl pages as block + * lpfc_sli4_repost_sgl_list - Repsot the buffers sgl pages as block * @phba: pointer to lpfc hba data structure. + * @pring: Pointer to driver SLI ring object. + * @sgl_list: linked link of sgl buffers to post + * @cnt: number of linked list buffers * - * This routine walks the list of els buffers that have been allocated and + * This routine walks the list of buffers that have been allocated and * repost them to the port by using SGL block post. 
This is needed after a * pci_function_reset/warm_start or start. It attempts to construct blocks - * of els buffer sgls which contains contiguous xris and uses the non-embedded - * SGL block post mailbox commands to post them to the port. For single els + * of buffer sgls which contains contiguous xris and uses the non-embedded + * SGL block post mailbox commands to post them to the port. For single * buffer sgl with non-contiguous xri, if any, it shall use embedded SGL post * mailbox command for posting. * * Returns: 0 = success, non-zero failure. **/ static int -lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) +lpfc_sli4_repost_sgl_list(struct lpfc_hba *phba, + struct list_head *sgl_list, int cnt) { struct lpfc_sglq *sglq_entry = NULL; struct lpfc_sglq *sglq_entry_next = NULL; struct lpfc_sglq *sglq_entry_first = NULL; - int status, total_cnt, post_cnt = 0, num_posted = 0, block_cnt = 0; + int status, total_cnt; + int post_cnt = 0, num_posted = 0, block_cnt = 0; int last_xritag = NO_XRI; - struct lpfc_sli_ring *pring; LIST_HEAD(prep_sgl_list); LIST_HEAD(blck_sgl_list); LIST_HEAD(allc_sgl_list); LIST_HEAD(post_sgl_list); LIST_HEAD(free_sgl_list); - pring = &phba->sli.ring[LPFC_ELS_RING]; spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&phba->sli4_hba.lpfc_sgl_list, &allc_sgl_list); - spin_unlock(&pring->ring_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(sgl_list, &allc_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irq(&phba->hbalock); - total_cnt = phba->sli4_hba.els_xri_cnt; + total_cnt = cnt; list_for_each_entry_safe(sglq_entry, sglq_entry_next, &allc_sgl_list, list) { list_del_init(&sglq_entry->list); @@ -6220,8 +6320,8 @@ lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) /* keep track of last sgl's xritag */ last_xritag = sglq_entry->sli4_xritag; - /* end of repost sgl list condition for els buffers */ - if (num_posted == phba->sli4_hba.els_xri_cnt) { + /* end of repost sgl list condition for buffers */ + if (num_posted == total_cnt) { if (post_cnt == 0) { list_splice_init(&prep_sgl_list, &blck_sgl_list); @@ -6238,7 +6338,7 @@ lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) /* Failure, put sgl to free list */ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, - "3159 Failed to post els " + "3159 Failed to post " "sgl, xritag:x%x\n", sglq_entry->sli4_xritag); list_add_tail(&sglq_entry->list, @@ -6252,9 +6352,9 @@ lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) if (post_cnt == 0) continue; - /* post the els buffer list sgls as a block */ - status = lpfc_sli4_post_els_sgl_list(phba, &blck_sgl_list, - post_cnt); + /* post the buffer list sgls as a block */ + status = lpfc_sli4_post_sgl_list(phba, &blck_sgl_list, + post_cnt); if (!status) { /* success, put sgl list to posted sgl list */ @@ -6265,7 +6365,7 @@ lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) struct lpfc_sglq, list); lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, - "3160 Failed to post els sgl-list, " + "3160 Failed to post sgl-list, " "xritag:x%x-x%x\n", sglq_entry_first->sli4_xritag, (sglq_entry_first->sli4_xritag + @@ -6278,29 +6378,28 @@ lpfc_sli4_repost_els_sgl_list(struct lpfc_hba *phba) if (block_cnt == 0) last_xritag = NO_XRI; - /* reset els sgl post count for next round of posting */ + /* reset sgl post count for next round of posting */ post_cnt = 0; } - /* update the number of XRIs posted for ELS */ - phba->sli4_hba.els_xri_cnt = total_cnt; - /* free the els sgls failed to post */ + /* free the sgls failed to post */ 
lpfc_free_sgl_list(phba, &free_sgl_list); - /* push els sgls posted to the availble list */ + /* push sgls posted to the available list */ if (!list_empty(&post_sgl_list)) { spin_lock_irq(&phba->hbalock); - spin_lock(&pring->ring_lock); - list_splice_init(&post_sgl_list, - &phba->sli4_hba.lpfc_sgl_list); - spin_unlock(&pring->ring_lock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&post_sgl_list, sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); spin_unlock_irq(&phba->hbalock); } else { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "3161 Failure to post els sgl to port.\n"); + "3161 Failure to post sgl to port.\n"); return -EIO; } - return 0; + + /* return the number of XRIs actually posted */ + return total_cnt; } void @@ -6622,35 +6721,78 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) fc_host_node_name(shost) = wwn_to_u64(vport->fc_nodename.u.wwn); fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn); - /* update host els and scsi xri-sgl sizes and mappings */ - rc = lpfc_sli4_xri_sgl_update(phba); + /* Create all the SLI4 queues */ + rc = lpfc_sli4_queue_create(phba); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3089 Failed to allocate queues\n"); + rc = -ENODEV; + goto out_free_mbox; + } + /* Set up all the queues to the device */ + rc = lpfc_sli4_queue_setup(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "0381 Error %d during queue setup.\n ", rc); + goto out_stop_timers; + } + /* Initialize the driver internal SLI layer lists. */ + lpfc_sli4_setup(phba); + lpfc_sli4_queue_init(phba); + + /* update host els xri-sgl sizes and mappings */ + rc = lpfc_sli4_els_sgl_update(phba); if (unlikely(rc)) { lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "1400 Failed to update xri-sgl size and " "mapping: %d\n", rc); - goto out_free_mbox; + goto out_destroy_queue; } /* register the els sgl pool to the port */ - rc = lpfc_sli4_repost_els_sgl_list(phba); - if (unlikely(rc)) { + rc = lpfc_sli4_repost_sgl_list(phba, &phba->sli4_hba.lpfc_els_sgl_list, + phba->sli4_hba.els_xri_cnt); + if (unlikely(rc < 0)) { lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "0582 Error %d during els sgl post " "operation\n", rc); rc = -ENODEV; - goto out_free_mbox; + goto out_destroy_queue; } + phba->sli4_hba.els_xri_cnt = rc; - /* register the allocated scsi sgl pool to the port */ - rc = lpfc_sli4_repost_scsi_sgl_list(phba); - if (unlikely(rc)) { - lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, - "0383 Error %d during scsi sgl post " - "operation\n", rc); - /* Some Scsi buffers were moved to the abort scsi list */ - /* A pci function reset will repost them */ - rc = -ENODEV; - goto out_free_mbox; + if (phba->nvmet_support == 0) { + /* update host scsi xri-sgl sizes and mappings */ + rc = lpfc_sli4_scsi_sgl_update(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "6309 Failed to update scsi-sgl size " + "and mapping: %d\n", rc); + goto out_destroy_queue; + } + + /* update host nvme xri-sgl sizes and mappings */ + rc = lpfc_sli4_nvme_sgl_update(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "6082 Failed to update nvme-sgl size " + "and mapping: %d\n", rc); + goto out_destroy_queue; + } + } + + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { + /* register the allocated scsi sgl pool to the port */ + rc = lpfc_sli4_repost_scsi_sgl_list(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "0383 Error %d during scsi sgl 
post " + "operation\n", rc); + /* Some Scsi buffers were moved to abort scsi list */ + /* A pci function reset will repost them */ + rc = -ENODEV; + goto out_destroy_queue; + } } /* Post the rpi header region to the device. */ @@ -6660,24 +6802,26 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) "0393 Error %d during rpi post operation\n", rc); rc = -ENODEV; - goto out_free_mbox; + goto out_destroy_queue; } lpfc_sli4_node_prep(phba); - /* Create all the SLI4 queues */ - rc = lpfc_sli4_queue_create(phba); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3089 Failed to allocate queues\n"); - rc = -ENODEV; - goto out_stop_timers; - } - /* Set up all the queues to the device */ - rc = lpfc_sli4_queue_setup(phba); - if (unlikely(rc)) { - lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, - "0381 Error %d during queue setup.\n ", rc); - goto out_destroy_queue; + if (!(phba->hba_flag & HBA_FCOE_MODE)) { + if (phba->nvmet_support == 0) { + /* + * The FC Port needs to register FCFI (index 0) + */ + lpfc_reg_fcfi(phba, mboxq); + mboxq->vport = phba->pport; + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) + goto out_unset_queue; + rc = 0; + phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_fcfi, + &mboxq->u.mqe.un.reg_fcfi); + } + /* Check if the port is configured to be disabled */ + lpfc_sli_read_link_ste(phba); } /* Arm the CQs and then EQs on device */ @@ -6731,23 +6875,6 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) rc = 0; } - if (!(phba->hba_flag & HBA_FCOE_MODE)) { - /* - * The FC Port needs to register FCFI (index 0) - */ - lpfc_reg_fcfi(phba, mboxq); - mboxq->vport = phba->pport; - rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); - if (rc != MBX_SUCCESS) - goto out_unset_queue; - rc = 0; - phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_fcfi, - &mboxq->u.mqe.un.reg_fcfi); - - /* Check if the port is configured to be disabled */ - lpfc_sli_read_link_ste(phba); - } - /* * The port is ready, set the host's link state to LINK_DOWN * in preparation for link interrupts. 
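[Editorial note, not part of the patch] In the lpfc_sli4_hba_setup() hunks above, lpfc_sli4_repost_sgl_list() now returns the number of XRIs actually posted on success and a negative errno (-EIO) on failure, instead of plain 0, so callers must treat only negative values as errors and capture the returned count. A minimal caller-side sketch, reusing the names that appear in the hunk above (illustrative only):

	/* Illustrative: consuming the count-returning repost helper. */
	rc = lpfc_sli4_repost_sgl_list(phba, &phba->sli4_hba.lpfc_els_sgl_list,
				       phba->sli4_hba.els_xri_cnt);
	if (unlikely(rc < 0))
		goto out_destroy_queue;		/* only negative rc is an error (-EIO) */
	phba->sli4_hba.els_xri_cnt = rc;	/* on success rc is the posted ELS XRI count */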
@@ -6884,7 +7011,7 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba) /* Find the eq associated with the mcq */ if (phba->sli4_hba.hba_eq) - for (eqidx = 0; eqidx < phba->cfg_fcp_io_channel; eqidx++) + for (eqidx = 0; eqidx < phba->io_channel_irqs; eqidx++) if (phba->sli4_hba.hba_eq[eqidx]->queue_id == phba->sli4_hba.mbx_cq->assoc_qid) { fpeq = phba->sli4_hba.hba_eq[eqidx]; @@ -7243,16 +7370,15 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, = MAILBOX_HBA_EXT_OFFSET; /* Copy the mailbox extension data */ - if (pmbox->in_ext_byte_len && pmbox->context2) { + if (pmbox->in_ext_byte_len && pmbox->context2) lpfc_memcpy_to_slim(phba->MBslimaddr + MAILBOX_HBA_EXT_OFFSET, pmbox->context2, pmbox->in_ext_byte_len); - } - if (mbx->mbxCommand == MBX_CONFIG_PORT) { + if (mbx->mbxCommand == MBX_CONFIG_PORT) /* copy command data into host mbox for cmpl */ - lpfc_sli_pcimem_bcopy(mbx, phba->mbox, MAILBOX_CMD_SIZE); - } + lpfc_sli_pcimem_bcopy(mbx, phba->mbox, + MAILBOX_CMD_SIZE); /* First copy mbox command data to HBA SLIM, skip past first word */ @@ -7266,10 +7392,9 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, writel(ldata, to_slim); readl(to_slim); /* flush */ - if (mbx->mbxCommand == MBX_CONFIG_PORT) { + if (mbx->mbxCommand == MBX_CONFIG_PORT) /* switch over to host mailbox */ psli->sli_flag |= LPFC_SLI_ACTIVE; - } } wmb(); @@ -8060,7 +8185,7 @@ __lpfc_sli_issue_iocb_s3(struct lpfc_hba *phba, uint32_t ring_number, { struct lpfc_iocbq *nextiocb; IOCB_t *iocb; - struct lpfc_sli_ring *pring = &phba->sli.ring[ring_number]; + struct lpfc_sli_ring *pring = &phba->sli.sli3_ring[ring_number]; lockdep_assert_held(&phba->hbalock); @@ -8134,7 +8259,7 @@ __lpfc_sli_issue_iocb_s3(struct lpfc_hba *phba, uint32_t ring_number, * For FCP commands, we must be in a state where we can process link * attention events. 
*/ - } else if (unlikely(pring->ringno == phba->sli.fcp_ring && + } else if (unlikely(pring->ringno == LPFC_FCP_RING && !(phba->sli.sli_flag & LPFC_PROCESS_LA))) { goto iocb_busy; } @@ -8871,9 +8996,21 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, union lpfc_wqe *wqe; union lpfc_wqe128 wqe128; struct lpfc_queue *wq; - struct lpfc_sli_ring *pring = &phba->sli.ring[ring_number]; + struct lpfc_sli_ring *pring; - lockdep_assert_held(&phba->hbalock); + /* Get the WQ */ + if ((piocb->iocb_flag & LPFC_IO_FCP) || + (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) { + if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS))) + wq = phba->sli4_hba.fcp_wq[piocb->hba_wqidx]; + else + wq = phba->sli4_hba.oas_wq; + } else { + wq = phba->sli4_hba.els_wq; + } + + /* Get corresponding ring */ + pring = wq->pring; /* * The WQE can be either 64 or 128 bytes, @@ -8881,6 +9018,8 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, */ wqe = (union lpfc_wqe *)&wqe128; + lockdep_assert_held(&phba->hbalock); + if (piocb->sli4_xritag == NO_XRI) { if (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN || piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN) @@ -8895,7 +9034,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, return IOCB_BUSY; } } else { - sglq = __lpfc_sli_get_sglq(phba, piocb); + sglq = __lpfc_sli_get_els_sglq(phba, piocb); if (!sglq) { if (!(flag & SLI_IOCB_RET_IOCB)) { __lpfc_sli_ringtx_put(phba, @@ -8930,21 +9069,8 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, if (lpfc_sli4_iocb2wqe(phba, piocb, wqe)) return IOCB_ERROR; - if ((piocb->iocb_flag & LPFC_IO_FCP) || - (piocb->iocb_flag & LPFC_USE_FCPWQIDX)) { - if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS))) { - wq = phba->sli4_hba.fcp_wq[piocb->fcp_wqidx]; - } else { - wq = phba->sli4_hba.oas_wq; - } - if (lpfc_sli4_wq_put(wq, wqe)) - return IOCB_ERROR; - } else { - if (unlikely(!phba->sli4_hba.els_wq)) - return IOCB_ERROR; - if (lpfc_sli4_wq_put(phba->sli4_hba.els_wq, wqe)) - return IOCB_ERROR; - } + if (lpfc_sli4_wq_put(wq, wqe)) + return IOCB_ERROR; lpfc_sli_ringtxcmpl_put(phba, pring, piocb); return 0; @@ -9002,46 +9128,44 @@ lpfc_sli_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp) } /** - * lpfc_sli_calc_ring - Calculates which ring to use + * lpfc_sli4_calc_ring - Calculates which ring to use * @phba: Pointer to HBA context object. - * @ring_number: Initial ring * @piocb: Pointer to command iocb. * - * For SLI4, FCP IO can deferred to one fo many WQs, based on - * fcp_wqidx, thus we need to calculate the corresponding ring. + * For SLI4 only, FCP IO can deferred to one fo many WQs, based on + * hba_wqidx, thus we need to calculate the corresponding ring. * Since ABORTS must go on the same WQ of the command they are - * aborting, we use command's fcp_wqidx. + * aborting, we use command's hba_wqidx. 
*/ -static int -lpfc_sli_calc_ring(struct lpfc_hba *phba, uint32_t ring_number, - struct lpfc_iocbq *piocb) +struct lpfc_sli_ring * +lpfc_sli4_calc_ring(struct lpfc_hba *phba, struct lpfc_iocbq *piocb) { - if (phba->sli_rev < LPFC_SLI_REV4) - return ring_number; - - if (piocb->iocb_flag & (LPFC_IO_FCP | LPFC_USE_FCPWQIDX)) { + if (piocb->iocb_flag & (LPFC_IO_FCP | LPFC_USE_FCPWQIDX)) { if (!(phba->cfg_fof) || - (!(piocb->iocb_flag & LPFC_IO_FOF))) { + (!(piocb->iocb_flag & LPFC_IO_FOF))) { if (unlikely(!phba->sli4_hba.fcp_wq)) - return LPFC_HBA_ERROR; + return NULL; /* - * for abort iocb fcp_wqidx should already + * for abort iocb hba_wqidx should already * be setup based on what work queue we used. */ if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) - piocb->fcp_wqidx = + piocb->hba_wqidx = lpfc_sli4_scmd_to_wqidx_distr(phba, piocb->context1); - ring_number = MAX_SLI3_CONFIGURED_RINGS + - piocb->fcp_wqidx; + return phba->sli4_hba.fcp_wq[piocb->hba_wqidx]->pring; } else { if (unlikely(!phba->sli4_hba.oas_wq)) - return LPFC_HBA_ERROR; - piocb->fcp_wqidx = 0; - ring_number = LPFC_FCP_OAS_RING; + return NULL; + piocb->hba_wqidx = 0; + return phba->sli4_hba.oas_wq->pring; } + } else { + if (unlikely(!phba->sli4_hba.els_wq)) + return NULL; + piocb->hba_wqidx = 0; + return phba->sli4_hba.els_wq->pring; } - return ring_number; } /** @@ -9061,7 +9185,7 @@ int lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, struct lpfc_iocbq *piocb, uint32_t flag) { - struct lpfc_fcp_eq_hdl *fcp_eq_hdl; + struct lpfc_hba_eq_hdl *hba_eq_hdl; struct lpfc_sli_ring *pring; struct lpfc_queue *fpeq; struct lpfc_eqe *eqe; @@ -9069,21 +9193,19 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, int rc, idx; if (phba->sli_rev == LPFC_SLI_REV4) { - ring_number = lpfc_sli_calc_ring(phba, ring_number, piocb); - if (unlikely(ring_number == LPFC_HBA_ERROR)) + pring = lpfc_sli4_calc_ring(phba, piocb); + if (unlikely(pring == NULL)) return IOCB_ERROR; - idx = piocb->fcp_wqidx; - pring = &phba->sli.ring[ring_number]; spin_lock_irqsave(&pring->ring_lock, iflags); rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag); spin_unlock_irqrestore(&pring->ring_lock, iflags); if (lpfc_fcp_look_ahead && (piocb->iocb_flag & LPFC_IO_FCP)) { - fcp_eq_hdl = &phba->sli4_hba.fcp_eq_hdl[idx]; + idx = piocb->hba_wqidx; + hba_eq_hdl = &phba->sli4_hba.hba_eq_hdl[idx]; - if (atomic_dec_and_test(&fcp_eq_hdl-> - fcp_eq_in_use)) { + if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) { /* Get associated EQ with this index */ fpeq = phba->sli4_hba.hba_eq[idx]; @@ -9104,7 +9226,7 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_REARM); } - atomic_inc(&fcp_eq_hdl->fcp_eq_in_use); + atomic_inc(&hba_eq_hdl->hba_eq_in_use); } } else { /* For now, SLI2/3 will still use hbalock */ @@ -9124,7 +9246,7 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, * only when driver needs to support target mode functionality * or IP over FC functionalities. * - * This function is called with no lock held. + * This function is called with no lock held. SLI3 only. 
**/ static int lpfc_extra_ring_setup( struct lpfc_hba *phba) @@ -9137,14 +9259,14 @@ lpfc_extra_ring_setup( struct lpfc_hba *phba) /* Adjust cmd/rsp ring iocb entries more evenly */ /* Take some away from the FCP ring */ - pring = &psli->ring[psli->fcp_ring]; + pring = &psli->sli3_ring[LPFC_FCP_RING]; pring->sli.sli3.numCiocb -= SLI2_IOCB_CMD_R1XTRA_ENTRIES; pring->sli.sli3.numRiocb -= SLI2_IOCB_RSP_R1XTRA_ENTRIES; pring->sli.sli3.numCiocb -= SLI2_IOCB_CMD_R3XTRA_ENTRIES; pring->sli.sli3.numRiocb -= SLI2_IOCB_RSP_R3XTRA_ENTRIES; /* and give them to the extra ring */ - pring = &psli->ring[psli->extra_ring]; + pring = &psli->sli3_ring[LPFC_EXTRA_RING]; pring->sli.sli3.numCiocb += SLI2_IOCB_CMD_R1XTRA_ENTRIES; pring->sli.sli3.numRiocb += SLI2_IOCB_RSP_R1XTRA_ENTRIES; @@ -9329,7 +9451,7 @@ lpfc_sli_async_event_handler(struct lpfc_hba * phba, /** - * lpfc_sli_setup - SLI ring setup function + * lpfc_sli4_setup - SLI ring setup function * @phba: Pointer to HBA context object. * * lpfc_sli_setup sets up rings of the SLI interface with @@ -9340,6 +9462,51 @@ lpfc_sli_async_event_handler(struct lpfc_hba * phba, * This function always returns 0. **/ int +lpfc_sli4_setup(struct lpfc_hba *phba) +{ + struct lpfc_sli_ring *pring; + + pring = phba->sli4_hba.els_wq->pring; + pring->num_mask = LPFC_MAX_RING_MASK; + pring->prt[0].profile = 0; /* Mask 0 */ + pring->prt[0].rctl = FC_RCTL_ELS_REQ; + pring->prt[0].type = FC_TYPE_ELS; + pring->prt[0].lpfc_sli_rcv_unsol_event = + lpfc_els_unsol_event; + pring->prt[1].profile = 0; /* Mask 1 */ + pring->prt[1].rctl = FC_RCTL_ELS_REP; + pring->prt[1].type = FC_TYPE_ELS; + pring->prt[1].lpfc_sli_rcv_unsol_event = + lpfc_els_unsol_event; + pring->prt[2].profile = 0; /* Mask 2 */ + /* NameServer Inquiry */ + pring->prt[2].rctl = FC_RCTL_DD_UNSOL_CTL; + /* NameServer */ + pring->prt[2].type = FC_TYPE_CT; + pring->prt[2].lpfc_sli_rcv_unsol_event = + lpfc_ct_unsol_event; + pring->prt[3].profile = 0; /* Mask 3 */ + /* NameServer response */ + pring->prt[3].rctl = FC_RCTL_DD_SOL_CTL; + /* NameServer */ + pring->prt[3].type = FC_TYPE_CT; + pring->prt[3].lpfc_sli_rcv_unsol_event = + lpfc_ct_unsol_event; + return 0; +} + +/** + * lpfc_sli_setup - SLI ring setup function + * @phba: Pointer to HBA context object. + * + * lpfc_sli_setup sets up rings of the SLI interface with + * number of iocbs per ring and iotags. This function is + * called while driver attach to the HBA and before the + * interrupts are enabled. So there is no need for locking. + * + * This function always returns 0. SLI3 only. + **/ +int lpfc_sli_setup(struct lpfc_hba *phba) { int i, totiocbsize = 0; @@ -9347,19 +9514,14 @@ lpfc_sli_setup(struct lpfc_hba *phba) struct lpfc_sli_ring *pring; psli->num_rings = MAX_SLI3_CONFIGURED_RINGS; - if (phba->sli_rev == LPFC_SLI_REV4) - psli->num_rings += phba->cfg_fcp_io_channel; psli->sli_flag = 0; - psli->fcp_ring = LPFC_FCP_RING; - psli->next_ring = LPFC_FCP_NEXT_RING; - psli->extra_ring = LPFC_EXTRA_RING; psli->iocbq_lookup = NULL; psli->iocbq_lookup_len = 0; psli->last_iotag = 0; for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; + pring = &psli->sli3_ring[i]; switch (i) { case LPFC_FCP_RING: /* ring 0 - FCP */ /* numCiocb and numRiocb are used in config_port */ @@ -9458,18 +9620,18 @@ lpfc_sli_setup(struct lpfc_hba *phba) } /** - * lpfc_sli_queue_setup - Queue initialization function + * lpfc_sli4_queue_init - Queue initialization function * @phba: Pointer to HBA context object. 
* - * lpfc_sli_queue_setup sets up mailbox queues and iocb queues for each + * lpfc_sli4_queue_init sets up mailbox queues and iocb queues for each * ring. This function also initializes ring indices of each ring. * This function is called during the initialization of the SLI * interface of an HBA. * This function is called with no lock held and always returns * 1. **/ -int -lpfc_sli_queue_setup(struct lpfc_hba *phba) +void +lpfc_sli4_queue_init(struct lpfc_hba *phba) { struct lpfc_sli *psli; struct lpfc_sli_ring *pring; @@ -9480,31 +9642,102 @@ lpfc_sli_queue_setup(struct lpfc_hba *phba) INIT_LIST_HEAD(&psli->mboxq); INIT_LIST_HEAD(&psli->mboxq_cmpl); /* Initialize list headers for txq and txcmplq as double linked lists */ - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - pring->ringno = i; - pring->sli.sli3.next_cmdidx = 0; - pring->sli.sli3.local_getidx = 0; - pring->sli.sli3.cmdidx = 0; + for (i = 0; i < phba->cfg_fcp_io_channel; i++) { + pring = phba->sli4_hba.fcp_wq[i]->pring; pring->flag = 0; + pring->ringno = LPFC_FCP_RING; INIT_LIST_HEAD(&pring->txq); INIT_LIST_HEAD(&pring->txcmplq); INIT_LIST_HEAD(&pring->iocb_continueq); - INIT_LIST_HEAD(&pring->iocb_continue_saveq); - INIT_LIST_HEAD(&pring->postbufq); spin_lock_init(&pring->ring_lock); } - spin_unlock_irq(&phba->hbalock); - return 1; -} + for (i = 0; i < phba->cfg_nvme_io_channel; i++) { + pring = phba->sli4_hba.nvme_wq[i]->pring; + pring->flag = 0; + pring->ringno = LPFC_FCP_RING; + INIT_LIST_HEAD(&pring->txq); + INIT_LIST_HEAD(&pring->txcmplq); + INIT_LIST_HEAD(&pring->iocb_continueq); + spin_lock_init(&pring->ring_lock); + } + pring = phba->sli4_hba.els_wq->pring; + pring->flag = 0; + pring->ringno = LPFC_ELS_RING; + INIT_LIST_HEAD(&pring->txq); + INIT_LIST_HEAD(&pring->txcmplq); + INIT_LIST_HEAD(&pring->iocb_continueq); + spin_lock_init(&pring->ring_lock); -/** - * lpfc_sli_mbox_sys_flush - Flush mailbox command sub-system - * @phba: Pointer to HBA context object. - * - * This routine flushes the mailbox command subsystem. It will unconditionally - * flush all the mailbox commands in the three possible stages in the mailbox - * command sub-system: pending mailbox command queue; the outstanding mailbox + if (phba->cfg_nvme_io_channel) { + pring = phba->sli4_hba.nvmels_wq->pring; + pring->flag = 0; + pring->ringno = LPFC_ELS_RING; + INIT_LIST_HEAD(&pring->txq); + INIT_LIST_HEAD(&pring->txcmplq); + INIT_LIST_HEAD(&pring->iocb_continueq); + spin_lock_init(&pring->ring_lock); + } + + if (phba->cfg_fof) { + pring = phba->sli4_hba.oas_wq->pring; + pring->flag = 0; + pring->ringno = LPFC_FCP_RING; + INIT_LIST_HEAD(&pring->txq); + INIT_LIST_HEAD(&pring->txcmplq); + INIT_LIST_HEAD(&pring->iocb_continueq); + spin_lock_init(&pring->ring_lock); + } + + spin_unlock_irq(&phba->hbalock); +} + +/** + * lpfc_sli_queue_init - Queue initialization function + * @phba: Pointer to HBA context object. + * + * lpfc_sli_queue_init sets up mailbox queues and iocb queues for each + * ring. This function also initializes ring indices of each ring. + * This function is called during the initialization of the SLI + * interface of an HBA. + * This function is called with no lock held and always returns + * 1. 
+ **/ +void +lpfc_sli_queue_init(struct lpfc_hba *phba) +{ + struct lpfc_sli *psli; + struct lpfc_sli_ring *pring; + int i; + + psli = &phba->sli; + spin_lock_irq(&phba->hbalock); + INIT_LIST_HEAD(&psli->mboxq); + INIT_LIST_HEAD(&psli->mboxq_cmpl); + /* Initialize list headers for txq and txcmplq as double linked lists */ + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; + pring->ringno = i; + pring->sli.sli3.next_cmdidx = 0; + pring->sli.sli3.local_getidx = 0; + pring->sli.sli3.cmdidx = 0; + INIT_LIST_HEAD(&pring->iocb_continueq); + INIT_LIST_HEAD(&pring->iocb_continue_saveq); + INIT_LIST_HEAD(&pring->postbufq); + pring->flag = 0; + INIT_LIST_HEAD(&pring->txq); + INIT_LIST_HEAD(&pring->txcmplq); + spin_lock_init(&pring->ring_lock); + } + spin_unlock_irq(&phba->hbalock); +} + +/** + * lpfc_sli_mbox_sys_flush - Flush mailbox command sub-system + * @phba: Pointer to HBA context object. + * + * This routine flushes the mailbox command subsystem. It will unconditionally + * flush all the mailbox commands in the three possible stages in the mailbox + * command sub-system: pending mailbox command queue; the outstanding mailbox * command; and completed mailbox command queue. It is caller's responsibility * to make sure that the driver is in the proper state to flush the mailbox * command sub-system. Namely, the posting of mailbox commands into the @@ -9567,6 +9800,7 @@ lpfc_sli_host_down(struct lpfc_vport *vport) LIST_HEAD(completions); struct lpfc_hba *phba = vport->phba; struct lpfc_sli *psli = &phba->sli; + struct lpfc_queue *qp = NULL; struct lpfc_sli_ring *pring; struct lpfc_iocbq *iocb, *next_iocb; int i; @@ -9576,36 +9810,64 @@ lpfc_sli_host_down(struct lpfc_vport *vport) lpfc_cleanup_discovery_resources(vport); spin_lock_irqsave(&phba->hbalock, flags); - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - prev_pring_flag = pring->flag; - /* Only slow rings */ - if (pring->ringno == LPFC_ELS_RING) { - pring->flag |= LPFC_DEFERRED_RING_EVENT; - /* Set the lpfc data pending flag */ - set_bit(LPFC_DATA_READY, &phba->data_flags); - } - /* - * Error everything on the txq since these iocbs have not been - * given to the FW yet. - */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { - if (iocb->vport != vport) - continue; - list_move_tail(&iocb->list, &completions); - } - /* Next issue ABTS for everything on the txcmplq */ - list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, - list) { - if (iocb->vport != vport) + /* + * Error everything on the txq since these iocbs + * have not been given to the FW yet. 
+ * Also issue ABTS for everything on the txcmplq + */ + if (phba->sli_rev != LPFC_SLI_REV4) { + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; + prev_pring_flag = pring->flag; + /* Only slow rings */ + if (pring->ringno == LPFC_ELS_RING) { + pring->flag |= LPFC_DEFERRED_RING_EVENT; + /* Set the lpfc data pending flag */ + set_bit(LPFC_DATA_READY, &phba->data_flags); + } + list_for_each_entry_safe(iocb, next_iocb, + &pring->txq, list) { + if (iocb->vport != vport) + continue; + list_move_tail(&iocb->list, &completions); + } + list_for_each_entry_safe(iocb, next_iocb, + &pring->txcmplq, list) { + if (iocb->vport != vport) + continue; + lpfc_sli_issue_abort_iotag(phba, pring, iocb); + } + pring->flag = prev_pring_flag; + } + } else { + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) continue; - lpfc_sli_issue_abort_iotag(phba, pring, iocb); + if (pring == phba->sli4_hba.els_wq->pring) { + pring->flag |= LPFC_DEFERRED_RING_EVENT; + /* Set the lpfc data pending flag */ + set_bit(LPFC_DATA_READY, &phba->data_flags); + } + prev_pring_flag = pring->flag; + spin_lock_irq(&pring->ring_lock); + list_for_each_entry_safe(iocb, next_iocb, + &pring->txq, list) { + if (iocb->vport != vport) + continue; + list_move_tail(&iocb->list, &completions); + } + spin_unlock_irq(&pring->ring_lock); + list_for_each_entry_safe(iocb, next_iocb, + &pring->txcmplq, list) { + if (iocb->vport != vport) + continue; + lpfc_sli_issue_abort_iotag(phba, pring, iocb); + } + pring->flag = prev_pring_flag; } - - pring->flag = prev_pring_flag; } - spin_unlock_irqrestore(&phba->hbalock, flags); /* Cancel all the IOCBs from the completions list */ @@ -9634,6 +9896,7 @@ lpfc_sli_hba_down(struct lpfc_hba *phba) { LIST_HEAD(completions); struct lpfc_sli *psli = &phba->sli; + struct lpfc_queue *qp = NULL; struct lpfc_sli_ring *pring; struct lpfc_dmabuf *buf_ptr; unsigned long flags = 0; @@ -9647,20 +9910,36 @@ lpfc_sli_hba_down(struct lpfc_hba *phba) lpfc_fabric_abort_hba(phba); spin_lock_irqsave(&phba->hbalock, flags); - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; - /* Only slow rings */ - if (pring->ringno == LPFC_ELS_RING) { - pring->flag |= LPFC_DEFERRED_RING_EVENT; - /* Set the lpfc data pending flag */ - set_bit(LPFC_DATA_READY, &phba->data_flags); - } - /* - * Error everything on the txq since these iocbs have not been - * given to the FW yet. - */ - list_splice_init(&pring->txq, &completions); + /* + * Error everything on the txq since these iocbs + * have not been given to the FW yet. 
+ */ + if (phba->sli_rev != LPFC_SLI_REV4) { + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; + /* Only slow rings */ + if (pring->ringno == LPFC_ELS_RING) { + pring->flag |= LPFC_DEFERRED_RING_EVENT; + /* Set the lpfc data pending flag */ + set_bit(LPFC_DATA_READY, &phba->data_flags); + } + list_splice_init(&pring->txq, &completions); + } + } else { + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) + continue; + spin_lock_irq(&pring->ring_lock); + list_splice_init(&pring->txq, &completions); + spin_unlock_irq(&pring->ring_lock); + if (pring == phba->sli4_hba.els_wq->pring) { + pring->flag |= LPFC_DEFERRED_RING_EVENT; + /* Set the lpfc data pending flag */ + set_bit(LPFC_DATA_READY, &phba->data_flags); + } + } } spin_unlock_irqrestore(&phba->hbalock, flags); @@ -9987,7 +10266,6 @@ lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_iocbq *abtsiocbp; IOCB_t *icmd = NULL; IOCB_t *iabt = NULL; - int ring_number; int retval; unsigned long iflags; @@ -10027,7 +10305,7 @@ lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, iabt->ulpClass = icmd->ulpClass; /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abtsiocbp->fcp_wqidx = cmdiocb->fcp_wqidx; + abtsiocbp->hba_wqidx = cmdiocb->hba_wqidx; if (cmdiocb->iocb_flag & LPFC_IO_FCP) abtsiocbp->iocb_flag |= LPFC_USE_FCPWQIDX; if (cmdiocb->iocb_flag & LPFC_IO_FOF) @@ -10049,11 +10327,9 @@ lpfc_sli_abort_iotag_issue(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, abtsiocbp->iotag); if (phba->sli_rev == LPFC_SLI_REV4) { - ring_number = - lpfc_sli_calc_ring(phba, pring->ringno, abtsiocbp); - if (unlikely(ring_number == LPFC_HBA_ERROR)) + pring = lpfc_sli4_calc_ring(phba, abtsiocbp); + if (unlikely(pring == NULL)) return 0; - pring = &phba->sli.ring[ring_number]; /* Note: both hbalock and ring_lock need to be set here */ spin_lock_irqsave(&pring->ring_lock, iflags); retval = __lpfc_sli_issue_iocb(phba, pring->ringno, @@ -10134,6 +10410,111 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, return retval; } +/** + * lpfc_sli4_abort_nvme_io - Issue abort for a command iocb + * @phba: Pointer to HBA context object. + * @pring: Pointer to driver SLI ring object. + * @cmdiocb: Pointer to driver command iocb object. + * + * This function issues an abort iocb for the provided command iocb down to + * the port. Other than the case the outstanding command iocb is an abort + * request, this function issues abort out unconditionally. This function is + * called with hbalock held. The function returns 0 when it fails due to + * memory allocation failure or when the command iocb is an abort request. + **/ +static int +lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *cmdiocb) +{ + struct lpfc_vport *vport = cmdiocb->vport; + struct lpfc_iocbq *abtsiocbp; + union lpfc_wqe *abts_wqe; + int retval; + + /* + * There are certain command types we don't want to abort. And we + * don't want to abort commands that are already in the process of + * being aborted. 
+ */ + if (cmdiocb->iocb.ulpCommand == CMD_ABORT_XRI_CN || + cmdiocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN || + (cmdiocb->iocb_flag & LPFC_DRIVER_ABORTED) != 0) + return 0; + + /* issue ABTS for this io based on iotag */ + abtsiocbp = __lpfc_sli_get_iocbq(phba); + if (abtsiocbp == NULL) + return 0; + + /* This signals the response to set the correct status + * before calling the completion handler + */ + cmdiocb->iocb_flag |= LPFC_DRIVER_ABORTED; + + /* Complete prepping the abort wqe and issue to the FW. */ + abts_wqe = &abtsiocbp->wqe; + bf_set(abort_cmd_ia, &abts_wqe->abort_cmd, 0); + bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); + + /* Explicitly set reserved fields to zero.*/ + abts_wqe->abort_cmd.rsrvd4 = 0; + abts_wqe->abort_cmd.rsrvd5 = 0; + + /* WQE Common - word 6. Context is XRI tag. Set 0. */ + bf_set(wqe_xri_tag, &abts_wqe->abort_cmd.wqe_com, 0); + bf_set(wqe_ctxt_tag, &abts_wqe->abort_cmd.wqe_com, 0); + + /* word 7 */ + bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); + bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); + bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, + cmdiocb->iocb.ulpClass); + + /* word 8 - tell the FW to abort the IO associated with this + * outstanding exchange ID. + */ + abts_wqe->abort_cmd.wqe_com.abort_tag = cmdiocb->sli4_xritag; + + /* word 9 - this is the iotag for the abts_wqe completion. */ + bf_set(wqe_reqtag, &abts_wqe->abort_cmd.wqe_com, + abtsiocbp->iotag); + + /* word 10 */ + bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, cmdiocb->hba_wqidx); + bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); + + /* word 11 */ + bf_set(wqe_cmd_type, &abts_wqe->abort_cmd.wqe_com, OTHER_COMMAND); + bf_set(wqe_wqec, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_cqid, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + + /* ABTS WQE must go to the same WQ as the WQE to be aborted */ + abtsiocbp->iocb_flag |= LPFC_IO_NVME; + abtsiocbp->vport = vport; + /* todo: assign wqe_cmpl to lpfc_nvme_abort_fcreq_cmpl + * subsequent patch will add routine. For now, just skip assignment + * as won't ever be called. + */ + retval = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abtsiocbp); + if (retval == IOCB_ERROR) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6147 Failed abts issue_wqe with status x%x " + "for oxid x%x\n", + retval, cmdiocb->sli4_xritag); + lpfc_sli_release_iocbq(phba, abtsiocbp); + return retval; + } + + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6148 Drv Abort NVME Request Issued for " + "ox_id x%x on reqtag x%x\n", + cmdiocb->sli4_xritag, + abtsiocbp->iotag); + + return retval; +} + /** * lpfc_sli_hba_iocb_abort - Abort all iocbs to an hba. * @phba: pointer to lpfc HBA data structure. 
@@ -10145,10 +10526,20 @@ lpfc_sli_hba_iocb_abort(struct lpfc_hba *phba) { struct lpfc_sli *psli = &phba->sli; struct lpfc_sli_ring *pring; + struct lpfc_queue *qp = NULL; int i; - for (i = 0; i < psli->num_rings; i++) { - pring = &psli->ring[i]; + if (phba->sli_rev != LPFC_SLI_REV4) { + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->sli3_ring[i]; + lpfc_sli_abort_iocb_ring(phba, pring); + } + return; + } + list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) { + pring = qp->pring; + if (!pring) + continue; lpfc_sli_abort_iocb_ring(phba, pring); } } @@ -10352,7 +10743,7 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, abtsiocb->vport = vport; /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abtsiocb->fcp_wqidx = iocbq->fcp_wqidx; + abtsiocb->hba_wqidx = iocbq->hba_wqidx; if (iocbq->iocb_flag & LPFC_IO_FCP) abtsiocb->iocb_flag |= LPFC_USE_FCPWQIDX; if (iocbq->iocb_flag & LPFC_IO_FOF) @@ -10412,7 +10803,6 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, int sum, i, ret_val; unsigned long iflags; struct lpfc_sli_ring *pring_s4; - uint32_t ring_number; spin_lock_irq(&phba->hbalock); @@ -10455,7 +10845,7 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, abtsiocbq->vport = vport; /* ABTS WQE must go to the same WQ as the WQE to be aborted */ - abtsiocbq->fcp_wqidx = iocbq->fcp_wqidx; + abtsiocbq->hba_wqidx = iocbq->hba_wqidx; if (iocbq->iocb_flag & LPFC_IO_FCP) abtsiocbq->iocb_flag |= LPFC_USE_FCPWQIDX; if (iocbq->iocb_flag & LPFC_IO_FOF) @@ -10480,9 +10870,9 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring, iocbq->iocb_flag |= LPFC_DRIVER_ABORTED; if (phba->sli_rev == LPFC_SLI_REV4) { - ring_number = MAX_SLI3_CONFIGURED_RINGS + - iocbq->fcp_wqidx; - pring_s4 = &phba->sli.ring[ring_number]; + pring_s4 = lpfc_sli4_calc_ring(phba, iocbq); + if (pring_s4 == NULL) + continue; /* Note: both hbalock and ring_lock must be set here */ spin_lock_irqsave(&pring_s4->ring_lock, iflags); ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno, @@ -10644,10 +11034,14 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba, struct lpfc_iocbq *iocb; int txq_cnt = 0; int txcmplq_cnt = 0; - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; unsigned long iflags; bool iocb_completed = true; + if (phba->sli_rev >= LPFC_SLI_REV4) + pring = lpfc_sli4_calc_ring(phba, piocb); + else + pring = &phba->sli.sli3_ring[ring_number]; /* * If the caller has provided a response iocbq buffer, then context2 * is NULL or its an error. @@ -11442,6 +11836,7 @@ lpfc_sli_fp_intr_handler(int irq, void *dev_id) uint32_t ha_copy; unsigned long status; unsigned long iflag; + struct lpfc_sli_ring *pring; /* Get the driver's phba structure from the dev_id and * assume the HBA is not interrupting. 
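[Editorial note, not part of the patch] The SLI3/SLI4 split shown in lpfc_sli_host_down(), lpfc_sli_hba_down() and lpfc_sli_hba_iocb_abort() above repeats one ring-iteration pattern introduced by this patch: SLI3 indexes the fixed sli3_ring[] array, while SLI4 walks the per-work-queue prings reached through sli4_hba.lpfc_wq_list. A condensed sketch of that pattern follows; for_each_sli_ring() and do_ring_work() are hypothetical names used only for illustration and do not exist in the driver:

/* Illustrative only: the ring-iteration pattern this patch repeats.
 * do_ring_work() stands in for the per-ring action (txq flush, abort,
 * flag update); neither function is part of the real driver.
 */
static void for_each_sli_ring(struct lpfc_hba *phba,
			      void (*do_ring_work)(struct lpfc_hba *,
						   struct lpfc_sli_ring *))
{
	struct lpfc_sli *psli = &phba->sli;
	struct lpfc_queue *qp;
	struct lpfc_sli_ring *pring;
	int i;

	if (phba->sli_rev != LPFC_SLI_REV4) {
		/* SLI3: rings live in a fixed array sized by num_rings */
		for (i = 0; i < psli->num_rings; i++)
			do_ring_work(phba, &psli->sli3_ring[i]);
		return;
	}

	/* SLI4: one ring per work queue, reached through wq->pring */
	list_for_each_entry(qp, &phba->sli4_hba.lpfc_wq_list, wq_list) {
		pring = qp->pring;
		if (!pring)
			continue;
		do_ring_work(phba, pring);
	}
}

As the hunks above show, the real call sites additionally take pring->ring_lock around txq manipulation on the SLI4 path, since each work queue now owns its own ring.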
@@ -11486,10 +11881,9 @@ lpfc_sli_fp_intr_handler(int irq, void *dev_id) status = (ha_copy & (HA_RXMASK << (4*LPFC_FCP_RING))); status >>= (4*LPFC_FCP_RING); + pring = &phba->sli.sli3_ring[LPFC_FCP_RING]; if (status & HA_RXMASK) - lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_FCP_RING], - status); + lpfc_sli_handle_fast_ring_event(phba, pring, status); if (phba->cfg_multi_ring_support == 2) { /* @@ -11500,7 +11894,7 @@ lpfc_sli_fp_intr_handler(int irq, void *dev_id) status >>= (4*LPFC_EXTRA_RING); if (status & HA_RXMASK) { lpfc_sli_handle_fast_ring_event(phba, - &phba->sli.ring[LPFC_EXTRA_RING], + &phba->sli.sli3_ring[LPFC_EXTRA_RING], status); } } @@ -11813,11 +12207,13 @@ static struct lpfc_iocbq * lpfc_sli4_els_wcqe_to_rspiocbq(struct lpfc_hba *phba, struct lpfc_iocbq *irspiocbq) { - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct lpfc_iocbq *cmdiocbq; struct lpfc_wcqe_complete *wcqe; unsigned long iflags; + pring = lpfc_phba_elsring(phba); + wcqe = &irspiocbq->cq_event.cqe.wcqe_cmpl; spin_lock_irqsave(&pring->ring_lock, iflags); pring->stats.iocb_event++; @@ -12053,8 +12449,6 @@ lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, txq_cnt++; if (!list_empty(&pring->txcmplq)) txcmplq_cnt++; - if (!list_empty(&phba->sli.ring[LPFC_FCP_RING].txcmplq)) - fcp_txcmplq_cnt++; lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0387 NO IOCBQ data: txq_cnt=%d iocb_cnt=%d " "fcp_txcmplq_cnt=%d, els_txcmplq_cnt=%d\n", @@ -12173,6 +12567,7 @@ static bool lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) { bool workposted = false; + struct fc_frame_header *fc_hdr; struct lpfc_queue *hrq = phba->sli4_hba.hdr_rq; struct lpfc_queue *drq = phba->sli4_hba.dat_rq; struct hbq_dmabuf *dma_buf; @@ -12207,6 +12602,10 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe) } hrq->RQ_rcv_buf++; memcpy(&dma_buf->cq_event.cqe.rcqe_cmpl, rcqe, sizeof(*rcqe)); + + /* If a NVME LS event (type 0x28), treat it as Fast path */ + fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt; + /* save off the frame for the word thread to process */ list_add_tail(&dma_buf->cq_event.list, &phba->sli4_hba.sp_queue_event); @@ -12325,6 +12724,9 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, return; } + /* Save EQ associated with this CQ */ + cq->assoc_qp = speq; + /* Process all the entries to the CQ */ switch (cq->type) { case LPFC_MCQ: @@ -12337,8 +12739,9 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, break; case LPFC_WCQ: while ((cqe = lpfc_sli4_cq_get(cq))) { - if (cq->subtype == LPFC_FCP) - workposted |= lpfc_sli4_fp_handle_wcqe(phba, cq, + if ((cq->subtype == LPFC_FCP) || + (cq->subtype == LPFC_NVME)) + workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe); else workposted |= lpfc_sli4_sp_handle_cqe(phba, cq, @@ -12425,7 +12828,23 @@ lpfc_sli4_fp_handle_fcp_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, bf_get(lpfc_wcqe_c_request_tag, wcqe)); return; } - if (unlikely(!cmdiocbq->iocb_cmpl)) { + + if (cq->assoc_qp) + cmdiocbq->isr_timestamp = + cq->assoc_qp->isr_timestamp; + + if (cmdiocbq->iocb_cmpl == NULL) { + if (cmdiocbq->wqe_cmpl) { + if (cmdiocbq->iocb_flag & LPFC_DRIVER_ABORTED) { + spin_lock_irqsave(&phba->hbalock, iflags); + cmdiocbq->iocb_flag &= ~LPFC_DRIVER_ABORTED; + spin_unlock_irqrestore(&phba->hbalock, iflags); + } + + /* Pass the cmd_iocb and the wcqe to the upper layer */ + (cmdiocbq->wqe_cmpl)(phba, cmdiocbq, wcqe); + return; + } 
lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, "0375 FCP cmdiocb not callback function " "iotag: (%d)\n", @@ -12461,12 +12880,12 @@ lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, { struct lpfc_queue *childwq; bool wqid_matched = false; - uint16_t fcp_wqid; + uint16_t hba_wqid; /* Check for fast-path FCP work queue release */ - fcp_wqid = bf_get(lpfc_wcqe_r_wq_id, wcqe); + hba_wqid = bf_get(lpfc_wcqe_r_wq_id, wcqe); list_for_each_entry(childwq, &cq->child_list, list) { - if (childwq->queue_id == fcp_wqid) { + if (childwq->queue_id == hba_wqid) { lpfc_sli4_wq_release(childwq, bf_get(lpfc_wcqe_r_wqe_index, wcqe)); wqid_matched = true; @@ -12477,11 +12896,11 @@ lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, if (wqid_matched != true) lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, "2580 Fast-path wqe consume event carries " - "miss-matched qid: wcqe-qid=x%x\n", fcp_wqid); + "miss-matched qid: wcqe-qid=x%x\n", hba_wqid); } /** - * lpfc_sli4_fp_handle_wcqe - Process fast-path work queue completion entry + * lpfc_sli4_fp_handle_cqe - Process fast-path work queue completion entry * @cq: Pointer to the completion queue. * @eqe: Pointer to fast-path completion queue entry. * @@ -12489,7 +12908,7 @@ lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, * event queue for FCP command response completion. **/ static int -lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, +lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq, struct lpfc_cqe *cqe) { struct lpfc_wcqe_release wcqe; @@ -12501,10 +12920,15 @@ lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, /* Check and process for different type of WCQE and dispatch */ switch (bf_get(lpfc_wcqe_c_code, &wcqe)) { case CQE_CODE_COMPL_WQE: + case CQE_CODE_NVME_ERSP: cq->CQ_wq++; /* Process the WQ complete event */ phba->last_completion_time = jiffies; - lpfc_sli4_fp_handle_fcp_wcqe(phba, cq, + if ((cq->subtype == LPFC_FCP) || (cq->subtype == LPFC_NVME)) + lpfc_sli4_fp_handle_fcp_wcqe(phba, cq, + (struct lpfc_wcqe_complete *)&wcqe); + if (cq->subtype == LPFC_NVME_LS) + lpfc_sli4_fp_handle_fcp_wcqe(phba, cq, (struct lpfc_wcqe_complete *)&wcqe); break; case CQE_CODE_RELEASE_WQE: @@ -12520,9 +12944,13 @@ lpfc_sli4_fp_handle_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, workposted = lpfc_sli4_sp_handle_abort_xri_wcqe(phba, cq, (struct sli4_wcqe_xri_aborted *)&wcqe); break; + case CQE_CODE_RECEIVE_V1: + case CQE_CODE_RECEIVE: + phba->last_completion_time = jiffies; + break; default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0144 Not a valid WCQE code: x%x\n", + "0144 Not a valid CQE code: x%x\n", bf_get(lpfc_wcqe_c_code, &wcqe)); break; } @@ -12545,7 +12973,7 @@ static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, uint32_t qidx) { - struct lpfc_queue *cq; + struct lpfc_queue *cq = NULL; struct lpfc_cqe *cqe; bool workposted = false; uint16_t cqid; @@ -12563,28 +12991,33 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, /* Get the reference to the corresponding CQ */ cqid = bf_get_le32(lpfc_eqe_resource_id, eqe); - /* Check if this is a Slow path event */ - if (unlikely(cqid != phba->sli4_hba.fcp_cq_map[qidx])) { - lpfc_sli4_sp_handle_eqe(phba, eqe, - phba->sli4_hba.hba_eq[qidx]); - return; + if (phba->sli4_hba.nvme_cq_map && + (cqid == phba->sli4_hba.nvme_cq_map[qidx])) { + /* Process NVME command completion */ + cq = phba->sli4_hba.nvme_cq[qidx]; + goto process_cq; } - if 
(unlikely(!phba->sli4_hba.fcp_cq)) { - lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, - "3146 Fast-path completion queues " - "does not exist\n"); - return; + if (phba->sli4_hba.fcp_cq_map && + (cqid == phba->sli4_hba.fcp_cq_map[qidx])) { + /* Process FCP command completion */ + cq = phba->sli4_hba.fcp_cq[qidx]; + goto process_cq; } - cq = phba->sli4_hba.fcp_cq[qidx]; - if (unlikely(!cq)) { - if (phba->sli.sli_flag & LPFC_SLI_ACTIVE) - lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0367 Fast-path completion queue " - "(%d) does not exist\n", qidx); + + if (phba->sli4_hba.nvmels_cq && + (cqid == phba->sli4_hba.nvmels_cq->queue_id)) { + /* Process NVME unsol rcv */ + cq = phba->sli4_hba.nvmels_cq; + } + + /* Otherwise this is a Slow path event */ + if (cq == NULL) { + lpfc_sli4_sp_handle_eqe(phba, eqe, phba->sli4_hba.hba_eq[qidx]); return; } +process_cq: if (unlikely(cqid != cq->queue_id)) { lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0368 Miss-matched fast-path completion " @@ -12593,9 +13026,12 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, return; } + /* Save EQ associated with this CQ */ + cq->assoc_qp = phba->sli4_hba.hba_eq[qidx]; + /* Process all the entries to the CQ */ while ((cqe = lpfc_sli4_cq_get(cq))) { - workposted |= lpfc_sli4_fp_handle_wcqe(phba, cq, cqe); + workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe); if (!(++ecount % cq->entry_repost)) lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); } @@ -12686,7 +13122,7 @@ lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe) /* Process all the entries to the OAS CQ */ while ((cqe = lpfc_sli4_cq_get(cq))) { - workposted |= lpfc_sli4_fp_handle_wcqe(phba, cq, cqe); + workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe); if (!(++ecount % cq->entry_repost)) lpfc_sli4_cq_release(cq, LPFC_QUEUE_NOARM); } @@ -12734,15 +13170,15 @@ irqreturn_t lpfc_sli4_fof_intr_handler(int irq, void *dev_id) { struct lpfc_hba *phba; - struct lpfc_fcp_eq_hdl *fcp_eq_hdl; + struct lpfc_hba_eq_hdl *hba_eq_hdl; struct lpfc_queue *eq; struct lpfc_eqe *eqe; unsigned long iflag; int ecount = 0; /* Get the driver's phba structure from the dev_id */ - fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id; - phba = fcp_eq_hdl->phba; + hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id; + phba = hba_eq_hdl->phba; if (unlikely(!phba)) return IRQ_NONE; @@ -12828,17 +13264,17 @@ irqreturn_t lpfc_sli4_hba_intr_handler(int irq, void *dev_id) { struct lpfc_hba *phba; - struct lpfc_fcp_eq_hdl *fcp_eq_hdl; + struct lpfc_hba_eq_hdl *hba_eq_hdl; struct lpfc_queue *fpeq; struct lpfc_eqe *eqe; unsigned long iflag; int ecount = 0; - int fcp_eqidx; + int hba_eqidx; /* Get the driver's phba structure from the dev_id */ - fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id; - phba = fcp_eq_hdl->phba; - fcp_eqidx = fcp_eq_hdl->idx; + hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id; + phba = hba_eq_hdl->phba; + hba_eqidx = hba_eq_hdl->idx; if (unlikely(!phba)) return IRQ_NONE; @@ -12846,15 +13282,15 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) return IRQ_NONE; /* Get to the EQ struct associated with this vector */ - fpeq = phba->sli4_hba.hba_eq[fcp_eqidx]; + fpeq = phba->sli4_hba.hba_eq[hba_eqidx]; if (unlikely(!fpeq)) return IRQ_NONE; if (lpfc_fcp_look_ahead) { - if (atomic_dec_and_test(&fcp_eq_hdl->fcp_eq_in_use)) + if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) lpfc_sli4_eq_clr_intr(fpeq); else { - atomic_inc(&fcp_eq_hdl->fcp_eq_in_use); + atomic_inc(&hba_eq_hdl->hba_eq_in_use); return IRQ_NONE; } } @@ -12869,7 +13305,7 @@ lpfc_sli4_hba_intr_handler(int 
irq, void *dev_id) lpfc_sli4_eq_flush(phba, fpeq); spin_unlock_irqrestore(&phba->hbalock, iflag); if (lpfc_fcp_look_ahead) - atomic_inc(&fcp_eq_hdl->fcp_eq_in_use); + atomic_inc(&hba_eq_hdl->hba_eq_in_use); return IRQ_NONE; } @@ -12880,7 +13316,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) if (eqe == NULL) break; - lpfc_sli4_hba_handle_eqe(phba, eqe, fcp_eqidx); + lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx); if (!(++ecount % fpeq->entry_repost)) lpfc_sli4_eq_release(fpeq, LPFC_QUEUE_NOARM); fpeq->EQ_processed++; @@ -12897,7 +13333,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) fpeq->EQ_no_entry++; if (lpfc_fcp_look_ahead) { - atomic_inc(&fcp_eq_hdl->fcp_eq_in_use); + atomic_inc(&hba_eq_hdl->hba_eq_in_use); return IRQ_NONE; } @@ -12911,7 +13347,8 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) } if (lpfc_fcp_look_ahead) - atomic_inc(&fcp_eq_hdl->fcp_eq_in_use); + atomic_inc(&hba_eq_hdl->hba_eq_in_use); + return IRQ_HANDLED; } /* lpfc_sli4_fp_intr_handler */ @@ -12938,7 +13375,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id) struct lpfc_hba *phba; irqreturn_t hba_irq_rc; bool hba_handled = false; - int fcp_eqidx; + int qidx; /* Get the driver's phba structure from the dev_id */ phba = (struct lpfc_hba *)dev_id; @@ -12949,16 +13386,16 @@ lpfc_sli4_intr_handler(int irq, void *dev_id) /* * Invoke fast-path host attention interrupt handling as appropriate. */ - for (fcp_eqidx = 0; fcp_eqidx < phba->cfg_fcp_io_channel; fcp_eqidx++) { + for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) { hba_irq_rc = lpfc_sli4_hba_intr_handler(irq, - &phba->sli4_hba.fcp_eq_hdl[fcp_eqidx]); + &phba->sli4_hba.hba_eq_hdl[qidx]); if (hba_irq_rc == IRQ_HANDLED) hba_handled |= true; } if (phba->cfg_fof) { hba_irq_rc = lpfc_sli4_fof_intr_handler(irq, - &phba->sli4_hba.fcp_eq_hdl[0]); + &phba->sli4_hba.hba_eq_hdl[qidx]); if (hba_irq_rc == IRQ_HANDLED) hba_handled |= true; } @@ -12989,6 +13426,11 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue) dmabuf->virt, dmabuf->phys); kfree(dmabuf); } + if (queue->rqbp) { + lpfc_free_rq_buffer(queue->phba, queue); + kfree(queue->rqbp); + } + kfree(queue->pring); kfree(queue); return; } @@ -13022,7 +13464,13 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t entry_size, return NULL; queue->page_count = (ALIGN(entry_size * entry_count, hw_page_size))/hw_page_size; + + /* If needed, Adjust page count to match the max the adapter supports */ + if (queue->page_count > phba->sli4_hba.pc_sli4_params.wqpcnt) + queue->page_count = phba->sli4_hba.pc_sli4_params.wqpcnt; + INIT_LIST_HEAD(&queue->list); + INIT_LIST_HEAD(&queue->wq_list); INIT_LIST_HEAD(&queue->page_list); INIT_LIST_HEAD(&queue->child_list); for (x = 0, total_qe_count = 0; x < queue->page_count; x++) { @@ -13094,7 +13542,7 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) } /** - * lpfc_modify_fcp_eq_delay - Modify Delay Multiplier on FCP EQs + * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on FCP EQs * @phba: HBA structure that indicates port to create a queue on. * @startq: The starting FCP EQ to modify * @@ -13110,7 +13558,7 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) * fails this function will return -ENXIO. 
**/ int -lpfc_modify_fcp_eq_delay(struct lpfc_hba *phba, uint32_t startq) +lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq) { struct lpfc_mbx_modify_eq_delay *eq_delay; LPFC_MBOXQ_t *mbox; @@ -13118,11 +13566,11 @@ lpfc_modify_fcp_eq_delay(struct lpfc_hba *phba, uint32_t startq) int cnt, rc, length, status = 0; uint32_t shdr_status, shdr_add_status; uint32_t result; - int fcp_eqidx; + int qidx; union lpfc_sli4_cfg_shdr *shdr; uint16_t dmult; - if (startq >= phba->cfg_fcp_io_channel) + if (startq >= phba->io_channel_irqs) return 0; mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); @@ -13136,16 +13584,15 @@ lpfc_modify_fcp_eq_delay(struct lpfc_hba *phba, uint32_t startq) eq_delay = &mbox->u.mqe.un.eq_delay; /* Calculate delay multiper from maximum interrupt per second */ - result = phba->cfg_fcp_imax / phba->cfg_fcp_io_channel; - if (result > LPFC_DMULT_CONST) + result = phba->cfg_fcp_imax / phba->io_channel_irqs; + if (result > LPFC_DMULT_CONST || result == 0) dmult = 0; else dmult = LPFC_DMULT_CONST/result - 1; cnt = 0; - for (fcp_eqidx = startq; fcp_eqidx < phba->cfg_fcp_io_channel; - fcp_eqidx++) { - eq = phba->sli4_hba.hba_eq[fcp_eqidx]; + for (qidx = startq; qidx < phba->io_channel_irqs; qidx++) { + eq = phba->sli4_hba.hba_eq[qidx]; if (!eq) continue; eq_delay->u.request.eq[cnt].eq_id = eq->queue_id; @@ -13829,6 +14276,11 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, wq->db_format = LPFC_DB_LIST_FORMAT; wq->db_regaddr = phba->sli4_hba.WQDBregaddr; } + wq->pring = kzalloc(sizeof(struct lpfc_sli_ring), GFP_KERNEL); + if (wq->pring == NULL) { + status = -ENOMEM; + goto out; + } wq->type = LPFC_WQ; wq->assoc_qid = cq->queue_id; wq->subtype = subtype; @@ -14613,7 +15065,7 @@ lpfc_sli4_next_xritag(struct lpfc_hba *phba) } /** - * lpfc_sli4_post_els_sgl_list - post a block of ELS sgls to the port. + * lpfc_sli4_post_sgl_list - post a block of ELS sgls to the port. * @phba: pointer to lpfc hba data structure. * @post_sgl_list: pointer to els sgl entry list. * @count: number of els sgl entries on the list. @@ -14624,7 +15076,7 @@ lpfc_sli4_next_xritag(struct lpfc_hba *phba) * stopped. **/ static int -lpfc_sli4_post_els_sgl_list(struct lpfc_hba *phba, +lpfc_sli4_post_sgl_list(struct lpfc_hba *phba, struct list_head *post_sgl_list, int post_cnt) { @@ -14640,14 +15092,15 @@ lpfc_sli4_post_els_sgl_list(struct lpfc_hba *phba, uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; - reqlen = phba->sli4_hba.els_xri_cnt * sizeof(struct sgl_page_pairs) + + reqlen = post_cnt * sizeof(struct sgl_page_pairs) + sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t); if (reqlen > SLI4_PAGE_SIZE) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "2559 Block sgl registration required DMA " "size (%d) great than a page\n", reqlen); return -ENOMEM; } + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!mbox) return -ENOMEM; @@ -14691,8 +15144,9 @@ lpfc_sli4_post_els_sgl_list(struct lpfc_hba *phba, /* Complete initialization and perform endian conversion. 
*/ bf_set(lpfc_post_sgl_pages_xri, sgl, xritag_start); - bf_set(lpfc_post_sgl_pages_xricnt, sgl, phba->sli4_hba.els_xri_cnt); + bf_set(lpfc_post_sgl_pages_xricnt, sgl, post_cnt); sgl->word0 = cpu_to_le32(sgl->word0); + if (!phba->sli4_hba.intr_enable) rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); else { @@ -14888,6 +15342,7 @@ lpfc_fc_frame_check(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr) case FC_TYPE_ELS: case FC_TYPE_FCP: case FC_TYPE_CT: + case FC_TYPE_NVME: break; case FC_TYPE_IP: case FC_TYPE_ILS: @@ -14945,14 +15400,11 @@ lpfc_fc_hdr_get_vfi(struct fc_frame_header *fc_hdr) **/ static struct lpfc_vport * lpfc_fc_frame_to_vport(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr, - uint16_t fcfi) + uint16_t fcfi, uint32_t did) { struct lpfc_vport **vports; struct lpfc_vport *vport = NULL; int i; - uint32_t did = (fc_hdr->fh_d_id[0] << 16 | - fc_hdr->fh_d_id[1] << 8 | - fc_hdr->fh_d_id[2]); if (did == Fabric_DID) return phba->pport; @@ -14961,7 +15413,7 @@ lpfc_fc_frame_to_vport(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr, return phba->pport; vports = lpfc_create_vport_work_array(phba); - if (vports != NULL) + if (vports != NULL) { for (i = 0; i <= phba->max_vpi && vports[i] != NULL; i++) { if (phba->fcf.fcfi == fcfi && vports[i]->vfi == lpfc_fc_hdr_get_vfi(fc_hdr) && @@ -14970,6 +15422,7 @@ lpfc_fc_frame_to_vport(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr, break; } } + } lpfc_destroy_vport_work_array(phba, vports); return vport; } @@ -15399,7 +15852,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, * a BA_RJT. */ if ((fctl & FC_FC_EX_CTX) && - (lxri > lpfc_sli4_get_els_iocb_cnt(phba))) { + (lxri > lpfc_sli4_get_iocb_cnt(phba))) { icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); @@ -15576,6 +16029,7 @@ lpfc_prep_seq(struct lpfc_vport *vport, struct hbq_dmabuf *seq_dmabuf) /* Initialize the first IOCB. */ first_iocbq->iocb.unsli3.rcvsli3.acc_len = 0; first_iocbq->iocb.ulpStatus = IOSTAT_SUCCESS; + first_iocbq->vport = vport; /* Check FC Header to see what TYPE of frame we are rcv'ing */ if (sli4_type_from_fc_hdr(fc_hdr) == FC_TYPE_ELS) { @@ -15688,7 +16142,7 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport, return; } if (!lpfc_complete_unsol_iocb(phba, - &phba->sli.ring[LPFC_ELS_RING], + phba->sli4_hba.els_wq->pring, iocbq, fc_hdr->fh_r_ctl, fc_hdr->fh_type)) lpfc_printf_log(phba, KERN_ERR, LOG_SLI, @@ -15713,8 +16167,7 @@ lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *vport, * This function is called with no lock held. This function processes all * the received buffers and gives it to upper layers when a received buffer * indicates that it is the final frame in the sequence. The interrupt - * service routine processes received buffers at interrupt contexts and adds - * received dma buffers to the rb_pend_list queue and signals the worker thread. + * service routine processes received buffers at interrupt contexts. * Worker thread calls lpfc_sli4_handle_received_buffer, which will call the * appropriate receive function when the final frame in a sequence is received. 
**/ @@ -15745,16 +16198,16 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba, fcfi = bf_get(lpfc_rcqe_fcf_id, &dmabuf->cq_event.cqe.rcqe_cmpl); - vport = lpfc_fc_frame_to_vport(phba, fc_hdr, fcfi); + /* d_id this frame is directed to */ + did = sli4_did_from_fc_hdr(fc_hdr); + + vport = lpfc_fc_frame_to_vport(phba, fc_hdr, fcfi, did); if (!vport) { /* throw out the frame */ lpfc_in_buf_free(phba, &dmabuf->dbuf); return; } - /* d_id this frame is directed to */ - did = sli4_did_from_fc_hdr(fc_hdr); - /* vport is registered unless we rcv a FLOGI directed to Fabric_DID */ if (!(vport->vpi_state & LPFC_VPI_REGISTERED) && (did != Fabric_DID)) { @@ -17232,7 +17685,7 @@ uint32_t lpfc_drain_txq(struct lpfc_hba *phba) { LIST_HEAD(completions); - struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_sli_ring *pring; struct lpfc_iocbq *piocbq = NULL; unsigned long iflags = 0; char *fail_msg = NULL; @@ -17241,6 +17694,8 @@ lpfc_drain_txq(struct lpfc_hba *phba) union lpfc_wqe *wqe = (union lpfc_wqe *) &wqe128; uint32_t txq_cnt = 0; + pring = lpfc_phba_elsring(phba); + spin_lock_irqsave(&pring->ring_lock, iflags); list_for_each_entry(piocbq, &pring->txq, list) { txq_cnt++; @@ -17262,7 +17717,7 @@ lpfc_drain_txq(struct lpfc_hba *phba) txq_cnt); break; } - sglq = __lpfc_sli_get_sglq(phba, piocbq); + sglq = __lpfc_sli_get_els_sglq(phba, piocbq); if (!sglq) { __lpfc_sli_ringtx_put(phba, pring, piocbq); spin_unlock_irqrestore(&pring->ring_lock, iflags); @@ -17302,3 +17757,191 @@ lpfc_drain_txq(struct lpfc_hba *phba) return txq_cnt; } + +/** + * lpfc_wqe_bpl2sgl - Convert the bpl/bde to a sgl. + * @phba: Pointer to HBA context object. + * @pwqe: Pointer to command WQE. + * @sglq: Pointer to the scatter gather queue object. + * + * This routine converts the bpl or bde that is in the WQE + * to a sgl list for the sli4 hardware. The physical address + * of the bpl/bde is converted back to a virtual address. + * If the WQE contains a BPL then the list of BDE's is + * converted to sli4_sge's. If the WQE contains a single + * BDE then it is converted to a single sli_sge. + * The WQE is still in cpu endianness so the contents of + * the bpl can be used without byte swapping. + * + * Returns valid XRI = Success, NO_XRI = Failure. + */ +static uint16_t +lpfc_wqe_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeq, + struct lpfc_sglq *sglq) +{ + uint16_t xritag = NO_XRI; + struct ulp_bde64 *bpl = NULL; + struct ulp_bde64 bde; + struct sli4_sge *sgl = NULL; + struct lpfc_dmabuf *dmabuf; + union lpfc_wqe *wqe; + int numBdes = 0; + int i = 0; + uint32_t offset = 0; /* accumulated offset in the sg request list */ + int inbound = 0; /* number of sg reply entries inbound from firmware */ + uint32_t cmd; + + if (!pwqeq || !sglq) + return xritag; + + sgl = (struct sli4_sge *)sglq->sgl; + wqe = &pwqeq->wqe; + pwqeq->iocb.ulpIoTag = pwqeq->iotag; + + cmd = bf_get(wqe_cmnd, &wqe->generic.wqe_com); + if (cmd == CMD_XMIT_BLS_RSP64_WQE) + return sglq->sli4_xritag; + numBdes = pwqeq->rsvd2; + if (numBdes) { + /* The addrHigh and addrLow fields within the WQE + * have not been byteswapped yet so there is no + * need to swap them back. + */ + if (pwqeq->context3) + dmabuf = (struct lpfc_dmabuf *)pwqeq->context3; + else + return xritag; + + bpl = (struct ulp_bde64 *)dmabuf->virt; + if (!bpl) + return xritag; + + for (i = 0; i < numBdes; i++) { + /* Should already be byte swapped. 
*/ + sgl->addr_hi = bpl->addrHigh; + sgl->addr_lo = bpl->addrLow; + + sgl->word2 = le32_to_cpu(sgl->word2); + if ((i+1) == numBdes) + bf_set(lpfc_sli4_sge_last, sgl, 1); + else + bf_set(lpfc_sli4_sge_last, sgl, 0); + /* swap the size field back to the cpu so we + * can assign it to the sgl. + */ + bde.tus.w = le32_to_cpu(bpl->tus.w); + sgl->sge_len = cpu_to_le32(bde.tus.f.bdeSize); + /* The offsets in the sgl need to be accumulated + * separately for the request and reply lists. + * The request is always first, the reply follows. + */ + switch (cmd) { + case CMD_GEN_REQUEST64_WQE: + /* add up the reply sg entries */ + if (bpl->tus.f.bdeFlags == BUFF_TYPE_BDE_64I) + inbound++; + /* first inbound? reset the offset */ + if (inbound == 1) + offset = 0; + bf_set(lpfc_sli4_sge_offset, sgl, offset); + bf_set(lpfc_sli4_sge_type, sgl, + LPFC_SGE_TYPE_DATA); + offset += bde.tus.f.bdeSize; + break; + case CMD_FCP_TRSP64_WQE: + bf_set(lpfc_sli4_sge_offset, sgl, 0); + bf_set(lpfc_sli4_sge_type, sgl, + LPFC_SGE_TYPE_DATA); + break; + case CMD_FCP_TSEND64_WQE: + case CMD_FCP_TRECEIVE64_WQE: + bf_set(lpfc_sli4_sge_type, sgl, + bpl->tus.f.bdeFlags); + if (i < 3) + offset = 0; + else + offset += bde.tus.f.bdeSize; + bf_set(lpfc_sli4_sge_offset, sgl, offset); + break; + } + sgl->word2 = cpu_to_le32(sgl->word2); + bpl++; + sgl++; + } + } else if (wqe->gen_req.bde.tus.f.bdeFlags == BUFF_TYPE_BDE_64) { + /* The addrHigh and addrLow fields of the BDE have not + * been byteswapped yet so they need to be swapped + * before putting them in the sgl. + */ + sgl->addr_hi = cpu_to_le32(wqe->gen_req.bde.addrHigh); + sgl->addr_lo = cpu_to_le32(wqe->gen_req.bde.addrLow); + sgl->word2 = le32_to_cpu(sgl->word2); + bf_set(lpfc_sli4_sge_last, sgl, 1); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = cpu_to_le32(wqe->gen_req.bde.tus.f.bdeSize); + } + return sglq->sli4_xritag; +} + +/** + * lpfc_sli4_issue_wqe - Issue an SLI4 Work Queue Entry (WQE) + * @phba: Pointer to HBA context object. + * @ring_number: Base sli ring number + * @pwqe: Pointer to command WQE. + **/ +int +lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number, + struct lpfc_iocbq *pwqe) +{ + union lpfc_wqe *wqe = &pwqe->wqe; + struct lpfc_queue *wq; + struct lpfc_sglq *sglq; + struct lpfc_sli_ring *pring; + unsigned long iflags; + + /* NVME_LS and NVME_LS ABTS requests. */ + if (pwqe->iocb_flag & LPFC_IO_NVME_LS) { + pring = phba->sli4_hba.nvmels_wq->pring; + spin_lock_irqsave(&pring->ring_lock, iflags); + sglq = __lpfc_sli_get_els_sglq(phba, pwqe); + if (!sglq) { + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return WQE_BUSY; + } + pwqe->sli4_lxritag = sglq->sli4_lxritag; + pwqe->sli4_xritag = sglq->sli4_xritag; + if (lpfc_wqe_bpl2sgl(phba, pwqe, sglq) == NO_XRI) { + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return WQE_ERROR; + } + bf_set(wqe_xri_tag, &pwqe->wqe.xmit_bls_rsp.wqe_com, + pwqe->sli4_xritag); + if (lpfc_sli4_wq_put(phba->sli4_hba.nvmels_wq, wqe)) { + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return WQE_ERROR; + } + lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return 0; + } + + /* NVME_FCREQ and NVME_ABTS requests */ + if (pwqe->iocb_flag & LPFC_IO_NVME) { + /* Get the IO distribution (hba_wqidx) for WQ assignment. 
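 * The same hba_wqidx selects the NVME WQ, its paired CQ and the pring,
 * so completions arrive on the CQ associated with the WQ the command
 * was posted to.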
*/ + pring = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]->pring; + + spin_lock_irqsave(&pring->ring_lock, iflags); + wq = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]; + bf_set(wqe_cqid, &wqe->generic.wqe_com, + phba->sli4_hba.nvme_cq[pwqe->hba_wqidx]->queue_id); + if (lpfc_sli4_wq_put(wq, wqe)) { + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return WQE_ERROR; + } + lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return 0; + } + + return WQE_ERROR; +} diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 3fad5657514b..72520125251b 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -54,9 +54,16 @@ struct lpfc_iocbq { uint16_t iotag; /* pre-assigned IO tag */ uint16_t sli4_lxritag; /* logical pre-assigned XRI. */ uint16_t sli4_xritag; /* pre-assigned XRI, (OXID) tag. */ + uint16_t hba_wqidx; /* index to HBA work queue */ struct lpfc_cq_event cq_event; + struct lpfc_wcqe_complete wcqe_cmpl; /* WQE cmpl */ + uint64_t isr_timestamp; - IOCB_t iocb; /* IOCB cmd */ + /* Be careful here */ + union lpfc_wqe wqe; /* WQE cmd */ + IOCB_t iocb; /* For IOCB cmd or if we want 128 byte WQE */ + + uint8_t rsvd2; uint8_t priority; /* OAS priority */ uint8_t retry; /* retry counter for IOCB cmd - if needed */ uint32_t iocb_flag; @@ -82,9 +89,12 @@ struct lpfc_iocbq { #define LPFC_IO_OAS 0x10000 /* OAS FCP IO */ #define LPFC_IO_FOF 0x20000 /* FOF FCP IO */ #define LPFC_IO_LOOPBACK 0x40000 /* Loopback IO */ +#define LPFC_PRLI_NVME_REQ 0x80000 /* This is an NVME PRLI. */ +#define LPFC_PRLI_FCP_REQ 0x100000 /* This is an NVME PRLI. */ +#define LPFC_IO_NVME 0x200000 /* NVME FCP command */ +#define LPFC_IO_NVME_LS 0x400000 /* NVME LS command */ uint32_t drvrTimeout; /* driver timeout in seconds */ - uint32_t fcp_wqidx; /* index to FCP work queue */ struct lpfc_vport *vport;/* virtual port pointer */ void *context1; /* caller context information */ void *context2; /* caller context information */ @@ -103,6 +113,8 @@ struct lpfc_iocbq { struct lpfc_iocbq *); void (*iocb_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, struct lpfc_iocbq *); + void (*wqe_cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_wcqe_complete *); }; #define SLI_IOCB_RET_IOCB 1 /* Return IOCB if cmd ring full */ @@ -112,6 +124,14 @@ struct lpfc_iocbq { #define IOCB_ERROR 2 #define IOCB_TIMEDOUT 3 +#define SLI_WQE_RET_WQE 1 /* Return WQE if cmd ring full */ + +#define WQE_SUCCESS 0 +#define WQE_BUSY 1 +#define WQE_ERROR 2 +#define WQE_TIMEDOUT 3 +#define WQE_ABORTED 4 + #define LPFC_MBX_WAKE 1 #define LPFC_MBX_IMED_UNREG 2 @@ -298,11 +318,7 @@ struct lpfc_sli { #define LPFC_MENLO_MAINT 0x1000 /* need for menl fw download */ #define LPFC_SLI_ASYNC_MBX_BLK 0x2000 /* Async mailbox is blocked */ - struct lpfc_sli_ring *ring; - int fcp_ring; /* ring used for FCP initiator commands */ - int next_ring; - - int extra_ring; /* extra ring used for other protocols */ + struct lpfc_sli_ring *sli3_ring; struct lpfc_sli_stat slistat; /* SLI statistical info */ struct list_head mboxq; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index dfbb25e5c5b6..b5b4f6b843c0 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -35,9 +35,10 @@ #define LPFC_NEMBED_MBOX_SGL_CNT 254 /* Multi-queue arrangement for FCP EQ/CQ/WQ tuples */ -#define LPFC_FCP_IO_CHAN_DEF 4 -#define LPFC_FCP_IO_CHAN_MIN 1 -#define LPFC_FCP_IO_CHAN_MAX 16 +#define LPFC_HBA_IO_CHAN_MIN 0 +#define LPFC_HBA_IO_CHAN_MAX 32 +#define 
LPFC_FCP_IO_CHAN_DEF 4 +#define LPFC_NVME_IO_CHAN_DEF 0 /* Number of channels used for Flash Optimized Fabric (FOF) operations */ @@ -107,6 +108,8 @@ enum lpfc_sli4_queue_subtype { LPFC_MBOX, LPFC_FCP, LPFC_ELS, + LPFC_NVME, + LPFC_NVME_LS, LPFC_USOL }; @@ -125,25 +128,41 @@ union sli4_qe { struct lpfc_rqe *rqe; }; +/* RQ buffer list */ +struct lpfc_rqb { + uint16_t entry_count; /* Current number of RQ slots */ + uint16_t buffer_count; /* Current number of buffers posted */ + struct list_head rqb_buffer_list; /* buffers assigned to this HBQ */ + /* Callback for HBQ buffer allocation */ + struct rqb_dmabuf *(*rqb_alloc_buffer)(struct lpfc_hba *); + /* Callback for HBQ buffer free */ + void (*rqb_free_buffer)(struct lpfc_hba *, + struct rqb_dmabuf *); +}; + struct lpfc_queue { struct list_head list; + struct list_head wq_list; enum lpfc_sli4_queue_type type; enum lpfc_sli4_queue_subtype subtype; struct lpfc_hba *phba; struct list_head child_list; + struct list_head page_list; + struct list_head sgl_list; uint32_t entry_count; /* Number of entries to support on the queue */ uint32_t entry_size; /* Size of each queue entry. */ uint32_t entry_repost; /* Count of entries before doorbell is rung */ #define LPFC_QUEUE_MIN_REPOST 8 uint32_t queue_id; /* Queue ID assigned by the hardware */ uint32_t assoc_qid; /* Queue ID associated with, for CQ/WQ/MQ */ - struct list_head page_list; uint32_t page_count; /* Number of pages allocated for this queue */ uint32_t host_index; /* The host's index for putting or getting */ uint32_t hba_index; /* The last known hba index for get or put */ struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */ + struct lpfc_rqb *rqbp; /* ptr to RQ buffers */ + uint16_t sgl_list_cnt; uint16_t db_format; #define LPFC_DB_RING_FORMAT 0x01 #define LPFC_DB_LIST_FORMAT 0x02 @@ -176,6 +195,8 @@ struct lpfc_queue { #define RQ_buf_trunc q_cnt_3 #define RQ_rcv_buf q_cnt_4 + uint64_t isr_timestamp; + struct lpfc_queue *assoc_qp; union sli4_qe qe[1]; /* array to index entries (must be last) */ }; @@ -338,6 +359,7 @@ struct lpfc_bmbx { #define LPFC_CQE_DEF_COUNT 1024 #define LPFC_WQE_DEF_COUNT 256 #define LPFC_WQE128_DEF_COUNT 128 +#define LPFC_WQE128_MAX_COUNT 256 #define LPFC_MQE_DEF_COUNT 16 #define LPFC_RQE_DEF_COUNT 512 @@ -379,10 +401,14 @@ struct lpfc_max_cfg_param { struct lpfc_hba; /* SLI4 HBA multi-fcp queue handler struct */ -struct lpfc_fcp_eq_hdl { +struct lpfc_hba_eq_hdl { uint32_t idx; struct lpfc_hba *phba; - atomic_t fcp_eq_in_use; + atomic_t hba_eq_in_use; + struct cpumask *cpumask; + /* CPU affinitsed to or 0xffffffff if multiple */ + uint32_t cpu; +#define LPFC_MULTI_CPU_AFFINITY 0xffffffff }; /* Port Capabilities for SLI4 Parameters */ @@ -427,6 +453,7 @@ struct lpfc_pc_sli4_params { uint8_t wqsize; #define LPFC_WQ_SZ64_SUPPORT 1 #define LPFC_WQ_SZ128_SUPPORT 2 + uint8_t wqpcnt; }; struct lpfc_iov { @@ -445,7 +472,7 @@ struct lpfc_sli4_lnk_info { uint8_t optic_state; }; -#define LPFC_SLI4_HANDLER_CNT (LPFC_FCP_IO_CHAN_MAX+ \ +#define LPFC_SLI4_HANDLER_CNT (LPFC_HBA_IO_CHAN_MAX+ \ LPFC_FOF_IO_CHAN_NUM) #define LPFC_SLI4_HANDLER_NAME_SZ 16 @@ -516,21 +543,30 @@ struct lpfc_sli4_hba { struct lpfc_register sli_intf; struct lpfc_pc_sli4_params pc_sli4_params; uint8_t handler_name[LPFC_SLI4_HANDLER_CNT][LPFC_SLI4_HANDLER_NAME_SZ]; - struct lpfc_fcp_eq_hdl *fcp_eq_hdl; /* FCP per-WQ handle */ + struct lpfc_hba_eq_hdl *hba_eq_hdl; /* HBA per-WQ handle */ /* Pointers to the constructed SLI4 queues */ - struct lpfc_queue **hba_eq;/* Event queues for HBA */ - struct 
lpfc_queue **fcp_cq;/* Fast-path FCP compl queue */ - struct lpfc_queue **fcp_wq;/* Fast-path FCP work queue */ + struct lpfc_queue **hba_eq; /* Event queues for HBA */ + struct lpfc_queue **fcp_cq; /* Fast-path FCP compl queue */ + struct lpfc_queue **nvme_cq; /* Fast-path NVME compl queue */ + struct lpfc_queue **fcp_wq; /* Fast-path FCP work queue */ + struct lpfc_queue **nvme_wq; /* Fast-path NVME work queue */ uint16_t *fcp_cq_map; + uint16_t *nvme_cq_map; + struct list_head lpfc_wq_list; struct lpfc_queue *mbx_cq; /* Slow-path mailbox complete queue */ struct lpfc_queue *els_cq; /* Slow-path ELS response complete queue */ + struct lpfc_queue *nvmels_cq; /* NVME LS complete queue */ struct lpfc_queue *mbx_wq; /* Slow-path MBOX work queue */ struct lpfc_queue *els_wq; /* Slow-path ELS work queue */ + struct lpfc_queue *nvmels_wq; /* NVME LS work queue */ struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */ struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */ + struct lpfc_name wwnn; + struct lpfc_name wwpn; + uint32_t fw_func_mode; /* FW function protocol mode */ uint32_t ulp0_mode; /* ULP0 protocol mode */ uint32_t ulp1_mode; /* ULP1 protocol mode */ @@ -567,14 +603,17 @@ struct lpfc_sli4_hba { uint16_t rpi_hdrs_in_use; /* must post rpi hdrs if set. */ uint16_t next_xri; /* last_xri - max_cfg_param.xri_base = used */ uint16_t next_rpi; + uint16_t nvme_xri_max; + uint16_t nvme_xri_cnt; + uint16_t nvme_xri_start; uint16_t scsi_xri_max; uint16_t scsi_xri_cnt; - uint16_t els_xri_cnt; uint16_t scsi_xri_start; - struct list_head lpfc_free_sgl_list; - struct list_head lpfc_sgl_list; + uint16_t els_xri_cnt; + struct list_head lpfc_els_sgl_list; struct list_head lpfc_abts_els_sgl_list; struct list_head lpfc_abts_scsi_buf_list; + struct list_head lpfc_abts_nvme_buf_list; struct lpfc_sglq **lpfc_sglq_active_list; struct list_head lpfc_rpi_hdr_list; unsigned long *rpi_bmask; @@ -601,8 +640,9 @@ struct lpfc_sli4_hba { #define LPFC_SLI4_PPNAME_NON 0 #define LPFC_SLI4_PPNAME_GET 1 struct lpfc_iov iov; + spinlock_t abts_nvme_buf_list_lock; /* list of aborted SCSI IOs */ spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */ - spinlock_t abts_sgl_list_lock; /* list of aborted els IOs */ + spinlock_t sgl_list_lock; /* list of aborted els IOs */ uint32_t physical_port; /* CPU to vector mapping information */ @@ -614,7 +654,7 @@ struct lpfc_sli4_hba { enum lpfc_sge_type { GEN_BUFF_TYPE, - SCSI_BUFF_TYPE + SCSI_BUFF_TYPE, }; enum lpfc_sgl_state { @@ -693,7 +733,7 @@ struct lpfc_queue *lpfc_sli4_queue_alloc(struct lpfc_hba *, uint32_t, uint32_t); void lpfc_sli4_queue_free(struct lpfc_queue *); int lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint32_t); -int lpfc_modify_fcp_eq_delay(struct lpfc_hba *, uint32_t); +int lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq); int lpfc_cq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, uint32_t, uint32_t); int32_t lpfc_mq_create(struct lpfc_hba *, struct lpfc_queue *, @@ -745,6 +785,7 @@ int lpfc_sli4_brdreset(struct lpfc_hba *); int lpfc_sli4_add_fcf_record(struct lpfc_hba *, struct fcf_record *); void lpfc_sli_remove_dflt_fcf(struct lpfc_hba *); int lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *); +int lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba); int lpfc_sli4_init_vpi(struct lpfc_vport *); uint32_t lpfc_sli4_cq_release(struct lpfc_queue *, bool); uint32_t lpfc_sli4_eq_release(struct lpfc_queue *, bool); diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 
5bbe6af148dd..07c7c5f9fdde 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -403,6 +403,8 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable) vport->fdmi_port_mask = phba->pport->fdmi_port_mask; } + /* todo: init: register port with nvme */ + /* * In SLI4, the vpi must be activated before it can be used * by the port. -- GitLab From a0f2d3ef374fd8d2f51b8cc1ea723014b1aa2c9b Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:31 -0800 Subject: [PATCH 0442/1084] scsi: lpfc: NVME Initiator: Merge into FC discovery NVME Initiator: Merge into FC discovery Adds NVME PRLI support and Nameserver registrations and Queries for NVME Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 6 +- drivers/scsi/lpfc/lpfc_ct.c | 357 +++++++++++++++++++---------- drivers/scsi/lpfc/lpfc_disc.h | 19 +- drivers/scsi/lpfc/lpfc_els.c | 280 +++++++++++++++------- drivers/scsi/lpfc/lpfc_hbadisc.c | 169 +++++++++++--- drivers/scsi/lpfc/lpfc_hw.h | 69 ++++-- drivers/scsi/lpfc/lpfc_hw4.h | 43 ++++ drivers/scsi/lpfc/lpfc_init.c | 7 + drivers/scsi/lpfc/lpfc_nportdisc.c | 177 ++++++++++++-- drivers/scsi/lpfc/lpfc_scsi.c | 3 +- 10 files changed, 851 insertions(+), 279 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 77ad757ca231..81c47d1aebb2 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -123,6 +123,8 @@ struct perf_prof { uint16_t wqidx[40]; }; +#define LPFC_FC4_TYPE_BITMASK 0x00000100 + /* Provide DMA memory definitions the driver uses per port instance. */ struct lpfc_dmabuf { struct list_head list; @@ -390,7 +392,8 @@ struct lpfc_vport { int32_t stopped; /* HBA has not been restarted since last ERATT */ uint8_t fc_linkspeed; /* Link speed after last READ_LA */ - uint32_t num_disc_nodes; /*in addition to hba_state */ + uint32_t num_disc_nodes; /* in addition to hba_state */ + uint32_t gidft_inp; /* cnt of outstanding GID_FTs */ uint32_t fc_nlp_cnt; /* outstanding NODELIST requests */ uint32_t fc_rscn_id_cnt; /* count of RSCNs payloads in list */ @@ -443,7 +446,6 @@ struct lpfc_vport { uint32_t cfg_max_scsicmpl_time; uint32_t cfg_tgt_queue_depth; uint32_t cfg_first_burst_size; - uint32_t dev_loss_tmo_changed; struct fc_vport *fc_vport; diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 4ac03b16d17f..afb25369bc3b 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -40,8 +40,9 @@ #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_version.h" @@ -453,8 +454,73 @@ lpfc_find_vport_by_did(struct lpfc_hba *phba, uint32_t did) { return NULL; } +static void +lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) +{ + struct lpfc_nodelist *ndlp; + + if ((vport->port_type != LPFC_NPIV_PORT) || + !(vport->ct_flags & FC_CT_RFF_ID) || !vport->cfg_restrict_login) { + + ndlp = lpfc_setup_disc_node(vport, Did); + + if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { + /* By default, the driver expects to support FCP FC4 */ + if (fc4_type == FC_TYPE_FCP) + ndlp->nlp_fc4_type |= NLP_FC4_FCP; + + if (fc4_type == FC_TYPE_NVME) + ndlp->nlp_fc4_type |= NLP_FC4_NVME; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "0238 Process x%06x NameServer Rsp " + "Data: x%x x%x x%x x%x\n", Did, + 
ndlp->nlp_flag, ndlp->nlp_fc4_type, + vport->fc_flag, + vport->fc_rscn_id_cnt); + } else + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "0239 Skip x%06x NameServer Rsp " + "Data: x%x x%x\n", Did, + vport->fc_flag, + vport->fc_rscn_id_cnt); + + } else { + if (!(vport->fc_flag & FC_RSCN_MODE) || + lpfc_rscn_payload_check(vport, Did)) { + /* + * This NPortID was previously a FCP target, + * Don't even bother to send GFF_ID. + */ + ndlp = lpfc_findnode_did(vport, Did); + if (ndlp && NLP_CHK_NODE_ACT(ndlp)) + ndlp->nlp_fc4_type = fc4_type; + + if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { + ndlp->nlp_fc4_type = fc4_type; + + if (ndlp->nlp_type & NLP_FCP_TARGET) + lpfc_setup_disc_node(vport, Did); + + else if (lpfc_ns_cmd(vport, SLI_CTNS_GFF_ID, + 0, Did) == 0) + vport->num_disc_nodes++; + + else + lpfc_setup_disc_node(vport, Did); + } + } else + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "0245 Skip x%06x NameServer Rsp " + "Data: x%x x%x\n", Did, + vport->fc_flag, + vport->fc_rscn_id_cnt); + } +} + static int -lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint32_t Size) +lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, + uint32_t Size) { struct lpfc_hba *phba = vport->phba; struct lpfc_sli_ct_request *Response = @@ -499,97 +565,12 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint32_t Size) */ if ((Did != vport->fc_myDID) && ((lpfc_find_vport_by_did(phba, Did) == NULL) || - vport->cfg_peer_port_login)) { - if ((vport->port_type != LPFC_NPIV_PORT) || - (!(vport->ct_flags & FC_CT_RFF_ID)) || - (!vport->cfg_restrict_login)) { - ndlp = lpfc_setup_disc_node(vport, Did); - if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { - lpfc_debugfs_disc_trc(vport, - LPFC_DISC_TRC_CT, - "Parse GID_FTrsp: " - "did:x%x flg:x%x x%x", - Did, ndlp->nlp_flag, - vport->fc_flag); - - lpfc_printf_vlog(vport, - KERN_INFO, - LOG_DISCOVERY, - "0238 Process " - "x%x NameServer Rsp" - "Data: x%x x%x x%x\n", - Did, ndlp->nlp_flag, - vport->fc_flag, - vport->fc_rscn_id_cnt); - } else { - lpfc_debugfs_disc_trc(vport, - LPFC_DISC_TRC_CT, - "Skip1 GID_FTrsp: " - "did:x%x flg:x%x cnt:%d", - Did, vport->fc_flag, - vport->fc_rscn_id_cnt); - - lpfc_printf_vlog(vport, - KERN_INFO, - LOG_DISCOVERY, - "0239 Skip x%x " - "NameServer Rsp Data: " - "x%x x%x\n", - Did, vport->fc_flag, - vport->fc_rscn_id_cnt); - } - - } else { - if (!(vport->fc_flag & FC_RSCN_MODE) || - (lpfc_rscn_payload_check(vport, Did))) { - lpfc_debugfs_disc_trc(vport, - LPFC_DISC_TRC_CT, - "Query GID_FTrsp: " - "did:x%x flg:x%x cnt:%d", - Did, vport->fc_flag, - vport->fc_rscn_id_cnt); - - /* This NPortID was previously - * a FCP target, * Don't even - * bother to send GFF_ID. 
- */ - ndlp = lpfc_findnode_did(vport, - Did); - if (ndlp && - NLP_CHK_NODE_ACT(ndlp) - && (ndlp->nlp_type & - NLP_FCP_TARGET)) - lpfc_setup_disc_node - (vport, Did); - else if (lpfc_ns_cmd(vport, - SLI_CTNS_GFF_ID, - 0, Did) == 0) - vport->num_disc_nodes++; - else - lpfc_setup_disc_node - (vport, Did); - } - else { - lpfc_debugfs_disc_trc(vport, - LPFC_DISC_TRC_CT, - "Skip2 GID_FTrsp: " - "did:x%x flg:x%x cnt:%d", - Did, vport->fc_flag, - vport->fc_rscn_id_cnt); - - lpfc_printf_vlog(vport, - KERN_INFO, - LOG_DISCOVERY, - "0245 Skip x%x " - "NameServer Rsp Data: " - "x%x x%x\n", - Did, vport->fc_flag, - vport->fc_rscn_id_cnt); - } - } - } + vport->cfg_peer_port_login)) + lpfc_prep_node_fc4type(vport, Did, fc4_type); + if (CTentry & (cpu_to_be32(SLI_CT_LAST_ENTRY))) goto nsout1; + Cnt -= sizeof(uint32_t); } ctptr = NULL; @@ -609,16 +590,18 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct Scsi_Host *shost = lpfc_shost_from_vport(vport); IOCB_t *irsp; struct lpfc_dmabuf *outp; + struct lpfc_dmabuf *inp; struct lpfc_sli_ct_request *CTrsp; + struct lpfc_sli_ct_request *CTreq; struct lpfc_nodelist *ndlp; - int rc; + int rc, type; /* First save ndlp, before we overwrite it */ ndlp = cmdiocb->context_un.ndlp; /* we pass cmdiocb to state machine which needs rspiocb as well */ cmdiocb->context_un.rsp_iocb = rspiocb; - + inp = (struct lpfc_dmabuf *) cmdiocb->context1; outp = (struct lpfc_dmabuf *) cmdiocb->context2; irsp = &rspiocb->iocb; @@ -656,9 +639,14 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, IOERR_NO_RESOURCES) vport->fc_ns_retry++; + type = lpfc_get_gidft_type(vport, cmdiocb); + if (type == 0) + goto out; + /* CT command is being retried */ + vport->gidft_inp--; rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, - vport->fc_ns_retry, 0); + vport->fc_ns_retry, type); if (rc == 0) goto out; } @@ -670,13 +658,18 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp->ulpStatus, vport->fc_ns_retry); } else { /* Good status, continue checking */ + CTreq = (struct lpfc_sli_ct_request *) inp->virt; CTrsp = (struct lpfc_sli_ct_request *) outp->virt; if (CTrsp->CommandResponse.bits.CmdRsp == cpu_to_be16(SLI_CT_RESPONSE_FS_ACC)) { lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, - "0208 NameServer Rsp Data: x%x\n", - vport->fc_flag); - lpfc_ns_rsp(vport, outp, + "0208 NameServer Rsp Data: x%x x%x\n", + vport->fc_flag, + CTreq->un.gid.Fc4Type); + + lpfc_ns_rsp(vport, + outp, + CTreq->un.gid.Fc4Type, (uint32_t) (irsp->un.genreq64.bdl.bdeSize)); } else if (CTrsp->CommandResponse.bits.CmdRsp == be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) { @@ -731,9 +724,11 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, (uint32_t) CTrsp->ReasonCode, (uint32_t) CTrsp->Explanation); } + vport->gidft_inp--; } /* Link up / RSCN discovery */ - if (vport->num_disc_nodes == 0) { + if ((vport->num_disc_nodes == 0) && + (vport->gidft_inp == 0)) { /* * The driver has cycled through all Nports in the RSCN payload. 
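 * With both FCP and NVME GID_FT queries possible, gidft_inp tracks how
 * many are still outstanding; it must reach zero along with
 * num_disc_nodes before this path runs.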
* Complete the handling by cleaning up and marking the @@ -881,6 +876,56 @@ lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, return; } +static void +lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) +{ + struct lpfc_vport *vport = cmdiocb->vport; + IOCB_t *irsp = &rspiocb->iocb; + struct lpfc_dmabuf *inp = (struct lpfc_dmabuf *)cmdiocb->context1; + struct lpfc_dmabuf *outp = (struct lpfc_dmabuf *)cmdiocb->context2; + struct lpfc_sli_ct_request *CTrsp; + int did; + struct lpfc_nodelist *ndlp; + uint32_t fc4_data_0, fc4_data_1; + + did = ((struct lpfc_sli_ct_request *)inp->virt)->un.gft.PortId; + did = be32_to_cpu(did); + + if (irsp->ulpStatus == IOSTAT_SUCCESS) { + /* Good status, continue checking */ + CTrsp = (struct lpfc_sli_ct_request *)outp->virt; + fc4_data_0 = be32_to_cpu(CTrsp->un.gft_acc.fc4_types[0]); + fc4_data_1 = be32_to_cpu(CTrsp->un.gft_acc.fc4_types[1]); + lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY, + "3062 DID x%06x GFT Wd0 x%08x Wd1 x%08x\n", + did, fc4_data_0, fc4_data_1); + + ndlp = lpfc_findnode_did(vport, did); + if (ndlp) { + /* The bitmask value for FCP and NVME FCP types is + * the same because they are 32 bits distant from + * each other in word0 and word0. + */ + if (fc4_data_0 & LPFC_FC4_TYPE_BITMASK) + ndlp->nlp_fc4_type |= NLP_FC4_FCP; + if (fc4_data_1 & LPFC_FC4_TYPE_BITMASK) + ndlp->nlp_fc4_type |= NLP_FC4_NVME; + lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY, + "3064 Setting ndlp %p, DID x%06x with " + "FC4 x%08x, Data: x%08x x%08x\n", + ndlp, did, ndlp->nlp_fc4_type, + FC_TYPE_FCP, FC_TYPE_NVME); + } + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); + lpfc_issue_els_prli(vport, ndlp, 0); + } else + lpfc_printf_vlog(vport, KERN_ERR, LOG_DISCOVERY, + "3065 GFT_ID failed x%08x\n", irsp->ulpStatus); + + lpfc_ct_free_iocb(phba, cmdiocb); +} static void lpfc_cmpl_ct(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, @@ -1071,31 +1116,27 @@ lpfc_cmpl_ct_cmd_rff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, return; } +/* + * Although the symbolic port name is thought to be an integer + * as of January 18, 2016, leave it as a string until more of + * the record state becomes defined. + */ int lpfc_vport_symbolic_port_name(struct lpfc_vport *vport, char *symbol, size_t size) { int n; - uint8_t *wwn = vport->phba->wwpn; - - n = snprintf(symbol, size, - "Emulex PPN-%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", - wwn[0], wwn[1], wwn[2], wwn[3], - wwn[4], wwn[5], wwn[6], wwn[7]); - if (vport->port_type == LPFC_PHYSICAL_PORT) - return n; - - if (n < size) - n += snprintf(symbol + n, size - n, " VPort-%d", vport->vpi); - - if (n < size && - strlen(vport->fc_vport->symbolic_name)) - n += snprintf(symbol + n, size - n, " VName-%s", - vport->fc_vport->symbolic_name); + /* + * Use the lpfc board number as the Symbolic Port + * Name object. NPIV is not in play so this integer + * value is sufficient and unique per FC-ID. 
+ */ + n = snprintf(symbol, size, "%d", vport->phba->brd_no); return n; } + int lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol, size_t size) @@ -1106,24 +1147,26 @@ lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol, lpfc_decode_firmware_rev(vport->phba, fwrev, 0); n = snprintf(symbol, size, "Emulex %s", vport->phba->ModelName); - if (size < n) return n; - n += snprintf(symbol + n, size - n, " FV%s", fwrev); + n += snprintf(symbol + n, size - n, " FV%s", fwrev); if (size < n) return n; - n += snprintf(symbol + n, size - n, " DV%s", lpfc_release_version); + n += snprintf(symbol + n, size - n, " DV%s.", + lpfc_release_version); if (size < n) return n; - n += snprintf(symbol + n, size - n, " HN:%s", init_utsname()->nodename); - /* Note :- OS name is "Linux" */ + n += snprintf(symbol + n, size - n, " HN:%s.", + init_utsname()->nodename); if (size < n) return n; - n += snprintf(symbol + n, size - n, " OS:%s", init_utsname()->sysname); + /* Note :- OS name is "Linux" */ + n += snprintf(symbol + n, size - n, " OS:%s\n", + init_utsname()->sysname); return n; } @@ -1147,6 +1190,27 @@ lpfc_find_map_node(struct lpfc_vport *vport) return cnt; } +/* + * This routine will return the FC4 Type associated with the CT + * GID_FT command. + */ +int +lpfc_get_gidft_type(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb) +{ + struct lpfc_sli_ct_request *CtReq; + struct lpfc_dmabuf *mp; + uint32_t type; + + mp = cmdiocb->context1; + if (mp == NULL) + return 0; + CtReq = (struct lpfc_sli_ct_request *)mp->virt; + type = (uint32_t)CtReq->un.gid.Fc4Type; + if ((type != SLI_CTPT_FCP) && (type != SLI_CTPT_NVME)) + return 0; + return type; +} + /* * lpfc_ns_cmd * Description: @@ -1207,8 +1271,9 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, /* NameServer Req */ lpfc_printf_vlog(vport, KERN_INFO ,LOG_DISCOVERY, - "0236 NameServer Req Data: x%x x%x x%x\n", - cmdcode, vport->fc_flag, vport->fc_rscn_id_cnt); + "0236 NameServer Req Data: x%x x%x x%x x%x\n", + cmdcode, vport->fc_flag, vport->fc_rscn_id_cnt, + context); bpl = (struct ulp_bde64 *) bmp->virt; memset(bpl, 0, sizeof(struct ulp_bde64)); @@ -1219,6 +1284,8 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, bpl->tus.f.bdeSize = GID_REQUEST_SZ; else if (cmdcode == SLI_CTNS_GFF_ID) bpl->tus.f.bdeSize = GFF_REQUEST_SZ; + else if (cmdcode == SLI_CTNS_GFT_ID) + bpl->tus.f.bdeSize = GFT_REQUEST_SZ; else if (cmdcode == SLI_CTNS_RFT_ID) bpl->tus.f.bdeSize = RFT_REQUEST_SZ; else if (cmdcode == SLI_CTNS_RNN_ID) @@ -1246,7 +1313,8 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, case SLI_CTNS_GID_FT: CtReq->CommandResponse.bits.CmdRsp = cpu_to_be16(SLI_CTNS_GID_FT); - CtReq->un.gid.Fc4Type = SLI_CTPT_FCP; + CtReq->un.gid.Fc4Type = context; + if (vport->port_state < LPFC_NS_QRY) vport->port_state = LPFC_NS_QRY; lpfc_set_disctmo(vport); @@ -1261,12 +1329,32 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, cmpl = lpfc_cmpl_ct_cmd_gff_id; break; + case SLI_CTNS_GFT_ID: + CtReq->CommandResponse.bits.CmdRsp = + cpu_to_be16(SLI_CTNS_GFT_ID); + CtReq->un.gft.PortId = cpu_to_be32(context); + cmpl = lpfc_cmpl_ct_cmd_gft_id; + break; + case SLI_CTNS_RFT_ID: vport->ct_flags &= ~FC_CT_RFT_ID; CtReq->CommandResponse.bits.CmdRsp = cpu_to_be16(SLI_CTNS_RFT_ID); CtReq->un.rft.PortId = cpu_to_be32(vport->fc_myDID); - CtReq->un.rft.fcpReg = 1; + + /* Register FC4 FCP type if enabled. 
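 * (cfg_enable_fc4_type selects FCP, NVME or both; the NVME FC4 type is
 * registered just below through the rsvd[0] word.)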
*/ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) + CtReq->un.rft.fcpReg = 1; + + /* Register NVME type if enabled. Defined LE and swapped. + * rsvd[0] is used as word1 because of the hard-coded + * word0 usage in the ct_request data structure. + */ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) + CtReq->un.rft.rsvd[0] = cpu_to_be32(0x00000100); + cmpl = lpfc_cmpl_ct_cmd_rft_id; break; @@ -1316,7 +1404,25 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, cpu_to_be16(SLI_CTNS_RFF_ID); CtReq->un.rff.PortId = cpu_to_be32(vport->fc_myDID); CtReq->un.rff.fbits = FC4_FEATURE_INIT; - CtReq->un.rff.type_code = FC_TYPE_FCP; + + /* The driver always supports FC_TYPE_FCP. However, the + * caller can specify NVME (type x28) as well. But only + * these that FC4 type is supported. + */ + if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && + (context == FC_TYPE_NVME)) { + /* todo: init: revise localport nvme attributes */ + CtReq->un.rff.type_code = context; + + } else if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) && + (context == FC_TYPE_FCP)) + CtReq->un.rff.type_code = context; + + else + goto ns_cmd_free_bmpvirt; + cmpl = lpfc_cmpl_ct_cmd_rff_id; break; } @@ -1337,6 +1443,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, */ lpfc_nlp_put(ndlp); +ns_cmd_free_bmpvirt: lpfc_mbuf_free(phba, bmp->virt, bmp->phys); ns_cmd_free_bmp: kfree(bmp); diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 361f5b3d9d93..13cc1d19b336 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -86,6 +86,17 @@ struct lpfc_nodelist { #define NLP_FABRIC 0x4 /* entry rep a Fabric entity */ #define NLP_FCP_TARGET 0x8 /* entry is an FCP target */ #define NLP_FCP_INITIATOR 0x10 /* entry is an FCP Initiator */ +#define NLP_NVME_TARGET 0x20 /* entry is a NVME Target */ +#define NLP_NVME_INITIATOR 0x40 /* entry is a NVME Initiator */ + + uint16_t nlp_fc4_type; /* FC types node supports. */ + /* Assigned from GID_FF, only + * FCP (0x8) and NVME (0x28) + * supported. + */ +#define NLP_FC4_NONE 0x0 +#define NLP_FC4_FCP 0x1 /* FC4 Type FCP (value x8)) */ +#define NLP_FC4_NVME 0x2 /* FC4 TYPE NVME (value x28) */ uint16_t nlp_rpi; uint16_t nlp_state; /* state transition indicator */ @@ -107,8 +118,8 @@ struct lpfc_nodelist { struct timer_list nlp_delayfunc; /* Used for delayed ELS cmds */ struct lpfc_hba *phba; - struct fc_rport *rport; /* Corresponding FC transport - port structure */ + struct fc_rport *rport; /* scsi_transport_fc port structure */ + struct lpfc_nvme_rport *nrport; /* nvme transport rport struct. */ struct lpfc_vport *vport; struct lpfc_work_evt els_retry_evt; struct lpfc_work_evt dev_loss_evt; @@ -118,6 +129,10 @@ struct lpfc_nodelist { unsigned long last_change_time; unsigned long *active_rrqs_xri_bitmap; struct lpfc_scsicmd_bkt *lat_data; /* Latency data */ + uint32_t fc4_prli_sent; + uint32_t upcall_flags; + uint32_t nvme_fb_size; /* NVME target's supported byte cnt */ +#define NVME_FB_BIT_SHIFT 9 /* PRLI Rsp first burst in 512B units. 
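					 * A reported value of 8 therefore
					 * corresponds to
					 * 8 << NVME_FB_BIT_SHIFT = 4096 bytes.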
*/ }; struct lpfc_node_rrq { struct list_head list; diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 7be235457540..e164eed25e3d 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -29,7 +29,6 @@ #include #include - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -1513,7 +1512,7 @@ static struct lpfc_nodelist * lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, struct lpfc_nodelist *ndlp) { - struct lpfc_vport *vport = ndlp->vport; + struct lpfc_vport *vport = ndlp->vport; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *new_ndlp; struct lpfc_rport_data *rdata; @@ -1868,10 +1867,12 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* PLOGI completes to NPort */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "0102 PLOGI completes to NPort x%x " + "0102 PLOGI completes to NPort x%06x " "Data: x%x x%x x%x x%x x%x\n", - ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4], - irsp->ulpTimeout, disc, vport->num_disc_nodes); + ndlp->nlp_DID, ndlp->nlp_fc4_type, + irsp->ulpStatus, irsp->un.ulpWord[4], + disc, vport->num_disc_nodes); + /* Check to see if link went down during discovery */ if (lpfc_els_chk_latt(vport)) { spin_lock_irq(shost->host_lock); @@ -2000,7 +2001,6 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry) sp->cmn.fcphHigh = FC_PH3; sp->cmn.valid_vendor_ver_level = 0; - memset(sp->vendorVersion, 0, sizeof(sp->vendorVersion)); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, "Issue PLOGI: did:x%x", @@ -2052,14 +2052,17 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, "PRLI cmpl: status:x%x/x%x did:x%x", irsp->ulpStatus, irsp->un.ulpWord[4], ndlp->nlp_DID); + + /* Ddriver supports multiple FC4 types. Counters matter. */ + vport->fc_prli_sent--; + /* PRLI completes to NPort */ lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "0103 PRLI completes to NPort x%x " + "0103 PRLI completes to NPort x%06x " "Data: x%x x%x x%x x%x\n", ndlp->nlp_DID, irsp->ulpStatus, irsp->un.ulpWord[4], - irsp->ulpTimeout, vport->num_disc_nodes); + vport->num_disc_nodes, ndlp->fc4_prli_sent); - vport->fc_prli_sent--; /* Check to see if link went down during discovery */ if (lpfc_els_chk_latt(vport)) goto out; @@ -2068,6 +2071,7 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Check for retry */ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { /* ELS command is being retried */ + ndlp->fc4_prli_sent--; goto out; } /* PRLI failed */ @@ -2082,9 +2086,14 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI); } else - /* Good status, call state machine */ + /* Good status, call state machine. However, if another + * PRLI is outstanding, don't call the state machine + * because final disposition to Mapped or Unmapped is + * completed there. 
+ */ lpfc_disc_state_machine(vport, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI); + out: lpfc_els_free_iocb(phba, cmdiocb); return; @@ -2118,42 +2127,94 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_hba *phba = vport->phba; PRLI *npr; + struct lpfc_nvme_prli *npr_nvme; struct lpfc_iocbq *elsiocb; uint8_t *pcmd; uint16_t cmdsize; - - cmdsize = (sizeof(uint32_t) + sizeof(PRLI)); + u32 local_nlp_type, elscmd; + + local_nlp_type = ndlp->nlp_fc4_type; + + send_next_prli: + if (local_nlp_type & NLP_FC4_FCP) { + /* Payload is 4 + 16 = 20 x14 bytes. */ + cmdsize = (sizeof(uint32_t) + sizeof(PRLI)); + elscmd = ELS_CMD_PRLI; + } else if (local_nlp_type & NLP_FC4_NVME) { + /* Payload is 4 + 20 = 24 x18 bytes. */ + cmdsize = (sizeof(uint32_t) + sizeof(struct lpfc_nvme_prli)); + elscmd = ELS_CMD_NVMEPRLI; + } else { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "3083 Unknown FC_TYPE x%x ndlp x%06x\n", + ndlp->nlp_fc4_type, ndlp->nlp_DID); + return 1; + } elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, - ndlp->nlp_DID, ELS_CMD_PRLI); + ndlp->nlp_DID, elscmd); if (!elsiocb) return 1; pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); /* For PRLI request, remainder of payload is service parameters */ - memset(pcmd, 0, (sizeof(PRLI) + sizeof(uint32_t))); - *((uint32_t *) (pcmd)) = ELS_CMD_PRLI; - pcmd += sizeof(uint32_t); + memset(pcmd, 0, cmdsize); - /* For PRLI, remainder of payload is PRLI parameter page */ - npr = (PRLI *) pcmd; - /* - * If our firmware version is 3.20 or later, - * set the following bits for FC-TAPE support. - */ - if (phba->vpd.rev.feaLevelHigh >= 0x02) { - npr->ConfmComplAllowed = 1; - npr->Retry = 1; - npr->TaskRetryIdReq = 1; - } - npr->estabImagePair = 1; - npr->readXferRdyDis = 1; - if (vport->cfg_first_burst_size) - npr->writeXferRdyDis = 1; + if (local_nlp_type & NLP_FC4_FCP) { + /* Remainder of payload is FCP PRLI parameter page. + * Note: this data structure is defined as + * BE/LE in the structure definition so no + * byte swap call is made. + */ + *((uint32_t *)(pcmd)) = ELS_CMD_PRLI; + pcmd += sizeof(uint32_t); + npr = (PRLI *)pcmd; - /* For FCP support */ - npr->prliType = PRLI_FCP_TYPE; - npr->initiatorFunc = 1; + /* + * If our firmware version is 3.20 or later, + * set the following bits for FC-TAPE support. + */ + if (phba->vpd.rev.feaLevelHigh >= 0x02) { + npr->ConfmComplAllowed = 1; + npr->Retry = 1; + npr->TaskRetryIdReq = 1; + } + npr->estabImagePair = 1; + npr->readXferRdyDis = 1; + if (vport->cfg_first_burst_size) + npr->writeXferRdyDis = 1; + + /* For FCP support */ + npr->prliType = PRLI_FCP_TYPE; + npr->initiatorFunc = 1; + elsiocb->iocb_flag |= LPFC_PRLI_FCP_REQ; + + /* Remove FCP type - processed. */ + local_nlp_type &= ~NLP_FC4_FCP; + } else if (local_nlp_type & NLP_FC4_NVME) { + /* Remainder of payload is NVME PRLI parameter page. + * This data structure is the newer definition that + * uses bf macros so a byte swap is required. + */ + *((uint32_t *)(pcmd)) = ELS_CMD_NVMEPRLI; + pcmd += sizeof(uint32_t); + npr_nvme = (struct lpfc_nvme_prli *)pcmd; + bf_set(prli_type_code, npr_nvme, PRLI_NVME_TYPE); + bf_set(prli_estabImagePair, npr_nvme, 0); /* Should be 0 */ + + /* Only initiators request first burst. 
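 * (phba->nvmet_support marks target mode, so prli_fba is left clear
 * there even when cfg_nvme_enable_fb is set.)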
*/ + if ((phba->cfg_nvme_enable_fb) && + !phba->nvmet_support) + bf_set(prli_fba, npr_nvme, 1); + + bf_set(prli_init, npr_nvme, 1); + npr_nvme->word1 = cpu_to_be32(npr_nvme->word1); + npr_nvme->word4 = cpu_to_be32(npr_nvme->word4); + elsiocb->iocb_flag |= LPFC_PRLI_NVME_REQ; + + /* Remove NVME type - processed. */ + local_nlp_type &= ~NLP_FC4_NVME; + } lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, "Issue PRLI: did:x%x", @@ -2172,7 +2233,20 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_els_free_iocb(phba, elsiocb); return 1; } + + /* The vport counters are used for lpfc_scan_finished, but + * the ndlp is used to track outstanding PRLIs for different + * FC4 types. + */ vport->fc_prli_sent++; + ndlp->fc4_prli_sent++; + + /* The driver supports 2 FC4 types. Make sure + * a PRLI is issued for all types before exiting. + */ + if (local_nlp_type & (NLP_FC4_FCP | NLP_FC4_NVME)) + goto send_next_prli; + return 0; } @@ -2543,6 +2617,9 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if ((vport->fc_flag & FC_PT2PT) && !(vport->fc_flag & FC_PT2PT_PLOGI)) { phba->pport->fc_myDID = 0; + + /* todo: init: revise localport nvme attributes */ + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mbox) { lpfc_config_link(phba, mbox); @@ -3055,6 +3132,7 @@ lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp) } break; case ELS_CMD_PRLI: + case ELS_CMD_NVMEPRLI: if (!lpfc_issue_els_prli(vport, ndlp, retry)) { ndlp->nlp_prev_state = ndlp->nlp_state; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); @@ -3245,7 +3323,8 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, break; } if ((cmd == ELS_CMD_PLOGI) || - (cmd == ELS_CMD_PRLI)) { + (cmd == ELS_CMD_PRLI) || + (cmd == ELS_CMD_NVMEPRLI)) { delay = 1000; maxretry = lpfc_max_els_tries + 1; retry = 1; @@ -3265,7 +3344,8 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, case LSRJT_LOGICAL_BSY: if ((cmd == ELS_CMD_PLOGI) || - (cmd == ELS_CMD_PRLI)) { + (cmd == ELS_CMD_PRLI) || + (cmd == ELS_CMD_NVMEPRLI)) { delay = 1000; maxretry = 48; } else if (cmd == ELS_CMD_FDISC) { @@ -3399,7 +3479,8 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, spin_unlock_irq(shost->host_lock); ndlp->nlp_prev_state = ndlp->nlp_state; - if (cmd == ELS_CMD_PRLI) + if ((cmd == ELS_CMD_PRLI) || + (cmd == ELS_CMD_NVMEPRLI)) lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); else @@ -3430,6 +3511,7 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, lpfc_issue_els_adisc(vport, ndlp, cmdiocb->retry); return 1; case ELS_CMD_PRLI: + case ELS_CMD_NVMEPRLI: ndlp->nlp_prev_state = ndlp->nlp_state; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); lpfc_issue_els_prli(vport, ndlp, cmdiocb->retry); @@ -3990,14 +4072,10 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag, sizeof(struct lpfc_name)); memcpy(&sp->nodeName, &vport->fc_sparam.nodeName, sizeof(struct lpfc_name)); - } else { + } else memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm)); - sp->cmn.valid_vendor_ver_level = 0; - memset(sp->vendorVersion, 0, sizeof(sp->vendorVersion)); - } - lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, "Issue ACC FLOGI/PLOGI: did:x%x flg:x%x", ndlp->nlp_DID, ndlp->nlp_flag, 0); @@ -4231,17 +4309,43 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, { struct lpfc_hba *phba = vport->phba; PRLI *npr; + struct lpfc_nvme_prli *npr_nvme; lpfc_vpd_t *vpd; IOCB_t *icmd; IOCB_t *oldcmd; struct lpfc_iocbq *elsiocb; uint8_t 
*pcmd; uint16_t cmdsize; + uint32_t prli_fc4_req, *req_payload; + struct lpfc_dmabuf *req_buf; int rc; + u32 elsrspcmd; + + /* Need the incoming PRLI payload to determine if the ACC is for an + * FC4 or NVME PRLI type. The PRLI type is at word 1. + */ + req_buf = (struct lpfc_dmabuf *)oldiocb->context2; + req_payload = (((uint32_t *)req_buf->virt) + 1); + + /* PRLI type payload is at byte 3 for FCP or NVME. */ + prli_fc4_req = be32_to_cpu(*req_payload); + prli_fc4_req = (prli_fc4_req >> 24) & 0xff; + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "6127 PRLI_ACC: Req Type x%x, Word1 x%08x\n", + prli_fc4_req, *((uint32_t *)req_payload)); + + if (prli_fc4_req == PRLI_FCP_TYPE) { + cmdsize = sizeof(uint32_t) + sizeof(PRLI); + elsrspcmd = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)); + } else if (prli_fc4_req & PRLI_NVME_TYPE) { + cmdsize = sizeof(uint32_t) + sizeof(struct lpfc_nvme_prli); + elsrspcmd = (ELS_CMD_ACC | (ELS_CMD_NVMEPRLI & ~ELS_RSP_MASK)); + } else { + return 1; + } - cmdsize = sizeof(uint32_t) + sizeof(PRLI); elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, - ndlp->nlp_DID, (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK))); + ndlp->nlp_DID, elsrspcmd); if (!elsiocb) return 1; @@ -4258,33 +4362,56 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + memset(pcmd, 0, cmdsize); *((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)); pcmd += sizeof(uint32_t); /* For PRLI, remainder of payload is PRLI parameter page */ - memset(pcmd, 0, sizeof(PRLI)); - - npr = (PRLI *) pcmd; vpd = &phba->vpd; - /* - * If the remote port is a target and our firmware version is 3.20 or - * later, set the following bits for FC-TAPE support. - */ - if ((ndlp->nlp_type & NLP_FCP_TARGET) && - (vpd->rev.feaLevelHigh >= 0x02)) { - npr->ConfmComplAllowed = 1; - npr->Retry = 1; - npr->TaskRetryIdReq = 1; - } - - npr->acceptRspCode = PRLI_REQ_EXECUTED; - npr->estabImagePair = 1; - npr->readXferRdyDis = 1; - npr->ConfmComplAllowed = 1; - npr->prliType = PRLI_FCP_TYPE; - npr->initiatorFunc = 1; + if (prli_fc4_req == PRLI_FCP_TYPE) { + /* + * If the remote port is a target and our firmware version + * is 3.20 or later, set the following bits for FC-TAPE + * support. 
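 * (ConfmComplAllowed, Retry and TaskRetryIdReq, set just below.)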
+ */ + npr = (PRLI *) pcmd; + if ((ndlp->nlp_type & NLP_FCP_TARGET) && + (vpd->rev.feaLevelHigh >= 0x02)) { + npr->ConfmComplAllowed = 1; + npr->Retry = 1; + npr->TaskRetryIdReq = 1; + } + npr->acceptRspCode = PRLI_REQ_EXECUTED; + npr->estabImagePair = 1; + npr->readXferRdyDis = 1; + npr->ConfmComplAllowed = 1; + npr->prliType = PRLI_FCP_TYPE; + npr->initiatorFunc = 1; + } else if (prli_fc4_req & PRLI_NVME_TYPE) { + /* Respond with an NVME PRLI Type */ + npr_nvme = (struct lpfc_nvme_prli *) pcmd; + bf_set(prli_type_code, npr_nvme, PRLI_NVME_TYPE); + bf_set(prli_estabImagePair, npr_nvme, 0); /* Should be 0 */ + bf_set(prli_acc_rsp_code, npr_nvme, PRLI_REQ_EXECUTED); + bf_set(prli_init, npr_nvme, 1); + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6015 NVME issue PRLI ACC word1 x%08x " + "word4 x%08x word5 x%08x flag x%x, " + "fcp_info x%x nlp_type x%x\n", + npr_nvme->word1, npr_nvme->word4, + npr_nvme->word5, ndlp->nlp_flag, + ndlp->nlp_fcp_info, ndlp->nlp_type); + npr_nvme->word1 = cpu_to_be32(npr_nvme->word1); + npr_nvme->word4 = cpu_to_be32(npr_nvme->word4); + npr_nvme->word5 = cpu_to_be32(npr_nvme->word5); + } else + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6128 Unknown FC_TYPE x%x x%x ndlp x%06x\n", + prli_fc4_req, ndlp->nlp_fc4_type, + ndlp->nlp_DID); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, "Issue ACC PRLI: did:x%x flg:x%x", @@ -4411,7 +4538,7 @@ lpfc_els_rsp_rnid_acc(struct lpfc_vport *vport, uint8_t format, **/ static void lpfc_els_clear_rrq(struct lpfc_vport *vport, - struct lpfc_iocbq *iocb, struct lpfc_nodelist *ndlp) + struct lpfc_iocbq *iocb, struct lpfc_nodelist *ndlp) { struct lpfc_hba *phba = vport->phba; uint8_t *pcmd; @@ -4909,7 +5036,7 @@ lpfc_rdp_res_opd_desc(struct fc_rdp_opd_sfp_desc *desc, memcpy(desc->opd_info.vendor_name, &page_a0[SSF_VENDOR_NAME], 16); memcpy(desc->opd_info.model_number, &page_a0[SSF_VENDOR_PN], 16); memcpy(desc->opd_info.serial_number, &page_a0[SSF_VENDOR_SN], 16); - memcpy(desc->opd_info.revision, &page_a0[SSF_VENDOR_REV], 2); + memcpy(desc->opd_info.revision, &page_a0[SSF_VENDOR_REV], 4); memcpy(desc->opd_info.date, &page_a0[SSF_DATE_CODE], 8); desc->length = cpu_to_be32(sizeof(desc->opd_info)); return sizeof(struct fc_rdp_opd_sfp_desc); @@ -5004,7 +5131,7 @@ lpfc_rdp_res_diag_port_names(struct fc_rdp_port_name_desc *desc, memcpy(desc->port_names.wwnn, phba->wwnn, sizeof(desc->port_names.wwnn)); - memcpy(desc->port_names.wwpn, &phba->wwpn, + memcpy(desc->port_names.wwpn, phba->wwpn, sizeof(desc->port_names.wwpn)); desc->length = cpu_to_be32(sizeof(desc->port_names)); @@ -5233,9 +5360,8 @@ lpfc_els_rcv_rdp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, struct ls_rjt stat; if (phba->sli_rev < LPFC_SLI_REV4 || - (bf_get(lpfc_sli_intf_if_type, - &phba->sli4_hba.sli_intf) != - LPFC_SLI_INTF_IF_TYPE_2)) { + bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + LPFC_SLI_INTF_IF_TYPE_2) { rjt_err = LSRJT_UNABLE_TPC; rjt_expl = LSEXP_REQ_UNSUPPORTED; goto error; @@ -5976,9 +6102,11 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport) if (ndlp && NLP_CHK_NODE_ACT(ndlp) && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { /* Good ndlp, issue CT Request to NameServer */ - if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0) == 0) + vport->gidft_inp = 0; + if (lpfc_issue_gidft(vport) == 0) /* Wait for NameServer query cmpl before we can - continue */ + * continue + */ return 1; } else { /* If login to NameServer does not exist, issue one */ @@ -6082,7 +6210,6 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq 
*cmdiocb, (void) lpfc_check_sparm(vport, ndlp, sp, CLASS3, 1); - /* * If our portname is greater than the remote portname, * then we initiate Nport login. @@ -7779,6 +7906,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_els_rcv_fan(vport, elsiocb, ndlp); break; case ELS_CMD_PRLI: + case ELS_CMD_NVMEPRLI: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, "RCV PRLI: did:x%x/ste:x%x flg:x%x", did, vport->port_state, ndlp->nlp_flag); @@ -8883,8 +9011,7 @@ lpfc_cmpl_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, break; } - if (atomic_read(&phba->fabric_iocb_count) == 0) - BUG(); + BUG_ON(atomic_read(&phba->fabric_iocb_count) == 0); cmdiocb->iocb_cmpl = cmdiocb->fabric_iocb_cmpl; cmdiocb->fabric_iocb_cmpl = NULL; @@ -8929,8 +9056,7 @@ lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb) int ready; int ret; - if (atomic_read(&phba->fabric_iocb_count) > 1) - BUG(); + BUG_ON(atomic_read(&phba->fabric_iocb_count) > 1); spin_lock_irqsave(&phba->hbalock, iflags); ready = atomic_read(&phba->fabric_iocb_count) == 0 && diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index ede831d1f467..6e64c8e8e44f 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -31,6 +31,9 @@ #include #include #include +#include + +#include #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -38,8 +41,9 @@ #include "lpfc_disc.h" #include "lpfc_sli.h" #include "lpfc_sli4.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -853,9 +857,12 @@ lpfc_port_link_failure(struct lpfc_vport *vport) void lpfc_linkdown_port(struct lpfc_vport *vport) { + struct lpfc_hba *phba = vport->phba; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKDOWN, 0); + if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME) + fc_host_post_event(shost, fc_get_event_number(), + FCH_EVT_LINKDOWN, 0); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, "Link Down: state:x%x rtry:x%x flg:x%x", @@ -981,7 +988,9 @@ lpfc_linkup_port(struct lpfc_vport *vport) (vport != phba->pport)) return; - fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKUP, 0); + if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME) + fc_host_post_event(shost, fc_get_event_number(), + FCH_EVT_LINKUP, 0); spin_lock_irq(shost->host_lock); vport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY | @@ -3570,6 +3579,8 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); spin_unlock_irq(shost->host_lock); vport->fc_myDID = 0; + + /* todo: init: revise localport nvme attributes */ goto out; } @@ -3819,6 +3830,52 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) return; } + /* + * This routine will issue a GID_FT for each FC4 Type supported + * by the driver. ALL GID_FTs must complete before discovery is started. 
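 * Returns the number of GID_FT requests actually issued (also tracked in
 * vport->gidft_inp); a return of 0 tells the caller to wrap up discovery
 * immediately.
 *
 * A minimal caller sketch, mirroring the name-server registration
 * completion path below:
 *
 *	vport->fc_ns_retry = 0;
 *	if (lpfc_issue_gidft(vport) == 0)
 *		goto out;	/* nothing issued - finish up discovery */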
+ */ +int +lpfc_issue_gidft(struct lpfc_vport *vport) +{ + struct lpfc_hba *phba = vport->phba; + + /* Good status, issue CT Request to NameServer */ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) { + if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, SLI_CTPT_FCP)) { + /* Cannot issue NameServer FCP Query, so finish up + * discovery + */ + lpfc_printf_vlog(vport, KERN_ERR, LOG_SLI, + "0604 %s FC TYPE %x %s\n", + "Failed to issue GID_FT to ", + FC_TYPE_FCP, + "Finishing discovery."); + return 0; + } + vport->gidft_inp++; + } + + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, SLI_CTPT_NVME)) { + /* Cannot issue NameServer NVME Query, so finish up + * discovery + */ + lpfc_printf_vlog(vport, KERN_ERR, LOG_SLI, + "0605 %s FC_TYPE %x %s %d\n", + "Failed to issue GID_FT to ", + FC_TYPE_NVME, + "Finishing discovery: gidftinp ", + vport->gidft_inp); + if (vport->gidft_inp == 0) + return 0; + } else + vport->gidft_inp++; + } + return vport->gidft_inp; +} + /* * This routine handles processing a NameServer REG_LOGIN mailbox * command upon completion. It is setup in the LPFC_MBOXQ @@ -3835,12 +3892,14 @@ lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) pmb->context1 = NULL; pmb->context2 = NULL; + vport->gidft_inp = 0; if (mb->mbxStatus) { -out: lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, "0260 Register NameServer error: 0x%x\n", mb->mbxStatus); + +out: /* decrement the node reference count held for this * callback function. */ @@ -3884,20 +3943,28 @@ lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) lpfc_ns_cmd(vport, SLI_CTNS_RSNN_NN, 0, 0); lpfc_ns_cmd(vport, SLI_CTNS_RSPN_ID, 0, 0); lpfc_ns_cmd(vport, SLI_CTNS_RFT_ID, 0, 0); - lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, 0); + + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP)) + lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, FC_TYPE_FCP); + + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) + lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, FC_TYPE_NVME); /* Issue SCR just before NameServer GID_FT Query */ lpfc_issue_els_scr(vport, SCR_DID, 0); } vport->fc_ns_retry = 0; - /* Good status, issue CT Request to NameServer */ - if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0)) { - /* Cannot issue NameServer Query, so finish up discovery */ + if (lpfc_issue_gidft(vport) == 0) goto out; - } - /* decrement the node reference count held for this + /* + * At this point in time we may need to wait for multiple + * SLI_CTNS_GID_FT CT commands to complete before we start discovery. + * + * decrement the node reference count held for this * callback function. 
*/ lpfc_nlp_put(ndlp); @@ -3990,6 +4057,10 @@ lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp) { struct fc_rport *rport = ndlp->rport; struct lpfc_vport *vport = ndlp->vport; + struct lpfc_hba *phba = vport->phba; + + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME) + return; lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, "rport delete: did:x%x flg:x%x type x%x", @@ -4047,6 +4118,7 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int old_state, int new_state) { struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; if (new_state == NLP_STE_UNMAPPED_NODE) { ndlp->nlp_flag &= ~NLP_NODEV_REMOVE; @@ -4057,23 +4129,51 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, if (new_state == NLP_STE_NPR_NODE) ndlp->nlp_flag &= ~NLP_RCV_PLOGI; - /* Transport interface */ - if (ndlp->rport && (old_state == NLP_STE_MAPPED_NODE || - old_state == NLP_STE_UNMAPPED_NODE)) { - vport->phba->nport_event_cnt++; - lpfc_unregister_remote_port(ndlp); + /* FCP and NVME Transport interface */ + if ((old_state == NLP_STE_MAPPED_NODE || + old_state == NLP_STE_UNMAPPED_NODE)) { + if (ndlp->rport) { + vport->phba->nport_event_cnt++; + lpfc_unregister_remote_port(ndlp); + } + + /* Notify the NVME transport of this rport's loss */ + if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && + (vport->phba->nvmet_support == 0) && + ((ndlp->nlp_fc4_type & NLP_FC4_NVME) || + (ndlp->nlp_DID == Fabric_DID))) { + vport->phba->nport_event_cnt++; + /* todo: init: unregister rport from nvme */ + } } + /* FCP and NVME Transport interfaces */ + if (new_state == NLP_STE_MAPPED_NODE || new_state == NLP_STE_UNMAPPED_NODE) { - vport->phba->nport_event_cnt++; - /* - * Tell the fc transport about the port, if we haven't - * already. If we have, and it's a scsi entity, be - * sure to unblock any attached scsi devices - */ - lpfc_register_remote_port(vport, ndlp); + if ((ndlp->nlp_fc4_type & NLP_FC4_FCP) || + (ndlp->nlp_DID == Fabric_DID)) { + vport->phba->nport_event_cnt++; + /* + * Tell the fc transport about the port, if we haven't + * already. If we have, and it's a scsi entity, be + */ + lpfc_register_remote_port(vport, ndlp); + } + /* Notify the NVME transport of this new rport. */ + if (ndlp->nlp_fc4_type & NLP_FC4_NVME) { + if (vport->phba->nvmet_support == 0) { + /* Register this rport with the transport. + * Initiators take the NDLP ref count in + * the register. + */ + vport->phba->nport_event_cnt++; + /* todo: init: register rport with nvme */ + } + } } + if ((new_state == NLP_STE_MAPPED_NODE) && (vport->stat_data_enabled)) { /* @@ -4091,12 +4191,13 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "0x%x\n", ndlp->nlp_DID); } /* - * if we added to Mapped list, but the remote port - * registration failed or assigned a target id outside - * our presentable range - move the node to the - * Unmapped List + * If the node just added to Mapped list was an FCP target, + * but the remote port registration failed or assigned a target + * id outside the presentable range - move the node to the + * Unmapped List. 
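 * NVME nodes are not subject to this move; the scsi_target_id range
 * check below applies only to FCP targets.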
*/ - if (new_state == NLP_STE_MAPPED_NODE && + if ((new_state == NLP_STE_MAPPED_NODE) && + (ndlp->nlp_type & NLP_FCP_TARGET) && (!ndlp->rport || ndlp->rport->scsi_target_id == -1 || ndlp->rport->scsi_target_id >= LPFC_MAX_TARGET)) { @@ -4230,6 +4331,7 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->vport = vport; ndlp->phba = vport->phba; ndlp->nlp_sid = NLP_NO_SID; + ndlp->nlp_fc4_type = NLP_FC4_NONE; kref_init(&ndlp->kref); NLP_INT_NODE_ACT(ndlp); atomic_set(&ndlp->cmd_pending, 0); @@ -5369,12 +5471,13 @@ lpfc_disc_timeout_handler(struct lpfc_vport *vport) switch (vport->port_state) { case LPFC_LOCAL_CFG_LINK: - /* port_state is identically LPFC_LOCAL_CFG_LINK while waiting for - * FAN - */ - /* FAN timeout */ + /* + * port_state is identically LPFC_LOCAL_CFG_LINK while + * waiting for FAN timeout + */ lpfc_printf_vlog(vport, KERN_WARNING, LOG_DISCOVERY, "0221 FAN timeout\n"); + /* Start discovery by sending FLOGI, clean up old rpis */ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { @@ -5445,8 +5548,8 @@ lpfc_disc_timeout_handler(struct lpfc_vport *vport) if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) { /* Try it one more time */ vport->fc_ns_retry++; - rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, - vport->fc_ns_retry, 0); + vport->gidft_inp = 0; + rc = lpfc_issue_gidft(vport); if (rc == 0) break; } diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 28247c99b4f2..883e6d2a7bc7 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -90,8 +90,10 @@ union CtCommandResponse { uint32_t word; }; -#define FC4_FEATURE_INIT 0x2 -#define FC4_FEATURE_TARGET 0x1 +/* FC4 Feature bits for RFF_ID */ +#define FC4_FEATURE_TARGET 0x1 +#define FC4_FEATURE_INIT 0x2 +#define FC4_FEATURE_NVME_DISC 0x4 struct lpfc_sli_ct_request { /* Structure is in Big Endian format */ @@ -115,6 +117,16 @@ struct lpfc_sli_ct_request { uint8_t AreaScope; uint8_t Fc4Type; /* for GID_FT requests */ } gid; + struct gid_ff { + uint8_t Flags; + uint8_t DomainScope; + uint8_t AreaScope; + uint8_t rsvd1; + uint8_t rsvd2; + uint8_t rsvd3; + uint8_t Fc4FBits; + uint8_t Fc4Type; + } gid_ff; struct rft { uint32_t PortId; /* For RFT_ID requests */ @@ -159,6 +171,12 @@ struct lpfc_sli_ct_request { struct gff_acc { uint8_t fbits[128]; } gff_acc; + struct gft { + uint32_t PortId; + } gft; + struct gft_acc { + uint32_t fc4_types[8]; + } gft_acc; #define FCP_TYPE_FEATURE_OFFSET 7 struct rff { uint32_t PortId; @@ -174,8 +192,12 @@ struct lpfc_sli_ct_request { #define SLI_CT_REVISION 1 #define GID_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ sizeof(struct gid)) +#define GIDFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ + sizeof(struct gid_ff)) #define GFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ sizeof(struct gff)) +#define GFT_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ + sizeof(struct gft)) #define RFT_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ sizeof(struct rft)) #define RFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ @@ -271,6 +293,7 @@ struct lpfc_sli_ct_request { #define SLI_CTNS_GNN_IP 0x0153 #define SLI_CTNS_GIPA_IP 0x0156 #define SLI_CTNS_GID_FT 0x0171 +#define SLI_CTNS_GID_FF 0x01F1 #define SLI_CTNS_GID_PT 0x01A1 #define SLI_CTNS_RPN_ID 0x0212 #define SLI_CTNS_RNN_ID 0x0213 @@ -288,15 +311,16 @@ struct lpfc_sli_ct_request { * Port Types */ -#define SLI_CTPT_N_PORT 0x01 -#define SLI_CTPT_NL_PORT 0x02 -#define SLI_CTPT_FNL_PORT 0x03 -#define SLI_CTPT_IP 0x04 
-#define SLI_CTPT_FCP 0x08 -#define SLI_CTPT_NX_PORT 0x7F -#define SLI_CTPT_F_PORT 0x81 -#define SLI_CTPT_FL_PORT 0x82 -#define SLI_CTPT_E_PORT 0x84 +#define SLI_CTPT_N_PORT 0x01 +#define SLI_CTPT_NL_PORT 0x02 +#define SLI_CTPT_FNL_PORT 0x03 +#define SLI_CTPT_IP 0x04 +#define SLI_CTPT_FCP 0x08 +#define SLI_CTPT_NVME 0x28 +#define SLI_CTPT_NX_PORT 0x7F +#define SLI_CTPT_F_PORT 0x81 +#define SLI_CTPT_FL_PORT 0x82 +#define SLI_CTPT_E_PORT 0x84 #define SLI_CT_LAST_ENTRY 0x80000000 @@ -337,6 +361,7 @@ struct lpfc_name { uint8_t IEEE[6]; /* FC IEEE address */ } s; uint8_t wwn[8]; + uint64_t name; } u; }; @@ -549,6 +574,7 @@ struct fc_vft_header { #define ELS_CMD_REC 0x13000000 #define ELS_CMD_RDP 0x18000000 #define ELS_CMD_PRLI 0x20100014 +#define ELS_CMD_NVMEPRLI 0x20140018 #define ELS_CMD_PRLO 0x21100014 #define ELS_CMD_PRLO_ACC 0x02100014 #define ELS_CMD_PDISC 0x50000000 @@ -588,6 +614,7 @@ struct fc_vft_header { #define ELS_CMD_REC 0x13 #define ELS_CMD_RDP 0x18 #define ELS_CMD_PRLI 0x14001020 +#define ELS_CMD_NVMEPRLI 0x18001420 #define ELS_CMD_PRLO 0x14001021 #define ELS_CMD_PRLO_ACC 0x14001002 #define ELS_CMD_PDISC 0x50 @@ -684,6 +711,7 @@ typedef struct _PRLI { /* Structure is in Big Endian format */ uint8_t prliType; /* FC Parm Word 0, bit 24:31 */ #define PRLI_FCP_TYPE 0x08 +#define PRLI_NVME_TYPE 0x28 uint8_t word0Reserved1; /* FC Parm Word 0, bit 16:23 */ #ifdef __BIG_ENDIAN_BITFIELD @@ -1243,8 +1271,7 @@ struct fc_rdp_opd_sfp_info { uint8_t vendor_name[16]; uint8_t model_number[16]; uint8_t serial_number[16]; - uint8_t revision[2]; - uint8_t reserved[2]; + uint8_t revision[4]; uint8_t date[8]; }; @@ -1263,14 +1290,14 @@ struct fc_rdp_req_frame { struct fc_rdp_res_frame { - uint32_t reply_sequence; /* FC word0 LS_ACC or LS_RJT */ - uint32_t length; /* FC Word 1 */ - struct fc_rdp_link_service_desc link_service_desc; /* Word 2 -4 */ - struct fc_rdp_sfp_desc sfp_desc; /* Word 5 -9 */ - struct fc_rdp_port_speed_desc portspeed_desc; /* Word 10-12 */ - struct fc_rdp_link_error_status_desc link_error_desc; /* Word 13-21 */ - struct fc_rdp_port_name_desc diag_port_names_desc; /* Word 22-27 */ - struct fc_rdp_port_name_desc attached_port_names_desc;/* Word 28-33 */ + uint32_t reply_sequence; /* FC word0 LS_ACC or LS_RJT */ + uint32_t length; /* FC Word 1 */ + struct fc_rdp_link_service_desc link_service_desc; /* Word 2 -4 */ + struct fc_rdp_sfp_desc sfp_desc; /* Word 5 -9 */ + struct fc_rdp_port_speed_desc portspeed_desc; /* Word 10 -12 */ + struct fc_rdp_link_error_status_desc link_error_desc; /* Word 13 -21 */ + struct fc_rdp_port_name_desc diag_port_names_desc; /* Word 22 -27 */ + struct fc_rdp_port_name_desc attached_port_names_desc;/* Word 28 -33 */ struct fc_fec_rdp_desc fec_desc; /* FC word 34-37*/ struct fc_rdp_bbc_desc bbc_desc; /* FC Word 38-42*/ struct fc_rdp_oed_sfp_desc oed_temp_desc; /* FC Word 43-47*/ diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index c3277c5312c9..fcc083cc00e0 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -3922,6 +3922,49 @@ struct gen_req64_wqe { uint32_t max_response_payload_len; }; +/* Define NVME PRLI request to fabric. NVME is a + * fabric-only protocol. 
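 * (Summary of the layout below: word 1 carries the PRLI type code
 * and accept response code, word 4 the first-burst, discovery,
 * target, initiator and recovery bits, and word 5 the first-burst
 * size.)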
+ * Updated to red-lined v1.08 on Sept 16, 2016 + */ +struct lpfc_nvme_prli { + uint32_t word1; + /* The Response Code is defined in the FCP PRLI lpfc_hw.h */ +#define prli_acc_rsp_code_SHIFT 8 +#define prli_acc_rsp_code_MASK 0x0000000f +#define prli_acc_rsp_code_WORD word1 +#define prli_estabImagePair_SHIFT 13 +#define prli_estabImagePair_MASK 0x00000001 +#define prli_estabImagePair_WORD word1 +#define prli_type_code_ext_SHIFT 16 +#define prli_type_code_ext_MASK 0x000000ff +#define prli_type_code_ext_WORD word1 +#define prli_type_code_SHIFT 24 +#define prli_type_code_MASK 0x000000ff +#define prli_type_code_WORD word1 + uint32_t word_rsvd2; + uint32_t word_rsvd3; + uint32_t word4; +#define prli_fba_SHIFT 0 +#define prli_fba_MASK 0x00000001 +#define prli_fba_WORD word4 +#define prli_disc_SHIFT 3 +#define prli_disc_MASK 0x00000001 +#define prli_disc_WORD word4 +#define prli_tgt_SHIFT 4 +#define prli_tgt_MASK 0x00000001 +#define prli_tgt_WORD word4 +#define prli_init_SHIFT 5 +#define prli_init_MASK 0x00000001 +#define prli_init_WORD word4 +#define prli_recov_SHIFT 8 +#define prli_recov_MASK 0x00000001 +#define prli_recov_WORD word4 + uint32_t word5; +#define prli_fb_sz_SHIFT 0 +#define prli_fb_sz_MASK 0x0000ffff +#define prli_fb_sz_WORD word5 +}; + struct create_xri_wqe { uint32_t rsrvd[5]; /* words 0-4 */ struct wqe_did wqe_dest; /* word 5 */ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 57087ba4834f..e609afaa472a 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -2667,6 +2667,13 @@ lpfc_cleanup(struct lpfc_vport *vport) lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); + if (ndlp->nlp_fc4_type & NLP_FC4_NVME) { + /* Remove the NVME transport reference now and + * continue to remove the node. 
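 * (This drops the reference the initiator presumably took when the
 * remote port was registered with the NVME transport.)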
+ */ + lpfc_nlp_put(ndlp); + } + lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); } diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 835ea9f78219..65e7b2433ee7 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -28,6 +28,9 @@ #include #include #include +#include + +#include #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -35,8 +38,9 @@ #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -708,6 +712,7 @@ static void lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_iocbq *cmdiocb) { + struct lpfc_hba *phba = vport->phba; struct lpfc_dmabuf *pcmd; uint32_t *lp; PRLI *npr; @@ -721,11 +726,19 @@ lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR); ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; ndlp->nlp_flag &= ~NLP_FIRSTBURST; - if (npr->prliType == PRLI_FCP_TYPE) { - if (npr->initiatorFunc) - ndlp->nlp_type |= NLP_FCP_INITIATOR; + if ((npr->prliType == PRLI_FCP_TYPE) || + (npr->prliType == PRLI_NVME_TYPE)) { + if (npr->initiatorFunc) { + if (npr->prliType == PRLI_FCP_TYPE) + ndlp->nlp_type |= NLP_FCP_INITIATOR; + if (npr->prliType == PRLI_NVME_TYPE) + ndlp->nlp_type |= NLP_NVME_INITIATOR; + } if (npr->targetFunc) { - ndlp->nlp_type |= NLP_FCP_TARGET; + if (npr->prliType == PRLI_FCP_TYPE) + ndlp->nlp_type |= NLP_FCP_TARGET; + if (npr->prliType == PRLI_NVME_TYPE) + ndlp->nlp_type |= NLP_NVME_TARGET; if (npr->writeXferRdyDis) ndlp->nlp_flag |= NLP_FIRSTBURST; } @@ -744,7 +757,8 @@ lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "rport rolechg: role:x%x did:x%x flg:x%x", roles, ndlp->nlp_DID, ndlp->nlp_flag); - fc_remote_port_rolechg(rport, roles); + if (phba->cfg_enable_fc4_type != LPFC_ENABLE_NVME) + fc_remote_port_rolechg(rport, roles); } } @@ -1491,7 +1505,9 @@ lpfc_rcv_prli_reglogin_issue(struct lpfc_vport *vport, { struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + /* Initiator mode. */ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + return ndlp->nlp_state; } @@ -1574,9 +1590,11 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, uint32_t evt) { struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg; MAILBOX_t *mb = &pmb->u.mb; uint32_t did = mb->un.varWords[1]; + int rc = 0; if (mb->mbxStatus) { /* RegLogin failed */ @@ -1611,19 +1629,52 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, } /* SLI4 ports have preallocated logical rpis. */ - if (vport->phba->sli_rev < LPFC_SLI_REV4) + if (phba->sli_rev < LPFC_SLI_REV4) ndlp->nlp_rpi = mb->un.varWords[0]; ndlp->nlp_flag |= NLP_RPI_REGISTERED; /* Only if we are not a fabric nport do we issue PRLI */ - if (!(ndlp->nlp_type & NLP_FABRIC)) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "3066 RegLogin Complete on x%x x%x x%x\n", + did, ndlp->nlp_type, ndlp->nlp_fc4_type); + if (!(ndlp->nlp_type & NLP_FABRIC) && + (phba->nvmet_support == 0)) { + /* The driver supports FCP and NVME concurrently. If the + * ndlp's nlp_fc4_type is still zero, the driver doesn't + * know what PRLI to send yet. Figure that out now and + * call PRLI depending on the outcome. 
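 * (Concretely: on a fabric the driver queries GFT_ID when
 * nlp_fc4_type is still zero; in pt2pt mode it assumes FCP and,
 * when enabled, NVME as well, then issues the PRLI(s).)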
+ */ + if (vport->fc_flag & FC_PT2PT) { + /* If we are pt2pt, there is no Fabric to determine + * the FC4 type of the remote nport. So if NVME + * is configured try it. + */ + ndlp->nlp_fc4_type |= NLP_FC4_FCP; + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + ndlp->nlp_fc4_type |= NLP_FC4_NVME; + /* We need to update the localport also */ + /* todo: init: revise localport nvme + * attributes + */ + } + + } else if (ndlp->nlp_fc4_type == 0) { + rc = lpfc_ns_cmd(vport, SLI_CTNS_GFT_ID, + 0, ndlp->nlp_DID); + return ndlp->nlp_state; + } + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); lpfc_issue_els_prli(vport, ndlp, 0); } else { - ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; - lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + /* Only Fabric ports should transition */ + if (ndlp->nlp_type & NLP_FABRIC) { + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + } } return ndlp->nlp_state; } @@ -1664,7 +1715,7 @@ lpfc_device_recov_reglogin_issue(struct lpfc_vport *vport, ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); spin_lock_irq(shost->host_lock); - ndlp->nlp_flag |= NLP_IGNR_REG_CMPL; + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); spin_unlock_irq(shost->host_lock); lpfc_disc_set_adisc(vport, ndlp); @@ -1740,10 +1791,23 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_hba *phba = vport->phba; IOCB_t *irsp; PRLI *npr; + struct lpfc_nvme_prli *nvpr; + void *temp_ptr; cmdiocb = (struct lpfc_iocbq *) arg; rspiocb = cmdiocb->context_un.rsp_iocb; - npr = (PRLI *)lpfc_check_elscmpl_iocb(phba, cmdiocb, rspiocb); + + /* A solicited PRLI is either FCP or NVME. The PRLI cmd/rsp + * format is different so NULL the two PRLI types so that the + * driver correctly gets the correct context. + */ + npr = NULL; + nvpr = NULL; + temp_ptr = lpfc_check_elscmpl_iocb(phba, cmdiocb, rspiocb); + if (cmdiocb->iocb_flag & LPFC_PRLI_FCP_REQ) + npr = (PRLI *) temp_ptr; + else if (cmdiocb->iocb_flag & LPFC_PRLI_NVME_REQ) + nvpr = (struct lpfc_nvme_prli *) temp_ptr; irsp = &rspiocb->iocb; if (irsp->ulpStatus) { @@ -1751,7 +1815,21 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, vport->cfg_restrict_login) { goto out; } + + /* The LS Req had some error. Don't let this be a + * target. + */ + if ((ndlp->fc4_prli_sent == 1) && + (ndlp->nlp_state == NLP_STE_PRLI_ISSUE) && + (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_FCP_INITIATOR))) + /* The FCP PRLI completed successfully but + * the NVME PRLI failed. Since they are sent in + * succession, allow the FCP to complete. + */ + goto out_err; + ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; + ndlp->nlp_type |= NLP_FCP_INITIATOR; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); return ndlp->nlp_state; } @@ -1759,9 +1837,16 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* Check out PRLI rsp */ ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR); ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; + + /* NVME or FCP first burst must be negotiated for each PRLI. 
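 * (NLP_FIRSTBURST and nvme_fb_size are cleared here and only set
 * again below when the accepted PRLI advertises first burst.)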
*/ ndlp->nlp_flag &= ~NLP_FIRSTBURST; - if ((npr->acceptRspCode == PRLI_REQ_EXECUTED) && + ndlp->nvme_fb_size = 0; + if (npr && (npr->acceptRspCode == PRLI_REQ_EXECUTED) && (npr->prliType == PRLI_FCP_TYPE)) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6028 FCP NPR PRLI Cmpl Init %d Target %d\n", + npr->initiatorFunc, + npr->targetFunc); if (npr->initiatorFunc) ndlp->nlp_type |= NLP_FCP_INITIATOR; if (npr->targetFunc) { @@ -1771,6 +1856,49 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, } if (npr->Retry) ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE; + + /* PRLI completed. Decrement count. */ + ndlp->fc4_prli_sent--; + } else if (nvpr && + (bf_get_be32(prli_acc_rsp_code, nvpr) == + PRLI_REQ_EXECUTED) && + (bf_get_be32(prli_type_code, nvpr) == + PRLI_NVME_TYPE)) { + + /* Complete setting up the remote ndlp personality. */ + if (bf_get_be32(prli_init, nvpr)) + ndlp->nlp_type |= NLP_NVME_INITIATOR; + + /* Target driver cannot solicit NVME FB. */ + if (bf_get_be32(prli_tgt, nvpr)) { + ndlp->nlp_type |= NLP_NVME_TARGET; + if ((bf_get_be32(prli_fba, nvpr) == 1) && + (bf_get_be32(prli_fb_sz, nvpr) > 0) && + (phba->cfg_nvme_enable_fb) && + (!phba->nvmet_support)) { + /* Both sides support FB. The target's first + * burst size is a 512 byte encoded value. + */ + ndlp->nlp_flag |= NLP_FIRSTBURST; + ndlp->nvme_fb_size = bf_get_be32(prli_fb_sz, + nvpr); + } + } + + if (bf_get_be32(prli_recov, nvpr)) + ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6029 NVME PRLI Cmpl w1 x%08x " + "w4 x%08x w5 x%08x flag x%x, " + "fcp_info x%x nlp_type x%x\n", + be32_to_cpu(nvpr->word1), + be32_to_cpu(nvpr->word4), + be32_to_cpu(nvpr->word5), + ndlp->nlp_flag, ndlp->nlp_fcp_info, + ndlp->nlp_type); + /* PRLI completed. Decrement count. */ + ndlp->fc4_prli_sent--; } if (!(ndlp->nlp_type & NLP_FCP_TARGET) && (vport->port_type == LPFC_NPIV_PORT) && @@ -1786,11 +1914,24 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, return ndlp->nlp_state; } - ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; - if (ndlp->nlp_type & NLP_FCP_TARGET) - lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE); - else - lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); +out_err: + /* The ndlp state cannot move to MAPPED or UNMAPPED before all PRLIs + * are complete. 
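 * (fc4_prli_sent is decremented as each FCP or NVME PRLI completes;
 * the node changes state only once the count reaches zero.)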
+ */ + if (ndlp->fc4_prli_sent == 0) { + ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; + if (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_NVME_TARGET)) + lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE); + else + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + } else + lpfc_printf_vlog(vport, + KERN_INFO, LOG_ELS, + "3067 PRLI's still outstanding " + "on x%06x - count %d, Pend Node Mode " + "transition...\n", + ndlp->nlp_DID, ndlp->fc4_prli_sent); + return ndlp->nlp_state; } diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index c327fc7b1a54..6ba2b3412337 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5333,7 +5333,8 @@ lpfc_bus_reset_handler(struct scsi_cmnd *cmnd) continue; if (ndlp->nlp_state == NLP_STE_MAPPED_NODE && ndlp->nlp_sid == i && - ndlp->rport) { + ndlp->rport && + ndlp->nlp_type & NLP_FCP_TARGET) { match = 1; break; } -- GitLab From 01649561a8b4b77247bd234f240d737367bb8a52 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:32 -0800 Subject: [PATCH 0443/1084] scsi: lpfc: NVME Initiator: bind to nvme_fc api NVME Initiator: Tie in to NVME Fabrics nvme_fc LLDD initiator api Adds the routines to: - register and deregister the FC port as a nvme-fc initiator localport - register and deregister remote FC ports as a nvme-fc remoteport - binding of nvme queues to adapter WQs - send/perform NVME LS's - send/perform NVME FCP initiator io operations Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/Makefile | 7 +- drivers/scsi/lpfc/lpfc.h | 5 + drivers/scsi/lpfc/lpfc_crtn.h | 10 + drivers/scsi/lpfc/lpfc_ct.c | 2 +- drivers/scsi/lpfc/lpfc_els.c | 4 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 15 +- drivers/scsi/lpfc/lpfc_init.c | 39 +- drivers/scsi/lpfc/lpfc_nportdisc.c | 4 +- drivers/scsi/lpfc/lpfc_nvme.c | 2319 ++++++++++++++++++++++++++++ drivers/scsi/lpfc/lpfc_nvme.h | 6 + drivers/scsi/lpfc/lpfc_sli.c | 21 +- drivers/scsi/lpfc/lpfc_sli4.h | 1 + drivers/scsi/lpfc/lpfc_vport.c | 16 +- 13 files changed, 2411 insertions(+), 38 deletions(-) create mode 100644 drivers/scsi/lpfc/lpfc_nvme.c diff --git a/drivers/scsi/lpfc/Makefile b/drivers/scsi/lpfc/Makefile index e2516ba8ebfa..cd7e1fcf52c6 100644 --- a/drivers/scsi/lpfc/Makefile +++ b/drivers/scsi/lpfc/Makefile @@ -28,6 +28,7 @@ endif obj-$(CONFIG_SCSI_LPFC) := lpfc.o -lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o lpfc_hbadisc.o \ - lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsi.o lpfc_attr.o \ - lpfc_vport.o lpfc_debugfs.o lpfc_bsg.o +lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o \ + lpfc_hbadisc.o lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o \ + lpfc_scsi.o lpfc_attr.o lpfc_vport.o lpfc_debugfs.o lpfc_bsg.o \ + lpfc_nvme.o diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 81c47d1aebb2..9ad63a47425b 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -123,6 +123,11 @@ struct perf_prof { uint16_t wqidx[40]; }; +/* + * Provide for FC4 TYPE x28 - NVME. The + * bit mask for FCP and NVME is 0x8 identically + * because they are 32 bit positions distance. + */ #define LPFC_FC4_TYPE_BITMASK 0x00000100 /* Provide DMA memory definitions the driver uses per port instance. 
*/ diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 2e6345ebd6c5..a9c8a0a41b16 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -518,4 +518,14 @@ int lpfc_sli4_dump_page_a0(struct lpfc_hba *phba, struct lpfcMboxq *mbox); void lpfc_mbx_cmpl_rdp_page_a0(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb); /* NVME interfaces. */ +void lpfc_nvme_unregister_port(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp); +int lpfc_nvme_register_port(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp); +int lpfc_nvme_create_localport(struct lpfc_vport *vport); +void lpfc_nvme_destroy_localport(struct lpfc_vport *vport); +void lpfc_nvme_update_localport(struct lpfc_vport *vport); void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); +void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdiocb, + struct lpfc_wcqe_complete *abts_cmpl); diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index afb25369bc3b..8cfeffe312e0 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1412,7 +1412,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && (context == FC_TYPE_NVME)) { - /* todo: init: revise localport nvme attributes */ + lpfc_nvme_update_localport(vport); CtReq->un.rff.type_code = context; } else if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index e164eed25e3d..23546b3c950c 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2618,7 +2618,9 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, !(vport->fc_flag & FC_PT2PT_PLOGI)) { phba->pport->fc_myDID = 0; - /* todo: init: revise localport nvme attributes */ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) + lpfc_nvme_update_localport(phba->pport); mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mbox) { diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 6e64c8e8e44f..8936f5d91c87 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -909,7 +909,9 @@ lpfc_linkdown(struct lpfc_hba *phba) vports[i]->fc_myDID = 0; - /* todo: init: revise localport nvme attributes */ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) + lpfc_nvme_update_localport(vports[i]); } } lpfc_destroy_vport_work_array(phba, vports); @@ -3580,7 +3582,9 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) spin_unlock_irq(shost->host_lock); vport->fc_myDID = 0; - /* todo: init: revise localport nvme attributes */ + if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) + lpfc_nvme_update_localport(vport); goto out; } @@ -3950,7 +3954,8 @@ lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) - lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, FC_TYPE_NVME); + lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, + FC_TYPE_NVME); /* Issue SCR just before NameServer GID_FT Query */ lpfc_issue_els_scr(vport, SCR_DID, 0); @@ -4144,7 +4149,7 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ((ndlp->nlp_fc4_type & NLP_FC4_NVME) || (ndlp->nlp_DID == 
Fabric_DID))) { vport->phba->nport_event_cnt++; - /* todo: init: unregister rport from nvme */ + lpfc_nvme_unregister_port(vport, ndlp); } } @@ -4169,7 +4174,7 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, * the register. */ vport->phba->nport_event_cnt++; - /* todo: init: register rport with nvme */ + lpfc_nvme_register_port(vport, ndlp); } } } diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e609afaa472a..474bafc63055 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3128,7 +3128,6 @@ static void lpfc_scsi_free(struct lpfc_hba *phba) { struct lpfc_scsi_buf *sb, *sb_next; - struct lpfc_iocbq *io, *io_next; if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) return; @@ -3158,14 +3157,6 @@ lpfc_scsi_free(struct lpfc_hba *phba) phba->total_scsi_bufs--; } spin_unlock(&phba->scsi_buf_list_get_lock); - - /* Release all the lpfc_iocbq entries maintained by this host. */ - list_for_each_entry_safe(io, io_next, &phba->lpfc_iocb_list, list) { - list_del(&io->list); - kfree(io); - phba->total_iocbq_bufs--; - } - spin_unlock_irq(&phba->hbalock); } /** @@ -3180,7 +3171,6 @@ static void lpfc_nvme_free(struct lpfc_hba *phba) { struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next; - struct lpfc_iocbq *io, *io_next; if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) return; @@ -3209,14 +3199,6 @@ lpfc_nvme_free(struct lpfc_hba *phba) phba->total_nvme_bufs--; } spin_unlock(&phba->nvme_buf_list_get_lock); - - /* Release all the lpfc_iocbq entries maintained by this host. */ - list_for_each_entry_safe(io, io_next, &phba->lpfc_iocb_list, list) { - list_del(&io->list); - kfree(io); - phba->total_iocbq_bufs--; - } - spin_unlock_irq(&phba->hbalock); } /** @@ -10685,7 +10667,23 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) /* Perform post initialization setup */ lpfc_post_init_setup(phba); - /* todo: init: register port with nvme */ + /* NVME support in FW earlier in the driver load corrects the + * FC4 type making a check for nvme_support unnecessary. + */ + if ((phba->nvmet_support == 0) && + (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { + /* Create NVME binding with nvme_fc_transport. This + * ensures the vport is initialized. + */ + error = lpfc_nvme_create_localport(vport); + if (error) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6004 NVME registration failed, " + "error x%x\n", + error); + goto out_disable_intr; + } + } /* check for firmware upgrade or downgrade */ if (phba->cfg_request_firmware_upgrade) @@ -10761,8 +10759,8 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) /* Perform ndlp cleanup on the physical port. The nvme localport * is destroyed after to ensure all rports are io-disabled. */ + lpfc_nvme_destroy_localport(vport); lpfc_cleanup(vport); - /* todo: init: unregister port with nvme */ /* * Bring down the SLI Layer. 
This step disables all interrupts, @@ -10781,6 +10779,7 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) */ lpfc_scsi_free(phba); lpfc_nvme_free(phba); + lpfc_free_iocb_list(phba); lpfc_sli4_driver_resource_unset(phba); diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 65e7b2433ee7..470f9586192f 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1655,9 +1655,7 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { ndlp->nlp_fc4_type |= NLP_FC4_NVME; /* We need to update the localport also */ - /* todo: init: revise localport nvme - * attributes - */ + lpfc_nvme_update_localport(vport); } } else if (ndlp->nlp_fc4_type == 0) { diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c new file mode 100644 index 000000000000..729803dacf15 --- /dev/null +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -0,0 +1,2319 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2004-2016 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + ********************************************************************/ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "lpfc_version.h" +#include "lpfc_hw4.h" +#include "lpfc_hw.h" +#include "lpfc_sli.h" +#include "lpfc_sli4.h" +#include "lpfc_nl.h" +#include "lpfc_disc.h" +#include "lpfc.h" +#include "lpfc_nvme.h" +#include "lpfc_scsi.h" +#include "lpfc_logmsg.h" +#include "lpfc_crtn.h" +#include "lpfc_vport.h" + +/* NVME initiator-based functions */ + +static struct lpfc_nvme_buf * +lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp); + +static void +lpfc_release_nvme_buf(struct lpfc_hba *, struct lpfc_nvme_buf *); + + +/** + * lpfc_nvme_create_queue - + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @qidx: An cpu index used to affinitize IO queues and MSIX vectors. + * @handle: An opaque driver handle used in follow-up calls. + * + * Driver registers this routine to preallocate and initialize any + * internal data structures to bind the @qidx to its internal IO queues. + * A hardware queue maps (qidx) to a specific driver MSI-X vector/EQ/CQ/WQ. + * + * Return value : + * 0 - Success + * -EINVAL - Unsupported input value. 
+ * -ENOMEM - Could not alloc necessary memory + **/ +static int +lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport, + unsigned int qidx, u16 qsize, + void **handle) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_nvme_qhandle *qhandle; + char *str; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + vport = lport->vport; + qhandle = kzalloc(sizeof(struct lpfc_nvme_qhandle), GFP_KERNEL); + if (qhandle == NULL) + return -ENOMEM; + + qhandle->cpu_id = smp_processor_id(); + qhandle->qidx = qidx; + /* + * NVME qidx == 0 is the admin queue, so both admin queue + * and first IO queue will use MSI-X vector and associated + * EQ/CQ/WQ at index 0. After that they are sequentially assigned. + */ + if (qidx) { + str = "IO "; /* IO queue */ + qhandle->index = ((qidx - 1) % + vport->phba->cfg_nvme_io_channel); + } else { + str = "ADM"; /* Admin queue */ + qhandle->index = qidx; + } + + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6073 Binding %s HdwQueue %d (cpu %d) to " + "io_channel %d qhandle %p\n", str, + qidx, qhandle->cpu_id, qhandle->index, qhandle); + *handle = (void *)qhandle; + return 0; +} + +/** + * lpfc_nvme_delete_queue - + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @qidx: An cpu index used to affinitize IO queues and MSIX vectors. + * @handle: An opaque driver handle from lpfc_nvme_create_queue + * + * Driver registers this routine to free + * any internal data structures to bind the @qidx to its internal + * IO queues. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static void +lpfc_nvme_delete_queue(struct nvme_fc_local_port *pnvme_lport, + unsigned int qidx, + void *handle) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + vport = lport->vport; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, + "6001 ENTER. lpfc_pnvme %p, qidx x%xi qhandle %p\n", + lport, qidx, handle); + kfree(handle); +} + +static void +lpfc_nvme_localport_delete(struct nvme_fc_local_port *localport) +{ + struct lpfc_nvme_lport *lport = localport->private; + + /* release any threads waiting for the unreg to complete */ + complete(&lport->lport_unreg_done); +} + +/* lpfc_nvme_remoteport_delete + * + * @remoteport: Pointer to an nvme transport remoteport instance. + * + * This is a template downcall. NVME transport calls this function + * when it has completed the unregistration of a previously + * registered remoteport. + * + * Return value : + * None + */ +void +lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport) +{ + struct lpfc_nvme_rport *rport = remoteport->private; + struct lpfc_vport *vport; + struct lpfc_nodelist *ndlp; + + ndlp = rport->ndlp; + if (!ndlp) + goto rport_err; + + vport = ndlp->vport; + if (!vport) + goto rport_err; + + /* Remove this rport from the lport's list - memory is owned by the + * transport. Remove the ndlp reference for the NVME transport before + * calling state machine to remove the node, this is devloss = 0 + * semantics. + */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6146 remoteport delete complete %p\n", + remoteport); + list_del(&rport->list); + lpfc_nlp_put(ndlp); + + rport_err: + /* This call has to execute as long as the rport is valid. + * Release any threads waiting for the unreg to complete. 
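 * (Assumed caller side, not shown in this hunk: the unregister path
 * would call nvme_fc_unregister_remoteport() and then wait on
 * rport_unreg_done before tearing the ndlp down.)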
+ */ + complete(&rport->rport_unreg_done); +} + +static void +lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_vport *vport = cmdwqe->vport; + uint32_t status; + struct nvmefc_ls_req *pnvme_lsreq; + struct lpfc_dmabuf *buf_ptr; + struct lpfc_nodelist *ndlp; + + vport->phba->fc4NvmeLsCmpls++; + + pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2; + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; + ndlp = (struct lpfc_nodelist *)cmdwqe->context1; + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6047 nvme cmpl Enter " + "Data %p DID %x Xri: %x status %x cmd:%p lsreg:%p " + "bmp:%p ndlp:%p\n", + pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0, + cmdwqe->sli4_xritag, status, + cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp); + + if (cmdwqe->context3) { + buf_ptr = (struct lpfc_dmabuf *)cmdwqe->context3; + lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); + kfree(buf_ptr); + cmdwqe->context3 = NULL; + } + if (pnvme_lsreq->done) + pnvme_lsreq->done(pnvme_lsreq, status); + else + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6046 nvme cmpl without done call back? " + "Data %p DID %x Xri: %x status %x\n", + pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0, + cmdwqe->sli4_xritag, status); + if (ndlp) { + lpfc_nlp_put(ndlp); + cmdwqe->context1 = NULL; + } + lpfc_sli_release_iocbq(phba, cmdwqe); +} + +static int +lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, + struct lpfc_dmabuf *inp, + struct nvmefc_ls_req *pnvme_lsreq, + void (*cmpl)(struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_wcqe_complete *), + struct lpfc_nodelist *ndlp, uint32_t num_entry, + uint32_t tmo, uint8_t retry) +{ + struct lpfc_hba *phba = vport->phba; + union lpfc_wqe *wqe; + struct lpfc_iocbq *genwqe; + struct ulp_bde64 *bpl; + struct ulp_bde64 bde; + int i, rc, xmit_len, first_len; + + /* Allocate buffer for command WQE */ + genwqe = lpfc_sli_get_iocbq(phba); + if (genwqe == NULL) + return 1; + + wqe = &genwqe->wqe; + memset(wqe, 0, sizeof(union lpfc_wqe)); + + genwqe->context3 = (uint8_t *)bmp; + genwqe->iocb_flag |= LPFC_IO_NVME_LS; + + /* Save for completion so we can release these resources */ + genwqe->context1 = lpfc_nlp_get(ndlp); + genwqe->context2 = (uint8_t *)pnvme_lsreq; + /* Fill in payload, bp points to frame payload */ + + if (!tmo) + /* FC spec states we need 3 * ratov for CT requests */ + tmo = (3 * phba->fc_ratov); + + /* For this command calculate the xmit length of the request bde. 
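 * (The loop below walks the BPL, stopping at the first entry that is
 * not a 64-bit BDE; the running bdeSize total becomes xmit_len and
 * the first entry's size is kept in first_len.)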
*/ + xmit_len = 0; + first_len = 0; + bpl = (struct ulp_bde64 *)bmp->virt; + for (i = 0; i < num_entry; i++) { + bde.tus.w = bpl[i].tus.w; + if (bde.tus.f.bdeFlags != BUFF_TYPE_BDE_64) + break; + xmit_len += bde.tus.f.bdeSize; + if (i == 0) + first_len = xmit_len; + } + + genwqe->rsvd2 = num_entry; + genwqe->hba_wqidx = 0; + + /* Words 0 - 2 */ + wqe->generic.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; + wqe->generic.bde.tus.f.bdeSize = first_len; + wqe->generic.bde.addrLow = bpl[0].addrLow; + wqe->generic.bde.addrHigh = bpl[0].addrHigh; + + /* Word 3 */ + wqe->gen_req.request_payload_len = first_len; + + /* Word 4 */ + + /* Word 5 */ + bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0); + bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1); + bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1); + bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_DD_UNSOL_CTL); + bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME); + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->gen_req.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->gen_req.wqe_com, genwqe->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_tmo, &wqe->gen_req.wqe_com, (vport->phba->fc_ratov-1)); + bf_set(wqe_class, &wqe->gen_req.wqe_com, CLASS3); + bf_set(wqe_cmnd, &wqe->gen_req.wqe_com, CMD_GEN_REQUEST64_WQE); + bf_set(wqe_ct, &wqe->gen_req.wqe_com, SLI4_CT_RPI); + + /* Word 8 */ + wqe->gen_req.wqe_com.abort_tag = genwqe->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->gen_req.wqe_com, genwqe->iotag); + + /* Word 10 */ + bf_set(wqe_dbde, &wqe->gen_req.wqe_com, 1); + bf_set(wqe_iod, &wqe->gen_req.wqe_com, LPFC_WQE_IOD_READ); + bf_set(wqe_qosd, &wqe->gen_req.wqe_com, 1); + bf_set(wqe_lenloc, &wqe->gen_req.wqe_com, LPFC_WQE_LENLOC_NONE); + bf_set(wqe_ebde_cnt, &wqe->gen_req.wqe_com, 0); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->gen_req.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe->gen_req.wqe_com, OTHER_COMMAND); + + + /* Issue GEN REQ WQE for NPORT */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "6050 Issue GEN REQ WQE to NPORT x%x " + "Data: x%x x%x wq:%p lsreq:%p bmp:%p xmit:%d 1st:%d\n", + ndlp->nlp_DID, genwqe->iotag, + vport->port_state, + genwqe, pnvme_lsreq, bmp, xmit_len, first_len); + genwqe->wqe_cmpl = cmpl; + genwqe->iocb_cmpl = NULL; + genwqe->drvrTimeout = tmo + LPFC_DRVR_TIMEOUT; + genwqe->vport = vport; + genwqe->retry = retry; + + rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, genwqe); + if (rc == WQE_ERROR) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "6045 Issue GEN REQ WQE to NPORT x%x " + "Data: x%x x%x\n", + ndlp->nlp_DID, genwqe->iotag, + vport->port_state); + lpfc_sli_release_iocbq(phba, genwqe); + return 1; + } + return 0; +} + +/** + * lpfc_nvme_ls_req - Issue an Link Service request + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * + * Driver registers this routine to handle any link service request + * from the nvme_fc transport to a remote nvme-aware port. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. 
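 * (From the body below: 1 - node lookup failed, 2/3 - BPL buffer
 * allocation failed, otherwise the non-zero lpfc_nvme_gen_req()
 * status is returned.)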
+ **/ +static int +lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + struct nvmefc_ls_req *pnvme_lsreq) +{ + int ret = 0; + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_nodelist *ndlp; + struct ulp_bde64 *bpl; + struct lpfc_dmabuf *bmp; + + /* there are two dma buf in the request, actually there is one and + * the second one is just the start address + cmd size. + * Before calling lpfc_nvme_gen_req these buffers need to be wrapped + * in a lpfc_dmabuf struct. When freeing we just free the wrapper + * because the nvem layer owns the data bufs. + * We do not have to break these packets open, we don't care what is in + * them. And we do not have to look at the resonse data, we only care + * that we got a response. All of the caring is going to happen in the + * nvme-fc layer. + */ + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + vport = lport->vport; + + ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6043 Could not find node for DID %x\n", + pnvme_rport->port_id); + return 1; + } + bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); + if (!bmp) { + + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6044 Could not find node for DID %x\n", + pnvme_rport->port_id); + return 2; + } + INIT_LIST_HEAD(&bmp->list); + bmp->virt = lpfc_mbuf_alloc(vport->phba, MEM_PRI, &(bmp->phys)); + if (!bmp->virt) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6042 Could not find node for DID %x\n", + pnvme_rport->port_id); + kfree(bmp); + return 3; + } + bpl = (struct ulp_bde64 *)bmp->virt; + bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rqstdma)); + bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rqstdma)); + bpl->tus.f.bdeFlags = 0; + bpl->tus.f.bdeSize = pnvme_lsreq->rqstlen; + bpl->tus.w = le32_to_cpu(bpl->tus.w); + bpl++; + + bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rspdma)); + bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rspdma)); + bpl->tus.f.bdeFlags = BUFF_TYPE_BDE_64I; + bpl->tus.f.bdeSize = pnvme_lsreq->rsplen; + bpl->tus.w = le32_to_cpu(bpl->tus.w); + + /* Expand print to include key fields. */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6051 ENTER. lport %p, rport %p lsreq%p rqstlen:%d " + "rsplen:%d %llux %llux\n", + pnvme_lport, pnvme_rport, + pnvme_lsreq, pnvme_lsreq->rqstlen, + pnvme_lsreq->rsplen, pnvme_lsreq->rqstdma, + pnvme_lsreq->rspdma); + + vport->phba->fc4NvmeLsRequests++; + + /* Hardcode the wait to 30 seconds. Connections are failing otherwise. + * This code allows it all to work. + */ + ret = lpfc_nvme_gen_req(vport, bmp, pnvme_lsreq->rqstaddr, + pnvme_lsreq, lpfc_nvme_cmpl_gen_req, + ndlp, 2, 30, 0); + if (ret != WQE_SUCCESS) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6052 EXIT. issue ls wqe failed lport %p, " + "rport %p lsreq%p Status %x DID %x\n", + pnvme_lport, pnvme_rport, pnvme_lsreq, + ret, ndlp->nlp_DID); + lpfc_mbuf_free(vport->phba, bmp->virt, bmp->phys); + kfree(bmp); + return ret; + } + + /* Stub in routine and return 0 for now. */ + return ret; +} + +/** + * lpfc_nvme_ls_abort - Issue an Link Service request + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * + * Driver registers this routine to handle any link service request + * from the nvme_fc transport to a remote nvme-aware port. 
+ * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static void +lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + struct nvmefc_ls_req *pnvme_lsreq) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + LIST_HEAD(abort_list); + struct lpfc_sli_ring *pring; + struct lpfc_iocbq *wqe, *next_wqe; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + vport = lport->vport; + phba = vport->phba; + + ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, + "6049 Could not find node for DID %x\n", + pnvme_rport->port_id); + return; + } + + /* Expand print to include key fields. */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, + "6040 ENTER. lport %p, rport %p lsreq %p rqstlen:%d " + "rsplen:%d %llux %llux\n", + pnvme_lport, pnvme_rport, + pnvme_lsreq, pnvme_lsreq->rqstlen, + pnvme_lsreq->rsplen, pnvme_lsreq->rqstdma, + pnvme_lsreq->rspdma); + + /* + * Lock the ELS ring txcmplq and build a local list of all ELS IOs + * that need an ABTS. The IOs need to stay on the txcmplq so that + * the abort operation completes them successfully. + */ + pring = phba->sli4_hba.nvmels_wq->pring; + spin_lock_irq(&phba->hbalock); + spin_lock(&pring->ring_lock); + list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) { + /* Add to abort_list on on NDLP match. */ + if (lpfc_check_sli_ndlp(phba, pring, wqe, ndlp)) { + wqe->iocb_flag |= LPFC_DRIVER_ABORTED; + list_add_tail(&wqe->dlist, &abort_list); + } + } + spin_unlock(&pring->ring_lock); + spin_unlock_irq(&phba->hbalock); + + /* Abort the targeted IOs and remove them from the abort list. */ + list_for_each_entry_safe(wqe, next_wqe, &abort_list, dlist) { + spin_lock_irq(&phba->hbalock); + list_del_init(&wqe->dlist); + lpfc_sli_issue_abort_iotag(phba, pring, wqe); + spin_unlock_irq(&phba->hbalock); + } +} + +/* Fix up the existing sgls for NVME IO. */ +static void +lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport, + struct lpfc_nvme_buf *lpfc_ncmd, + struct nvmefc_fcp_req *nCmd) +{ + struct sli4_sge *sgl; + union lpfc_wqe128 *wqe; + uint32_t *wptr, *dptr; + + /* + * Adjust the FCP_CMD and FCP_RSP DMA data and sge_len to + * match NVME. NVME sends 96 bytes. Also, use the + * nvme commands command and response dma addresses + * rather than the virtual memory to ease the restore + * operation. + */ + sgl = lpfc_ncmd->nvme_sgl; + sgl->sge_len = cpu_to_le32(nCmd->cmdlen); + + sgl++; + + /* Setup the physical region for the FCP RSP */ + sgl->addr_hi = cpu_to_le32(putPaddrHigh(nCmd->rspdma)); + sgl->addr_lo = cpu_to_le32(putPaddrLow(nCmd->rspdma)); + sgl->word2 = le32_to_cpu(sgl->word2); + if (nCmd->sg_cnt) + bf_set(lpfc_sli4_sge_last, sgl, 0); + else + bf_set(lpfc_sli4_sge_last, sgl, 1); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = cpu_to_le32(nCmd->rsplen); + + /* + * Get a local pointer to the built-in wqe and correct + * the cmd size to match NVME's 96 bytes and fix + * the dma address. 
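 * (Words 0-2 become a 60-byte immediate BDE and WQE words 16-30 are
 * filled below with the NVME CMD IU payload.)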
+ */ + + /* 128 byte wqe support here */ + wqe = (union lpfc_wqe128 *)&lpfc_ncmd->cur_iocbq.wqe; + + /* Word 0-2 - NVME CMND IU (embedded payload) */ + wqe->generic.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_IMMED; + wqe->generic.bde.tus.f.bdeSize = 60; + wqe->generic.bde.addrHigh = 0; + wqe->generic.bde.addrLow = 64; /* Word 16 */ + + /* Word 3 */ + bf_set(payload_offset_len, &wqe->fcp_icmd, + (nCmd->rsplen + nCmd->cmdlen)); + + /* Word 10 */ + bf_set(wqe_nvme, &wqe->fcp_icmd.wqe_com, 1); + bf_set(wqe_wqes, &wqe->fcp_icmd.wqe_com, 1); + + /* + * Embed the payload in the last half of the WQE + * WQE words 16-30 get the NVME CMD IU payload + * + * WQE Word 16 is already setup with flags + * WQE words 17-19 get payload Words 2-4 + * WQE words 20-21 get payload Words 6-7 + * WQE words 22-29 get payload Words 16-23 + */ + wptr = &wqe->words[17]; /* WQE ptr */ + dptr = (uint32_t *)nCmd->cmdaddr; /* payload ptr */ + dptr += 2; /* Skip Words 0-1 in payload */ + + *wptr++ = *dptr++; /* Word 2 */ + *wptr++ = *dptr++; /* Word 3 */ + *wptr++ = *dptr++; /* Word 4 */ + dptr++; /* Skip Word 5 in payload */ + *wptr++ = *dptr++; /* Word 6 */ + *wptr++ = *dptr++; /* Word 7 */ + dptr += 8; /* Skip Words 8-15 in payload */ + *wptr++ = *dptr++; /* Word 16 */ + *wptr++ = *dptr++; /* Word 17 */ + *wptr++ = *dptr++; /* Word 18 */ + *wptr++ = *dptr++; /* Word 19 */ + *wptr++ = *dptr++; /* Word 20 */ + *wptr++ = *dptr++; /* Word 21 */ + *wptr++ = *dptr++; /* Word 22 */ + *wptr = *dptr; /* Word 23 */ +} + +/** + * lpfc_nvme_io_cmd_wqe_cmpl - Complete an NVME-over-FCP IO + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * + * Driver registers this routine as it io request handler. This + * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq + * data structure to the rport indicated in @lpfc_nvme_rport. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static void +lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvme_buf *lpfc_ncmd = + (struct lpfc_nvme_buf *)pwqeIn->context1; + struct lpfc_vport *vport = pwqeIn->vport; + struct nvmefc_fcp_req *nCmd; + struct nvme_fc_ersp_iu *ep; + struct nvme_fc_cmd_iu *cp; + struct lpfc_nvme_rport *rport; + struct lpfc_nodelist *ndlp; + unsigned long flags; + uint32_t code; + uint16_t cid, sqhd, data; + uint32_t *ptr; + + /* Sanity check on return of outstanding command */ + if (!lpfc_ncmd || !lpfc_ncmd->nvmeCmd || !lpfc_ncmd->nrport) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6071 Completion pointers bad on wqe %p.\n", + wcqe); + return; + } + phba->fc4NvmeIoCmpls++; + + nCmd = lpfc_ncmd->nvmeCmd; + rport = lpfc_ncmd->nrport; + + /* + * Catch race where our node has transitioned, but the + * transport is still transitioning. + */ + ndlp = rport->ndlp; + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6061 rport %p, ndlp %p, DID x%06x ndlp " + "not ready.\n", + rport, ndlp, rport->remoteport->port_id); + + ndlp = lpfc_findnode_did(vport, rport->remoteport->port_id); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6062 Ignoring NVME cmpl. 
No ndlp\n"); + goto out_err; + } + } + + code = bf_get(lpfc_wcqe_c_code, wcqe); + if (code == CQE_CODE_NVME_ERSP) { + /* For this type of CQE, we need to rebuild the rsp */ + ep = (struct nvme_fc_ersp_iu *)nCmd->rspaddr; + + /* + * Get Command Id from cmd to plug into response. This + * code is not needed in the next NVME Transport drop. + */ + cp = (struct nvme_fc_cmd_iu *)nCmd->cmdaddr; + cid = cp->sqe.common.command_id; + + /* + * RSN is in CQE word 2 + * SQHD is in CQE Word 3 bits 15:0 + * Cmd Specific info is in CQE Word 1 + * and in CQE Word 0 bits 15:0 + */ + sqhd = bf_get(lpfc_wcqe_c_sqhead, wcqe); + + /* Now lets build the NVME ERSP IU */ + ep->iu_len = cpu_to_be16(8); + ep->rsn = wcqe->parameter; + ep->xfrd_len = cpu_to_be32(nCmd->payload_length); + ep->rsvd12 = 0; + ptr = (uint32_t *)&ep->cqe.result.u64; + *ptr++ = wcqe->total_data_placed; + data = bf_get(lpfc_wcqe_c_ersp0, wcqe); + *ptr = (uint32_t)data; + ep->cqe.sq_head = sqhd; + ep->cqe.sq_id = nCmd->sqid; + ep->cqe.command_id = cid; + ep->cqe.status = 0; + + lpfc_ncmd->status = IOSTAT_SUCCESS; + lpfc_ncmd->result = 0; + nCmd->rcv_rsplen = LPFC_NVME_ERSP_LEN; + nCmd->transferred_length = nCmd->payload_length; + } else { + lpfc_ncmd->status = (bf_get(lpfc_wcqe_c_status, wcqe) & + LPFC_IOCB_STATUS_MASK); + lpfc_ncmd->result = wcqe->parameter; + + /* For NVME, the only failure path that results in an + * IO error is when the adapter rejects it. All other + * conditions are a success case and resolved by the + * transport. + * IOSTAT_FCP_RSP_ERROR means: + * 1. Length of data received doesn't match total + * transfer length in WQE + * 2. If the RSP payload does NOT match these cases: + * a. RSP length 12/24 bytes and all zeros + * b. NVME ERSP + */ + switch (lpfc_ncmd->status) { + case IOSTAT_SUCCESS: + nCmd->transferred_length = wcqe->total_data_placed; + nCmd->rcv_rsplen = 0; + nCmd->status = 0; + break; + case IOSTAT_FCP_RSP_ERROR: + nCmd->transferred_length = wcqe->total_data_placed; + nCmd->rcv_rsplen = wcqe->parameter; + nCmd->status = 0; + /* Sanity check */ + if (nCmd->rcv_rsplen == LPFC_NVME_ERSP_LEN) + break; + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6081 NVME Completion Protocol Error: " + "status x%x result x%x placed x%x\n", + lpfc_ncmd->status, lpfc_ncmd->result, + wcqe->total_data_placed); + break; + default: +out_err: + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6072 NVME Completion Error: " + "status x%x result x%x placed x%x\n", + lpfc_ncmd->status, lpfc_ncmd->result, + wcqe->total_data_placed); + nCmd->transferred_length = 0; + nCmd->rcv_rsplen = 0; + nCmd->status = NVME_SC_FC_TRANSPORT_ERROR; + } + } + + /* pick up SLI4 exhange busy condition */ + if (bf_get(lpfc_wcqe_c_xb, wcqe)) + lpfc_ncmd->flags |= LPFC_SBUF_XBUSY; + else + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; + + if (ndlp && NLP_CHK_NODE_ACT(ndlp)) + atomic_dec(&ndlp->cmd_pending); + + /* Update stats and complete the IO. There is + * no need for dma unprep because the nvme_transport + * owns the dma address. + */ + nCmd->done(nCmd); + + spin_lock_irqsave(&phba->hbalock, flags); + lpfc_ncmd->nrport = NULL; + spin_unlock_irqrestore(&phba->hbalock, flags); + + lpfc_release_nvme_buf(phba, lpfc_ncmd); +} + + +/** + * lpfc_nvme_prep_io_cmd - Issue an NVME-over-FCP IO + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * @lpfc_nvme_fcreq: IO request from nvme fc to driver. 
+ * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue + * + * Driver registers this routine as it io request handler. This + * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq + * data structure to the rport indicated in @lpfc_nvme_rport. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static int +lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, + struct lpfc_nvme_buf *lpfc_ncmd, + struct lpfc_nodelist *pnode) +{ + struct lpfc_hba *phba = vport->phba; + struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd; + struct lpfc_iocbq *pwqeq = &(lpfc_ncmd->cur_iocbq); + union lpfc_wqe128 *wqe = (union lpfc_wqe128 *)&pwqeq->wqe; + uint32_t req_len; + + if (!pnode || !NLP_CHK_NODE_ACT(pnode)) + return -EINVAL; + + /* + * There are three possibilities here - use scatter-gather segment, use + * the single mapping, or neither. + */ + wqe->fcp_iwrite.initial_xfer_len = 0; + if (nCmd->sg_cnt) { + if (nCmd->io_dir == NVMEFC_FCP_WRITE) { + /* Word 5 */ + if ((phba->cfg_nvme_enable_fb) && + (pnode->nlp_flag & NLP_FIRSTBURST)) { + req_len = lpfc_ncmd->nvmeCmd->payload_length; + if (req_len < pnode->nvme_fb_size) + wqe->fcp_iwrite.initial_xfer_len = + req_len; + else + wqe->fcp_iwrite.initial_xfer_len = + pnode->nvme_fb_size; + } + + /* Word 7 */ + bf_set(wqe_cmnd, &wqe->generic.wqe_com, + CMD_FCP_IWRITE64_WQE); + bf_set(wqe_pu, &wqe->generic.wqe_com, + PARM_READ_CHECK); + + /* Word 10 */ + bf_set(wqe_qosd, &wqe->fcp_iwrite.wqe_com, 0); + bf_set(wqe_iod, &wqe->fcp_iwrite.wqe_com, + LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe->fcp_iwrite.wqe_com, + LPFC_WQE_LENLOC_WORD4); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_iwrite.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cmd_type, &wqe->generic.wqe_com, + NVME_WRITE_CMD); + + /* Word 16 */ + wqe->words[16] = LPFC_NVME_EMBED_WRITE; + + phba->fc4NvmeOutputRequests++; + } else { + /* Word 7 */ + bf_set(wqe_cmnd, &wqe->generic.wqe_com, + CMD_FCP_IREAD64_WQE); + bf_set(wqe_pu, &wqe->generic.wqe_com, + PARM_READ_CHECK); + + /* Word 10 */ + bf_set(wqe_qosd, &wqe->fcp_iread.wqe_com, 0); + bf_set(wqe_iod, &wqe->fcp_iread.wqe_com, + LPFC_WQE_IOD_READ); + bf_set(wqe_lenloc, &wqe->fcp_iread.wqe_com, + LPFC_WQE_LENLOC_WORD4); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_iread.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cmd_type, &wqe->generic.wqe_com, + NVME_READ_CMD); + + /* Word 16 */ + wqe->words[16] = LPFC_NVME_EMBED_READ; + + phba->fc4NvmeInputRequests++; + } + } else { + /* Word 4 */ + wqe->fcp_icmd.rsrvd4 = 0; + + /* Word 7 */ + bf_set(wqe_cmnd, &wqe->generic.wqe_com, CMD_FCP_ICMND64_WQE); + bf_set(wqe_pu, &wqe->generic.wqe_com, 0); + + /* Word 10 */ + bf_set(wqe_qosd, &wqe->fcp_icmd.wqe_com, 1); + bf_set(wqe_iod, &wqe->fcp_icmd.wqe_com, LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe->fcp_icmd.wqe_com, + LPFC_WQE_LENLOC_NONE); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_icmd.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD); + + /* Word 16 */ + wqe->words[16] = LPFC_NVME_EMBED_CMD; + + phba->fc4NvmeControlRequests++; + } + /* + * Finish initializing those WQE fields that are independent + * of the nvme_cmnd request_buffer + */ + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->generic.wqe_com, + phba->sli4_hba.rpi_ids[pnode->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->generic.wqe_com, pwqeq->sli4_xritag); + + /* Word 7 */ + /* Preserve Class data in the ndlp. 
*/ + bf_set(wqe_class, &wqe->generic.wqe_com, + (pnode->nlp_fcp_info & 0x0f)); + + /* Word 8 */ + wqe->generic.wqe_com.abort_tag = pwqeq->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->generic.wqe_com, pwqeq->iotag); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->generic.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + + pwqeq->vport = vport; + return 0; +} + + +/** + * lpfc_nvme_prep_io_dma - Issue an NVME-over-FCP IO + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * @lpfc_nvme_fcreq: IO request from nvme fc to driver. + * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue + * + * Driver registers this routine as it io request handler. This + * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq + * data structure to the rport indicated in @lpfc_nvme_rport. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static int +lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, + struct lpfc_nvme_buf *lpfc_ncmd) +{ + struct lpfc_hba *phba = vport->phba; + struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd; + union lpfc_wqe128 *wqe = (union lpfc_wqe128 *)&lpfc_ncmd->cur_iocbq.wqe; + struct sli4_sge *sgl = lpfc_ncmd->nvme_sgl; + struct scatterlist *data_sg; + struct sli4_sge *first_data_sgl; + dma_addr_t physaddr; + uint32_t num_bde = 0; + uint32_t dma_len; + uint32_t dma_offset = 0; + int nseg, i; + + /* Fix up the command and response DMA stuff. */ + lpfc_nvme_adj_fcp_sgls(vport, lpfc_ncmd, nCmd); + + /* + * There are three possibilities here - use scatter-gather segment, use + * the single mapping, or neither. + */ + if (nCmd->sg_cnt) { + /* + * Jump over the cmd and rsp SGEs. The fix routine + * has already adjusted for this. + */ + sgl += 2; + + first_data_sgl = sgl; + lpfc_ncmd->seg_cnt = nCmd->sg_cnt; + if (lpfc_ncmd->seg_cnt > phba->cfg_sg_seg_cnt) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6058 Too many sg segments from " + "NVME Transport. Max %d, " + "nvmeIO sg_cnt %d\n", + phba->cfg_sg_seg_cnt, + lpfc_ncmd->seg_cnt); + lpfc_ncmd->seg_cnt = 0; + return 1; + } + + /* + * The driver established a maximum scatter-gather segment count + * during probe that limits the number of sg elements in any + * single nvme command. Just run through the seg_cnt and format + * the sge's. + */ + nseg = nCmd->sg_cnt; + data_sg = nCmd->first_sgl; + for (i = 0; i < nseg; i++) { + if (data_sg == NULL) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6059 dptr err %d, nseg %d\n", + i, nseg); + lpfc_ncmd->seg_cnt = 0; + return 1; + } + physaddr = data_sg->dma_address; + dma_len = data_sg->length; + sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr)); + sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr)); + sgl->word2 = le32_to_cpu(sgl->word2); + if ((num_bde + 1) == nseg) + bf_set(lpfc_sli4_sge_last, sgl, 1); + else + bf_set(lpfc_sli4_sge_last, sgl, 0); + bf_set(lpfc_sli4_sge_offset, sgl, dma_offset); + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = cpu_to_le32(dma_len); + + dma_offset += dma_len; + data_sg = sg_next(data_sg); + sgl++; + } + } else { + /* For this clause to be valid, the payload_length + * and sg_cnt must zero. 
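 * (sg_cnt is already zero on this branch, so a non-zero
 * payload_length is reported as a DMA prep error.)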
+ */ + if (nCmd->payload_length != 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6063 NVME DMA Prep Err: sg_cnt %d " + "payload_length x%x\n", + nCmd->sg_cnt, nCmd->payload_length); + return 1; + } + } + + /* + * Due to difference in data length between DIF/non-DIF paths, + * we need to set word 4 of WQE here + */ + wqe->fcp_iread.total_xfer_len = nCmd->payload_length; + return 0; +} + +/** + * lpfc_nvme_fcp_io_submit - Issue an NVME-over-FCP IO + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * @lpfc_nvme_fcreq: IO request from nvme fc to driver. + * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue + * + * Driver registers this routine as it io request handler. This + * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq + * data structure to the rport + indicated in @lpfc_nvme_rport. + * + * Return value : + * 0 - Success + * TODO: What are the failure codes. + **/ +static int +lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + void *hw_queue_handle, + struct nvmefc_fcp_req *pnvme_fcreq) +{ + int ret = 0; + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + struct lpfc_nvme_buf *lpfc_ncmd; + struct lpfc_nvme_rport *rport; + struct lpfc_nvme_qhandle *lpfc_queue_info; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + vport = lport->vport; + phba = vport->phba; + + rport = (struct lpfc_nvme_rport *)pnvme_rport->private; + lpfc_queue_info = (struct lpfc_nvme_qhandle *)hw_queue_handle; + + /* + * Catch race where our node has transitioned, but the + * transport is still transitioning. + */ + ndlp = rport->ndlp; + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6053 rport %p, ndlp %p, DID x%06x " + "ndlp not ready.\n", + rport, ndlp, pnvme_rport->port_id); + + ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6066 Missing node for DID %x\n", + pnvme_rport->port_id); + ret = -ENODEV; + goto out_fail; + } + } + + /* The remote node has to be a mapped target or it's an error. */ + if ((ndlp->nlp_type & NLP_NVME_TARGET) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6036 rport %p, DID x%06x not ready for " + "IO. State x%x, Type x%x\n", + rport, pnvme_rport->port_id, + ndlp->nlp_state, ndlp->nlp_type); + ret = -ENODEV; + goto out_fail; + + } + + /* The node is shared with FCP IO, make sure the IO pending count does + * not exceed the programmed depth. + */ + if (atomic_read(&ndlp->cmd_pending) >= ndlp->cmd_qdepth) { + ret = -EAGAIN; + goto out_fail; + } + + lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp); + if (lpfc_ncmd == NULL) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR, + "6065 driver's buffer pool is empty, " + "IO failed\n"); + ret = -ENOMEM; + goto out_fail; + } + + /* + * Store the data needed by the driver to issue, abort, and complete + * an IO. + * Do not let the IO hang out forever. There is no midlayer issuing + * an abort so inform the FW of the maximum IO pending time. 
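+ * The submit time is recorded in start_time so the driver can later
+ * judge how long the command has been outstanding.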
+ */ + pnvme_fcreq->private = (void *)lpfc_ncmd; + lpfc_ncmd->nvmeCmd = pnvme_fcreq; + lpfc_ncmd->nrport = rport; + lpfc_ncmd->start_time = jiffies; + + lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp); + ret = lpfc_nvme_prep_io_dma(vport, lpfc_ncmd); + if (ret) { + ret = -ENOMEM; + goto out_free_nvme_buf; + } + + atomic_inc(&ndlp->cmd_pending); + + /* + * Issue the IO on the WQ indicated by index in the hw_queue_handle. + * This identfier was create in our hardware queue create callback + * routine. The driver now is dependent on the IO queue steering from + * the transport. We are trusting the upper NVME layers know which + * index to use and that they have affinitized a CPU to this hardware + * queue. A hardware queue maps to a driver MSI-X vector/EQ/CQ/WQ. + */ + lpfc_ncmd->cur_iocbq.hba_wqidx = lpfc_queue_info->index; + + ret = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, &lpfc_ncmd->cur_iocbq); + if (ret) { + atomic_dec(&ndlp->cmd_pending); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6113 FCP could not issue WQE err %x " + "sid: x%x did: x%x oxid: x%x\n", + ret, vport->fc_myDID, ndlp->nlp_DID, + lpfc_ncmd->cur_iocbq.sli4_xritag); + ret = -EINVAL; + goto out_free_nvme_buf; + } + + return 0; + + out_free_nvme_buf: + lpfc_release_nvme_buf(phba, lpfc_ncmd); + out_fail: + return ret; +} + +/** + * lpfc_nvme_abort_fcreq_cmpl - Complete an NVME FCP abort request. + * @phba: Pointer to HBA context object + * @cmdiocb: Pointer to command iocb object. + * @rspiocb: Pointer to response iocb object. + * + * This is the callback function for any NVME FCP IO that was aborted. + * + * Return value: + * None + **/ +void +lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_wcqe_complete *abts_cmpl) +{ + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6145 ABORT_XRI_CN completing on rpi x%x " + "original iotag x%x, abort cmd iotag x%x " + "req_tag x%x, status x%x, hwstatus x%x\n", + cmdiocb->iocb.un.acxri.abortContextTag, + cmdiocb->iocb.un.acxri.abortIoTag, + cmdiocb->iotag, + bf_get(lpfc_wcqe_c_request_tag, abts_cmpl), + bf_get(lpfc_wcqe_c_status, abts_cmpl), + bf_get(lpfc_wcqe_c_hw_status, abts_cmpl)); + lpfc_sli_release_iocbq(phba, cmdiocb); +} + +/** + * lpfc_nvme_fcp_abort - Issue an NVME-over-FCP ABTS + * @lpfc_pnvme: Pointer to the driver's nvme instance data + * @lpfc_nvme_lport: Pointer to the driver's local port data + * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * @lpfc_nvme_fcreq: IO request from nvme fc to driver. + * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue + * + * Driver registers this routine as its nvme request io abort handler. This + * routine issues an fcp Abort WQE with data from the @lpfc_nvme_fcpreq + * data structure to the rport indicated in @lpfc_nvme_rport. This routine + * is executed asynchronously - one the target is validated as "MAPPED" and + * ready for IO, the driver issues the abort request and returns. 
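+ * The abort is attempted only while the original WQE is still on the
+ * txcmplq and no abort has already been issued for it; otherwise the
+ * request is skipped.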
+ * + * Return value: + * None + **/ +static void +lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + void *hw_queue_handle, + struct nvmefc_fcp_req *pnvme_fcreq) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + struct lpfc_nvme_rport *rport; + struct lpfc_nvme_buf *lpfc_nbuf; + struct lpfc_iocbq *abts_buf; + struct lpfc_iocbq *nvmereq_wqe; + union lpfc_wqe *abts_wqe; + unsigned long flags; + int ret_val; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + rport = (struct lpfc_nvme_rport *)pnvme_rport->private; + vport = lport->vport; + phba = vport->phba; + + /* Announce entry to new IO submit field. */ + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, + "6002 Abort Request to rport DID x%06x " + "for nvme_fc_req %p\n", + pnvme_rport->port_id, + pnvme_fcreq); + + /* + * Catch race where our node has transitioned, but the + * transport is still transitioning. + */ + ndlp = rport->ndlp; + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_ABTS, + "6054 rport %p, ndlp %p, DID x%06x ndlp " + " not ready.\n", + rport, ndlp, pnvme_rport->port_id); + + ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, + "6055 Could not find node for " + "DID %x\n", + pnvme_rport->port_id); + return; + } + } + + /* The remote node has to be ready to send an abort. */ + if ((ndlp->nlp_state != NLP_STE_MAPPED_NODE) && + !(ndlp->nlp_type & NLP_NVME_TARGET)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_ABTS, + "6048 rport %p, DID x%06x not ready for " + "IO. State x%x, Type x%x\n", + rport, pnvme_rport->port_id, + ndlp->nlp_state, ndlp->nlp_type); + return; + } + + /* If the hba is getting reset, this flag is set. It is + * cleared when the reset is complete and rings reestablished. + */ + spin_lock_irqsave(&phba->hbalock, flags); + /* driver queued commands are in process of being flushed */ + if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6139 Driver in reset cleanup - flushing " + "NVME Req now. hba_flag x%x\n", + phba->hba_flag); + return; + } + + lpfc_nbuf = (struct lpfc_nvme_buf *)pnvme_fcreq->private; + if (!lpfc_nbuf) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6140 NVME IO req has no matching lpfc nvme " + "io buffer. Skipping abort req.\n"); + return; + } else if (!lpfc_nbuf->nvmeCmd) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6141 lpfc NVME IO req has no nvme_fcreq " + "io buffer. Skipping abort req.\n"); + return; + } + + /* + * The lpfc_nbuf and the mapped nvme_fcreq in the driver's + * state must match the nvme_fcreq passed by the nvme + * transport. If they don't match, it is likely the driver + * has already completed the NVME IO and the nvme transport + * has not seen it yet. + */ + if (lpfc_nbuf->nvmeCmd != pnvme_fcreq) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6143 NVME req mismatch: " + "lpfc_nbuf %p nvmeCmd %p, " + "pnvme_fcreq %p. Skipping Abort\n", + lpfc_nbuf, lpfc_nbuf->nvmeCmd, + pnvme_fcreq); + return; + } + + /* Don't abort IOs no longer on the pending queue. 
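+ * A WQE that is no longer on the txcmplq has already completed (or is
+ * completing), so there is nothing left to abort.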
*/ + nvmereq_wqe = &lpfc_nbuf->cur_iocbq; + if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6142 NVME IO req %p not queued - skipping " + "abort req\n", + pnvme_fcreq); + return; + } + + /* Outstanding abort is in progress */ + if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6144 Outstanding NVME I/O Abort Request " + "still pending on nvme_fcreq %p, " + "lpfc_ncmd %p\n", + pnvme_fcreq, lpfc_nbuf); + return; + } + + abts_buf = __lpfc_sli_get_iocbq(phba); + if (!abts_buf) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6136 No available abort wqes. Skipping " + "Abts req for nvme_fcreq %p.\n", + pnvme_fcreq); + return; + } + + /* Ready - mark outstanding as aborted by driver. */ + nvmereq_wqe->iocb_flag |= LPFC_DRIVER_ABORTED; + + /* Complete prepping the abort wqe and issue to the FW. */ + abts_wqe = &abts_buf->wqe; + + /* WQEs are reused. Clear stale data and set key fields to + * zero like ia, iaab, iaar, xri_tag, and ctxt_tag. + */ + memset(abts_wqe, 0, sizeof(union lpfc_wqe)); + bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); + + /* word 7 */ + bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); + bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); + bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, + nvmereq_wqe->iocb.ulpClass); + + /* word 8 - tell the FW to abort the IO associated with this + * outstanding exchange ID. + */ + abts_wqe->abort_cmd.wqe_com.abort_tag = nvmereq_wqe->sli4_xritag; + + /* word 9 - this is the iotag for the abts_wqe completion. */ + bf_set(wqe_reqtag, &abts_wqe->abort_cmd.wqe_com, + abts_buf->iotag); + + /* word 10 */ + bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, nvmereq_wqe->hba_wqidx); + bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); + + /* word 11 */ + bf_set(wqe_cmd_type, &abts_wqe->abort_cmd.wqe_com, OTHER_COMMAND); + bf_set(wqe_wqec, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_cqid, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + + /* ABTS WQE must go to the same WQ as the WQE to be aborted */ + abts_buf->iocb_flag |= LPFC_IO_NVME; + abts_buf->hba_wqidx = nvmereq_wqe->hba_wqidx; + abts_buf->vport = vport; + abts_buf->wqe_cmpl = lpfc_nvme_abort_fcreq_cmpl; + ret_val = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_buf); + spin_unlock_irqrestore(&phba->hbalock, flags); + if (ret_val == IOCB_ERROR) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6137 Failed abts issue_wqe with status x%x " + "for nvme_fcreq %p.\n", + ret_val, pnvme_fcreq); + lpfc_sli_release_iocbq(phba, abts_buf); + return; + } + + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + "6138 Transport Abort NVME Request Issued for\n" + "ox_id x%x on reqtag x%x\n", + nvmereq_wqe->sli4_xritag, + abts_buf->iotag); +} + +/* Declare and initialization an instance of the FC NVME template. 
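+ * The nvme_fc transport calls back into the driver through these entry
+ * points. max_hw_queues and max_sgl_segments are placeholders here and
+ * are overwritten with HBA-specific values in lpfc_nvme_create_localport().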
*/ +static struct nvme_fc_port_template lpfc_nvme_template = { + /* initiator-based functions */ + .localport_delete = lpfc_nvme_localport_delete, + .remoteport_delete = lpfc_nvme_remoteport_delete, + .create_queue = lpfc_nvme_create_queue, + .delete_queue = lpfc_nvme_delete_queue, + .ls_req = lpfc_nvme_ls_req, + .fcp_io = lpfc_nvme_fcp_io_submit, + .ls_abort = lpfc_nvme_ls_abort, + .fcp_abort = lpfc_nvme_fcp_abort, + + .max_hw_queues = 1, + .max_sgl_segments = LPFC_NVME_DEFAULT_SEGS, + .max_dif_sgl_segments = LPFC_NVME_DEFAULT_SEGS, + .dma_boundary = 0xFFFFFFFF, + + /* Sizes of additional private data for data structures. + * No use for the last two sizes at this time. + */ + .local_priv_sz = sizeof(struct lpfc_nvme_lport), + .remote_priv_sz = sizeof(struct lpfc_nvme_rport), + .lsrqst_priv_sz = 0, + .fcprqst_priv_sz = 0, +}; + +/** + * lpfc_sli4_post_nvme_sgl_block - post a block of nvme sgl list to firmware + * @phba: pointer to lpfc hba data structure. + * @nblist: pointer to nvme buffer list. + * @count: number of scsi buffers on the list. + * + * This routine is invoked to post a block of @count scsi sgl pages from a + * SCSI buffer list @nblist to the HBA using non-embedded mailbox command. + * No Lock is held. + * + **/ +static int +lpfc_sli4_post_nvme_sgl_block(struct lpfc_hba *phba, + struct list_head *nblist, + int count) +{ + struct lpfc_nvme_buf *lpfc_ncmd; + struct lpfc_mbx_post_uembed_sgl_page1 *sgl; + struct sgl_page_pairs *sgl_pg_pairs; + void *viraddr; + LPFC_MBOXQ_t *mbox; + uint32_t reqlen, alloclen, pg_pairs; + uint32_t mbox_tmo; + uint16_t xritag_start = 0; + int rc = 0; + uint32_t shdr_status, shdr_add_status; + dma_addr_t pdma_phys_bpl1; + union lpfc_sli4_cfg_shdr *shdr; + + /* Calculate the requested length of the dma memory */ + reqlen = count * sizeof(struct sgl_page_pairs) + + sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t); + if (reqlen > SLI4_PAGE_SIZE) { + lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, + "6118 Block sgl registration required DMA " + "size (%d) great than a page\n", reqlen); + return -ENOMEM; + } + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6119 Failed to allocate mbox cmd memory\n"); + return -ENOMEM; + } + + /* Allocate DMA memory and set up the non-embedded mailbox command */ + alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE, + LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES, reqlen, + LPFC_SLI4_MBX_NEMBED); + + if (alloclen < reqlen) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6120 Allocated DMA memory size (%d) is " + "less than the requested DMA memory " + "size (%d)\n", alloclen, reqlen); + lpfc_sli4_mbox_cmd_free(phba, mbox); + return -ENOMEM; + } + + /* Get the first SGE entry from the non-embedded DMA memory */ + viraddr = mbox->sge_array->addr[0]; + + /* Set up the SGL pages in the non-embedded DMA pages */ + sgl = (struct lpfc_mbx_post_uembed_sgl_page1 *)viraddr; + sgl_pg_pairs = &sgl->sgl_pg_pairs; + + pg_pairs = 0; + list_for_each_entry(lpfc_ncmd, nblist, list) { + /* Set up the sge entry */ + sgl_pg_pairs->sgl_pg0_addr_lo = + cpu_to_le32(putPaddrLow(lpfc_ncmd->dma_phys_sgl)); + sgl_pg_pairs->sgl_pg0_addr_hi = + cpu_to_le32(putPaddrHigh(lpfc_ncmd->dma_phys_sgl)); + if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE) + pdma_phys_bpl1 = lpfc_ncmd->dma_phys_sgl + + SGL_PAGE_SIZE; + else + pdma_phys_bpl1 = 0; + sgl_pg_pairs->sgl_pg1_addr_lo = + cpu_to_le32(putPaddrLow(pdma_phys_bpl1)); + sgl_pg_pairs->sgl_pg1_addr_hi = + 
cpu_to_le32(putPaddrHigh(pdma_phys_bpl1));
+ /* Keep the first xritag on the list */
+ if (pg_pairs == 0)
+ xritag_start = lpfc_ncmd->cur_iocbq.sli4_xritag;
+ sgl_pg_pairs++;
+ pg_pairs++;
+ }
+ bf_set(lpfc_post_sgl_pages_xri, sgl, xritag_start);
+ bf_set(lpfc_post_sgl_pages_xricnt, sgl, pg_pairs);
+ /* Perform endian conversion if necessary */
+ sgl->word0 = cpu_to_le32(sgl->word0);
+
+ if (!phba->sli4_hba.intr_enable)
+ rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
+ else {
+ mbox_tmo = lpfc_mbox_tmo_val(phba, mbox);
+ rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
+ }
+ shdr = (union lpfc_sli4_cfg_shdr *)&sgl->cfg_shdr;
+ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+ shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+ if (rc != MBX_TIMEOUT)
+ lpfc_sli4_mbox_cmd_free(phba, mbox);
+ if (shdr_status || shdr_add_status || rc) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "6125 POST_SGL_BLOCK mailbox command failed "
+ "status x%x add_status x%x mbx status x%x\n",
+ shdr_status, shdr_add_status, rc);
+ rc = -ENXIO;
+ }
+ return rc;
+}
+
+/**
+ * lpfc_post_nvme_sgl_list - Post blocks of nvme buffer sgls from a list
+ * @phba: pointer to lpfc hba data structure.
+ * @post_nblist: pointer to the nvme buffer list.
+ * @sb_count: number of nvme buffers on @post_nblist.
+ *
+ * This routine walks the list of nvme buffers that was passed in. It attempts
+ * to construct blocks of nvme buffer sgls with contiguous XRIs and uses the
+ * non-embedded SGL block post mailbox command to post them to the port. Any
+ * single NVME buffer sgl with a non-contiguous XRI is posted with the
+ * embedded SGL post mailbox command instead. The @post_nblist passed in must
+ * be a local list, so no lock is needed while manipulating it.
+ *
+ * Returns: 0 = failure, non-zero number of successfully posted buffers.
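+ * A negative value is returned only when @sb_count is invalid.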
+ **/ +static int +lpfc_post_nvme_sgl_list(struct lpfc_hba *phba, + struct list_head *post_nblist, int sb_count) +{ + struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next; + int status, sgl_size; + int post_cnt = 0, block_cnt = 0, num_posting = 0, num_posted = 0; + dma_addr_t pdma_phys_sgl1; + int last_xritag = NO_XRI; + int cur_xritag; + LIST_HEAD(prep_nblist); + LIST_HEAD(blck_nblist); + LIST_HEAD(nvme_nblist); + + /* sanity check */ + if (sb_count <= 0) + return -EINVAL; + + sgl_size = phba->cfg_sg_dma_buf_size; + + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, post_nblist, list) { + list_del_init(&lpfc_ncmd->list); + block_cnt++; + if ((last_xritag != NO_XRI) && + (lpfc_ncmd->cur_iocbq.sli4_xritag != last_xritag + 1)) { + /* a hole in xri block, form a sgl posting block */ + list_splice_init(&prep_nblist, &blck_nblist); + post_cnt = block_cnt - 1; + /* prepare list for next posting block */ + list_add_tail(&lpfc_ncmd->list, &prep_nblist); + block_cnt = 1; + } else { + /* prepare list for next posting block */ + list_add_tail(&lpfc_ncmd->list, &prep_nblist); + /* enough sgls for non-embed sgl mbox command */ + if (block_cnt == LPFC_NEMBED_MBOX_SGL_CNT) { + list_splice_init(&prep_nblist, &blck_nblist); + post_cnt = block_cnt; + block_cnt = 0; + } + } + num_posting++; + last_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag; + + /* end of repost sgl list condition for NVME buffers */ + if (num_posting == sb_count) { + if (post_cnt == 0) { + /* last sgl posting block */ + list_splice_init(&prep_nblist, &blck_nblist); + post_cnt = block_cnt; + } else if (block_cnt == 1) { + /* last single sgl with non-contiguous xri */ + if (sgl_size > SGL_PAGE_SIZE) + pdma_phys_sgl1 = + lpfc_ncmd->dma_phys_sgl + + SGL_PAGE_SIZE; + else + pdma_phys_sgl1 = 0; + cur_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag; + status = lpfc_sli4_post_sgl(phba, + lpfc_ncmd->dma_phys_sgl, + pdma_phys_sgl1, cur_xritag); + if (status) { + /* failure, put on abort nvme list */ + lpfc_ncmd->exch_busy = 1; + } else { + /* success, put on NVME buffer list */ + lpfc_ncmd->exch_busy = 0; + lpfc_ncmd->status = IOSTAT_SUCCESS; + num_posted++; + } + /* success, put on NVME buffer sgl list */ + list_add_tail(&lpfc_ncmd->list, &nvme_nblist); + } + } + + /* continue until a nembed page worth of sgls */ + if (post_cnt == 0) + continue; + + /* post block of NVME buffer list sgls */ + status = lpfc_sli4_post_nvme_sgl_block(phba, &blck_nblist, + post_cnt); + + /* don't reset xirtag due to hole in xri block */ + if (block_cnt == 0) + last_xritag = NO_XRI; + + /* reset NVME buffer post count for next round of posting */ + post_cnt = 0; + + /* put posted NVME buffer-sgl posted on NVME buffer sgl list */ + while (!list_empty(&blck_nblist)) { + list_remove_head(&blck_nblist, lpfc_ncmd, + struct lpfc_nvme_buf, list); + if (status) { + /* failure, put on abort nvme list */ + lpfc_ncmd->exch_busy = 1; + } else { + /* success, put on NVME buffer list */ + lpfc_ncmd->exch_busy = 0; + lpfc_ncmd->status = IOSTAT_SUCCESS; + num_posted++; + } + list_add_tail(&lpfc_ncmd->list, &nvme_nblist); + } + } + /* Push NVME buffers with sgl posted to the available list */ + while (!list_empty(&nvme_nblist)) { + list_remove_head(&nvme_nblist, lpfc_ncmd, + struct lpfc_nvme_buf, list); + lpfc_release_nvme_buf(phba, lpfc_ncmd); + } + return num_posted; +} + +/** + * lpfc_repost_nvme_sgl_list - Repost all the allocated nvme buffer sgls + * @phba: pointer to lpfc hba data structure. 
+ * + * This routine walks the list of nvme buffers that have been allocated and + * repost them to the port by using SGL block post. This is needed after a + * pci_function_reset/warm_start or start. The lpfc_hba_down_post_s4 routine + * is responsible for moving all nvme buffers on the lpfc_abts_nvme_sgl_list + * to the lpfc_nvme_buf_list. If the repost fails, reject all nvme buffers. + * + * Returns: 0 = success, non-zero failure. + **/ +int +lpfc_repost_nvme_sgl_list(struct lpfc_hba *phba) +{ + LIST_HEAD(post_nblist); + int num_posted, rc = 0; + + /* get all NVME buffers need to repost to a local list */ + spin_lock_irq(&phba->nvme_buf_list_get_lock); + spin_lock(&phba->nvme_buf_list_put_lock); + list_splice_init(&phba->lpfc_nvme_buf_list_get, &post_nblist); + list_splice(&phba->lpfc_nvme_buf_list_put, &post_nblist); + spin_unlock(&phba->nvme_buf_list_put_lock); + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + + /* post the list of nvme buffer sgls to port if available */ + if (!list_empty(&post_nblist)) { + num_posted = lpfc_post_nvme_sgl_list(phba, &post_nblist, + phba->sli4_hba.nvme_xri_cnt); + /* failed to post any nvme buffer, return error */ + if (num_posted == 0) + rc = -EIO; + } + return rc; +} + +/** + * lpfc_new_nvme_buf - Scsi buffer allocator for HBA with SLI4 IF spec + * @vport: The virtual port for which this call being executed. + * @num_to_allocate: The requested number of buffers to allocate. + * + * This routine allocates nvme buffers for device with SLI-4 interface spec, + * the nvme buffer contains all the necessary information needed to initiate + * a NVME I/O. After allocating up to @num_to_allocate NVME buffers and put + * them on a list, it post them to the port by using SGL block post. + * + * Return codes: + * int - number of nvme buffers that were allocated and posted. + * 0 = failure, less than num_to_alloc is a partial failure. + **/ +static int +lpfc_new_nvme_buf(struct lpfc_vport *vport, int num_to_alloc) +{ + struct lpfc_hba *phba = vport->phba; + struct lpfc_nvme_buf *lpfc_ncmd; + struct lpfc_iocbq *pwqeq; + union lpfc_wqe128 *wqe; + struct sli4_sge *sgl; + dma_addr_t pdma_phys_sgl; + uint16_t iotag, lxri = 0; + int bcnt, num_posted, sgl_size; + LIST_HEAD(prep_nblist); + LIST_HEAD(post_nblist); + LIST_HEAD(nvme_nblist); + + sgl_size = phba->cfg_sg_dma_buf_size; + + for (bcnt = 0; bcnt < num_to_alloc; bcnt++) { + lpfc_ncmd = kzalloc(sizeof(struct lpfc_nvme_buf), GFP_KERNEL); + if (!lpfc_ncmd) + break; + /* + * Get memory from the pci pool to map the virt space to + * pci bus space for an I/O. The DMA buffer includes the + * number of SGE's necessary to support the sg_tablesize. + */ + lpfc_ncmd->data = pci_pool_alloc(phba->lpfc_sg_dma_buf_pool, + GFP_KERNEL, + &lpfc_ncmd->dma_handle); + if (!lpfc_ncmd->data) { + kfree(lpfc_ncmd); + break; + } + memset(lpfc_ncmd->data, 0, phba->cfg_sg_dma_buf_size); + + lxri = lpfc_sli4_next_xritag(phba); + if (lxri == NO_XRI) { + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + lpfc_ncmd->data, lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + break; + } + pwqeq = &(lpfc_ncmd->cur_iocbq); + wqe = (union lpfc_wqe128 *)&pwqeq->wqe; + + /* Allocate iotag for lpfc_ncmd->cur_iocbq. 
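+ * The iotag is later used as both the WQE request tag and the abort
+ * tag, so a buffer whose iotag allocation fails is freed along with
+ * its XRI.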
*/ + iotag = lpfc_sli_next_iotag(phba, pwqeq); + if (iotag == 0) { + pci_pool_free(phba->lpfc_sg_dma_buf_pool, + lpfc_ncmd->data, lpfc_ncmd->dma_handle); + kfree(lpfc_ncmd); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6121 Failed to allocated IOTAG for" + " XRI:0x%x\n", lxri); + lpfc_sli4_free_xri(phba, lxri); + break; + } + pwqeq->sli4_lxritag = lxri; + pwqeq->sli4_xritag = phba->sli4_hba.xri_ids[lxri]; + pwqeq->iocb_flag |= LPFC_IO_NVME; + pwqeq->context1 = lpfc_ncmd; + pwqeq->wqe_cmpl = lpfc_nvme_io_cmd_wqe_cmpl; + + /* Initialize local short-hand pointers. */ + lpfc_ncmd->nvme_sgl = lpfc_ncmd->data; + sgl = lpfc_ncmd->nvme_sgl; + pdma_phys_sgl = lpfc_ncmd->dma_handle; + lpfc_ncmd->dma_phys_sgl = pdma_phys_sgl; + + /* Rsp SGE will be filled in when we rcv an IO + * from the NVME Layer to be sent. + * The cmd is going to be embedded so we need a SKIP SGE. + */ + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP); + bf_set(lpfc_sli4_sge_last, sgl, 0); + sgl->word2 = cpu_to_le32(sgl->word2); + /* Fill in word 3 / sgl_len during cmd submission */ + + lpfc_ncmd->cur_iocbq.context1 = lpfc_ncmd; + + /* Word 7 */ + bf_set(wqe_erp, &wqe->generic.wqe_com, 0); + /* NVME upper layers will time things out, if needed */ + bf_set(wqe_tmo, &wqe->generic.wqe_com, 0); + + /* Word 10 */ + bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0); + bf_set(wqe_dbde, &wqe->generic.wqe_com, 1); + + /* add the nvme buffer to a post list */ + list_add_tail(&lpfc_ncmd->list, &post_nblist); + spin_lock_irq(&phba->nvme_buf_list_get_lock); + phba->sli4_hba.nvme_xri_cnt++; + spin_unlock_irq(&phba->nvme_buf_list_get_lock); + } + lpfc_printf_log(phba, KERN_INFO, LOG_NVME, + "6114 Allocate %d out of %d requested new NVME " + "buffers\n", bcnt, num_to_alloc); + + /* post the list of nvme buffer sgls to port if available */ + if (!list_empty(&post_nblist)) + num_posted = lpfc_post_nvme_sgl_list(phba, + &post_nblist, bcnt); + else + num_posted = 0; + + return num_posted; +} + +/** + * lpfc_get_nvme_buf - Get a nvme buffer from lpfc_nvme_buf_list of the HBA + * @phba: The HBA for which this call is being executed. + * + * This routine removes a nvme buffer from head of @phba lpfc_nvme_buf_list list + * and returns to caller. + * + * Return codes: + * NULL - Error + * Pointer to lpfc_nvme_buf - Success + **/ +static struct lpfc_nvme_buf * +lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +{ + struct lpfc_nvme_buf *lpfc_ncmd, *lpfc_ncmd_next; + unsigned long iflag = 0; + int found = 0; + + spin_lock_irqsave(&phba->nvme_buf_list_get_lock, iflag); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &phba->lpfc_nvme_buf_list_get, list) { + if (lpfc_test_rrq_active(phba, ndlp, + lpfc_ncmd->cur_iocbq.sli4_lxritag)) + continue; + list_del(&lpfc_ncmd->list); + found = 1; + break; + } + if (!found) { + spin_lock(&phba->nvme_buf_list_put_lock); + list_splice(&phba->lpfc_nvme_buf_list_put, + &phba->lpfc_nvme_buf_list_get); + INIT_LIST_HEAD(&phba->lpfc_nvme_buf_list_put); + spin_unlock(&phba->nvme_buf_list_put_lock); + list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, + &phba->lpfc_nvme_buf_list_get, list) { + if (lpfc_test_rrq_active( + phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag)) + continue; + list_del(&lpfc_ncmd->list); + found = 1; + break; + } + } + spin_unlock_irqrestore(&phba->nvme_buf_list_get_lock, iflag); + if (!found) + return NULL; + return lpfc_ncmd; +} + +/** + * lpfc_release_nvme_buf: Return a nvme buffer back to hba nvme buf list. + * @phba: The Hba for which this call is being executed. 
+ * @lpfc_ncmd: The nvme buffer which is being released. + * + * This routine releases @lpfc_ncmd nvme buffer by adding it to tail of @phba + * lpfc_nvme_buf_list list. For SLI4 XRI's are tied to the nvme buffer + * and cannot be reused for at least RA_TOV amount of time if it was + * aborted. + **/ +static void +lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) +{ + unsigned long iflag = 0; + + lpfc_ncmd->nonsg_phys = 0; + if (lpfc_ncmd->exch_busy) { + spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, + iflag); + lpfc_ncmd->nvmeCmd = NULL; + list_add_tail(&lpfc_ncmd->list, + &phba->sli4_hba.lpfc_abts_nvme_buf_list); + spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, + iflag); + } else { + lpfc_ncmd->nvmeCmd = NULL; + lpfc_ncmd->cur_iocbq.iocb_flag = LPFC_IO_NVME; + spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); + list_add_tail(&lpfc_ncmd->list, &phba->lpfc_nvme_buf_list_put); + spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); + } +} + +/** + * lpfc_nvme_create_localport - Create/Bind an nvme localport instance. + * @pvport - the lpfc_vport instance requesting a localport. + * + * This routine is invoked to create an nvme localport instance to bind + * to the nvme_fc_transport. It is called once during driver load + * like lpfc_create_shost after all other services are initialized. + * It requires a vport, vpi, and wwns at call time. Other localport + * parameters are modified as the driver's FCID and the Fabric WWN + * are established. + * + * Return codes + * 0 - successful + * -ENOMEM - no heap memory available + * other values - from nvme registration upcall + **/ +int +lpfc_nvme_create_localport(struct lpfc_vport *vport) +{ + struct lpfc_hba *phba = vport->phba; + struct nvme_fc_port_info nfcp_info; + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + int len, ret = 0; + + /* Initialize this localport instance. The vport wwn usage ensures + * that NPIV is accounted for. + */ + memset(&nfcp_info, 0, sizeof(struct nvme_fc_port_info)); + nfcp_info.port_role = FC_PORT_ROLE_NVME_INITIATOR; + nfcp_info.node_name = wwn_to_u64(vport->fc_nodename.u.wwn); + nfcp_info.port_name = wwn_to_u64(vport->fc_portname.u.wwn); + + /* For now need + 1 to get around NVME transport logic */ + lpfc_nvme_template.max_sgl_segments = phba->cfg_sg_seg_cnt + 1; + lpfc_nvme_template.max_hw_queues = phba->cfg_nvme_io_channel; + + /* localport is allocated from the stack, but the registration + * call allocates heap memory as well as the private area. + */ + ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, + &vport->phba->pcidev->dev, &localport); + if (!ret) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC, + "6005 Successfully registered local " + "NVME port num %d, localP %p, private %p, " + "sg_seg %d\n", + localport->port_num, localport, + localport->private, + lpfc_nvme_template.max_sgl_segments); + + /* Private is our lport size declared in the template. */ + lport = (struct lpfc_nvme_lport *)localport->private; + vport->localport = localport; + lport->vport = vport; + INIT_LIST_HEAD(&lport->rport_list); + vport->nvmei_support = 1; + } + + len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max); + vport->phba->total_nvme_bufs += len; + return ret; +} + +/** + * lpfc_nvme_destroy_localport - Destroy lpfc_nvme bound to nvme transport. + * @pnvme: pointer to lpfc nvme data structure. + * + * This routine is invoked to destroy all lports bound to the phba. 
+ * The lport memory was allocated by the nvme fc transport and is + * released there. This routine ensures all rports bound to the + * lport have been disconnected. + * + **/ +void +lpfc_nvme_destroy_localport(struct lpfc_vport *vport) +{ + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; + int ret; + + if (vport->nvmei_support == 0) + return; + + localport = vport->localport; + vport->localport = NULL; + lport = (struct lpfc_nvme_lport *)localport->private; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, + "6011 Destroying NVME localport %p\n", + localport); + + list_for_each_entry_safe(rport, rport_next, &lport->rport_list, list) { + /* The last node ref has to get released now before the rport + * private memory area is released by the transport. + */ + list_del(&rport->list); + + init_completion(&rport->rport_unreg_done); + ret = nvme_fc_unregister_remoteport(rport->remoteport); + if (ret) + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6008 rport fail destroy %x\n", ret); + wait_for_completion_timeout(&rport->rport_unreg_done, 5); + } + /* lport's rport list is clear. Unregister + * lport and release resources. + */ + init_completion(&lport->lport_unreg_done); + ret = nvme_fc_unregister_localport(localport); + wait_for_completion_timeout(&lport->lport_unreg_done, 5); + + /* Regardless of the unregister upcall response, clear + * nvmei_support. All rports are unregistered and the + * driver will clean up. + */ + vport->nvmei_support = 0; + if (ret == 0) { + lpfc_printf_vlog(vport, + KERN_INFO, LOG_NVME_DISC, + "6009 Unregistered lport Success\n"); + } else { + lpfc_printf_vlog(vport, + KERN_INFO, LOG_NVME_DISC, + "6010 Unregistered lport " + "Failed, status x%x\n", + ret); + } +} + +void +lpfc_nvme_update_localport(struct lpfc_vport *vport) +{ + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + + localport = vport->localport; + lport = (struct lpfc_nvme_lport *)localport->private; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, + "6012 Update NVME lport %p did x%x\n", + localport, vport->fc_myDID); + + localport->port_id = vport->fc_myDID; + if (localport->port_id == 0) + localport->port_role = FC_PORT_ROLE_NVME_DISCOVERY; + else + localport->port_role = FC_PORT_ROLE_NVME_INITIATOR; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6030 bound lport %p to DID x%06x\n", + lport, localport->port_id); + +} + +int +lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +{ + int ret = 0; + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport; + struct nvme_fc_remote_port *remote_port; + struct nvme_fc_port_info rpinfo; + + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NVME_DISC, + "6006 Register NVME PORT. DID x%06x nlptype x%x\n", + ndlp->nlp_DID, ndlp->nlp_type); + + localport = vport->localport; + lport = (struct lpfc_nvme_lport *)localport->private; + + if (ndlp->nlp_type & (NLP_NVME_TARGET | NLP_NVME_INITIATOR)) { + + /* The driver isn't expecting the rport wwn to change + * but it might get a different DID on a different + * fabric. 
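+ * Walk the lport's rport list for a remote port with a matching WWPN;
+ * if one is found, the existing registration is reused (its DID and
+ * role are rebound if needed) instead of registering a new remoteport.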
+ */ + list_for_each_entry(rport, &lport->rport_list, list) { + if (rport->remoteport->port_name != + wwn_to_u64(ndlp->nlp_portname.u.wwn)) + continue; + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NVME_DISC, + "6035 lport %p, found matching rport " + "at wwpn 0x%llx, Data: x%x x%x x%x " + "x%06x\n", + lport, + rport->remoteport->port_name, + rport->remoteport->port_id, + rport->remoteport->port_role, + ndlp->nlp_type, + ndlp->nlp_DID); + remote_port = rport->remoteport; + if ((remote_port->port_id == 0) && + (remote_port->port_role == + FC_PORT_ROLE_NVME_DISCOVERY)) { + remote_port->port_id = ndlp->nlp_DID; + remote_port->port_role &= + ~FC_PORT_ROLE_NVME_DISCOVERY; + if (ndlp->nlp_type & NLP_NVME_TARGET) + remote_port->port_role |= + FC_PORT_ROLE_NVME_TARGET; + if (ndlp->nlp_type & NLP_NVME_INITIATOR) + remote_port->port_role |= + FC_PORT_ROLE_NVME_INITIATOR; + + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NVME_DISC, + "6014 Rebinding lport to " + "rport wwpn 0x%llx, " + "Data: x%x x%x x%x x%06x\n", + remote_port->port_name, + remote_port->port_id, + remote_port->port_role, + ndlp->nlp_type, + ndlp->nlp_DID); + } + return 0; + } + + /* NVME rports are not preserved across devloss. + * Just register this instance. + */ + rpinfo.port_id = ndlp->nlp_DID; + rpinfo.port_role = 0; + if (ndlp->nlp_type & NLP_NVME_TARGET) + rpinfo.port_role |= FC_PORT_ROLE_NVME_TARGET; + if (ndlp->nlp_type & NLP_NVME_INITIATOR) + rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR; + rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn); + rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn); + + ret = nvme_fc_register_remoteport(localport, &rpinfo, + &remote_port); + if (!ret) { + rport = remote_port->private; + rport->remoteport = remote_port; + rport->lport = lport; + rport->ndlp = lpfc_nlp_get(ndlp); + if (!rport->ndlp) + return -1; + ndlp->nrport = rport; + INIT_LIST_HEAD(&rport->list); + list_add_tail(&rport->list, &lport->rport_list); + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NVME_DISC | LOG_NODE, + "6022 Binding new rport to lport %p " + "Rport WWNN 0x%llx, Rport WWPN 0x%llx " + "DID x%06x Role x%x\n", + lport, + rpinfo.node_name, rpinfo.port_name, + rpinfo.port_id, rpinfo.port_role); + } else { + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NODE, + "6031 RemotePort Registration failed " + "err: %d, DID x%06x\n", + ret, ndlp->nlp_DID); + } + } else { + ret = -EINVAL; + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6027 Unknown nlp_type x%x on DID x%06x " + "ndlp %p. Not Registering nvme rport\n", + ndlp->nlp_type, ndlp->nlp_DID, ndlp); + } + return ret; +} + +/* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport. + * + * There is no notion of Devloss or rport recovery from the current + * nvme_transport perspective. Loss of an rport just means IO cannot + * be sent and recovery is completely up to the initator. + * For now, the driver just unbinds the DID and port_role so that + * no further IO can be issued. Changes are planned for later. + * + * Notes - the ndlp reference count is not decremented here since + * since there is no nvme_transport api for devloss. Node ref count + * is only adjusted in driver unload. + */ +void +lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) +{ + int ret; + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport; + struct nvme_fc_remote_port *remoteport; + + localport = vport->localport; + + /* This is fundamental error. 
The localport is always + * available until driver unload. Just exit. + */ + if (!localport) + return; + + lport = (struct lpfc_nvme_lport *)localport->private; + if (!lport) + goto input_err; + + rport = ndlp->nrport; + if (!rport) + goto input_err; + + remoteport = rport->remoteport; + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, + "6033 Unreg nvme remoteport %p, portname x%llx, " + "port_id x%06x, portstate x%x port type x%x\n", + remoteport, remoteport->port_name, + remoteport->port_id, remoteport->port_state, + ndlp->nlp_type); + + /* Sanity check ndlp type. Only call for NVME ports. Don't + * clear any rport state until the transport calls back. + */ + if (ndlp->nlp_type & (NLP_NVME_TARGET | NLP_NVME_INITIATOR)) { + init_completion(&rport->rport_unreg_done); + ret = nvme_fc_unregister_remoteport(remoteport); + if (ret != 0) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6167 NVME unregister failed %d " + "port_state x%x\n", + ret, remoteport->port_state); + } + + /* Wait for the driver's delete completion routine to finish + * before proceeding. This guarantees the transport and driver + * have completed the unreg process. + */ + ret = wait_for_completion_timeout(&rport->rport_unreg_done, 5); + if (ret == 0) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6169 Unreg nvme wait failed %d\n", + ret); + } + } + return; + + input_err: + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, + "6168: State error: lport %p, rport%p FCID x%06x\n", + vport->localport, ndlp->rport, ndlp->nlp_DID); +} diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 2b167933fd26..4cc51086a5f7 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -29,6 +29,12 @@ #define LPFC_NVME_ERSP_LEN 0x20 +struct lpfc_nvme_qhandle { + uint32_t index; /* WQ index to use */ + uint32_t qidx; /* queue index passed to create */ + uint32_t cpu_id; /* current cpu id at time of create */ +}; + /* Declare nvme-based local and remote port definitions. */ struct lpfc_nvme_lport { struct lpfc_vport *vport; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 52fa5c77f3bc..ccda1f73ae84 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -6795,6 +6795,22 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) } } + if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) && + (phba->nvmet_support == 0)) { + + /* register the allocated nvme sgl pool to the port */ + rc = lpfc_repost_nvme_sgl_list(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "6116 Error %d during nvme sgl post " + "operation\n", rc); + /* Some NVME buffers were moved to abort nvme list */ + /* A pci function reset will repost them */ + rc = -ENODEV; + goto out_destroy_queue; + } + } + /* Post the rpi header region to the device. */ rc = lpfc_sli4_post_all_rpi_hdrs(phba); if (unlikely(rc)) { @@ -10492,10 +10508,7 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* ABTS WQE must go to the same WQ as the WQE to be aborted */ abtsiocbp->iocb_flag |= LPFC_IO_NVME; abtsiocbp->vport = vport; - /* todo: assign wqe_cmpl to lpfc_nvme_abort_fcreq_cmpl - * subsequent patch will add routine. For now, just skip assignment - * as won't ever be called. 
- */ + abtsiocbp->wqe_cmpl = lpfc_nvme_abort_fcreq_cmpl; retval = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abtsiocbp); if (retval == IOCB_ERROR) { lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index b5b4f6b843c0..9cb8508b4b4b 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -753,6 +753,7 @@ int lpfc_sli4_queue_setup(struct lpfc_hba *); void lpfc_sli4_queue_unset(struct lpfc_hba *); int lpfc_sli4_post_sgl(struct lpfc_hba *, dma_addr_t, dma_addr_t, uint16_t); int lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *); +int lpfc_repost_nvme_sgl_list(struct lpfc_hba *phba); uint16_t lpfc_sli4_next_xritag(struct lpfc_hba *); void lpfc_sli4_free_xri(struct lpfc_hba *, int); int lpfc_sli4_post_async_mbox(struct lpfc_hba *); diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 07c7c5f9fdde..b0d94f2e5b44 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -403,7 +403,21 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable) vport->fdmi_port_mask = phba->pport->fdmi_port_mask; } - /* todo: init: register port with nvme */ + if ((phba->nvmet_support == 0) && + ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME))) { + /* Create NVME binding with nvme_fc_transport. This + * ensures the vport is initialized. + */ + rc = lpfc_nvme_create_localport(vport); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6003 %s status x%x\n", + "NVME registration failed, ", + rc); + goto error_out; + } + } /* * In SLI4, the vpi must be activated before it can be used -- GitLab From bd2cdd5e400f5914bc30d5cfb0a0185cf51e4424 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:33 -0800 Subject: [PATCH 0444/1084] scsi: lpfc: NVME Initiator: Add debugfs support NVME Initiator: Add debugfs support Adds debugfs snippets to cover the new NVME initiator functionality Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc.h | 55 ++ drivers/scsi/lpfc/lpfc_crtn.h | 6 +- drivers/scsi/lpfc/lpfc_ct.c | 27 +- drivers/scsi/lpfc/lpfc_debugfs.c | 1160 +++++++++++++++++++++++++++--- drivers/scsi/lpfc/lpfc_debugfs.h | 21 + drivers/scsi/lpfc/lpfc_nvme.c | 143 ++++ drivers/scsi/lpfc/lpfc_nvme.h | 7 + drivers/scsi/lpfc/lpfc_sli.c | 5 + 8 files changed, 1300 insertions(+), 124 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 9ad63a47425b..fdb314b3282d 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -458,6 +458,9 @@ struct lpfc_vport { #ifdef CONFIG_SCSI_LPFC_DEBUG_FS struct dentry *debug_disc_trc; struct dentry *debug_nodelist; + struct dentry *debug_nvmestat; + struct dentry *debug_nvmektime; + struct dentry *debug_cpucheck; struct dentry *vport_debugfs_root; struct lpfc_debugfs_trc *disc_trc; atomic_t disc_trc_cnt; @@ -984,6 +987,12 @@ struct lpfc_hba { struct dentry *debug_readApp; /* inject read app_tag errors */ struct dentry *debug_readRef; /* inject read ref_tag errors */ + struct dentry *debug_nvmeio_trc; + struct lpfc_debugfs_nvmeio_trc *nvmeio_trc; + atomic_t nvmeio_trc_cnt; + uint32_t nvmeio_trc_size; + uint32_t nvmeio_trc_output_idx; + /* T10 DIF error injection */ uint32_t lpfc_injerr_wgrd_cnt; uint32_t lpfc_injerr_wapp_cnt; @@ -1011,6 +1020,7 @@ struct lpfc_hba { struct dentry *idiag_ext_acc; uint8_t lpfc_idiag_last_eq; #endif + uint16_t nvmeio_trc_on; /* Used for deferred freeing of ELS data buffers */ struct list_head elsbuf; @@ -1083,6 +1093,51 @@ struct lpfc_hba { #define LPFC_TRANSGRESSION_LOW_RXPOWER 0x4000 uint16_t sfp_alarm; uint16_t sfp_warning; + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS +#define LPFC_CHECK_CPU_CNT 32 + uint32_t cpucheck_rcv_io[LPFC_CHECK_CPU_CNT]; + uint32_t cpucheck_xmt_io[LPFC_CHECK_CPU_CNT]; + uint32_t cpucheck_cmpl_io[LPFC_CHECK_CPU_CNT]; + uint32_t cpucheck_ccmpl_io[LPFC_CHECK_CPU_CNT]; + uint16_t cpucheck_on; +#define LPFC_CHECK_OFF 0 +#define LPFC_CHECK_NVME_IO 1 + uint16_t ktime_on; + uint64_t ktime_data_samples; + uint64_t ktime_status_samples; + uint64_t ktime_last_cmd; + uint64_t ktime_seg1_total; + uint64_t ktime_seg1_min; + uint64_t ktime_seg1_max; + uint64_t ktime_seg2_total; + uint64_t ktime_seg2_min; + uint64_t ktime_seg2_max; + uint64_t ktime_seg3_total; + uint64_t ktime_seg3_min; + uint64_t ktime_seg3_max; + uint64_t ktime_seg4_total; + uint64_t ktime_seg4_min; + uint64_t ktime_seg4_max; + uint64_t ktime_seg5_total; + uint64_t ktime_seg5_min; + uint64_t ktime_seg5_max; + uint64_t ktime_seg6_total; + uint64_t ktime_seg6_min; + uint64_t ktime_seg6_max; + uint64_t ktime_seg7_total; + uint64_t ktime_seg7_min; + uint64_t ktime_seg7_max; + uint64_t ktime_seg8_total; + uint64_t ktime_seg8_min; + uint64_t ktime_seg8_max; + uint64_t ktime_seg9_total; + uint64_t ktime_seg9_min; + uint64_t ktime_seg9_max; + uint64_t ktime_seg10_total; + uint64_t ktime_seg10_min; + uint64_t ktime_seg10_max; +#endif }; static inline struct Scsi_Host * diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index a9c8a0a41b16..4a0db3895993 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -393,9 +393,11 @@ void lpfc_host_attrib_init(struct Scsi_Host *); extern void lpfc_debugfs_initialize(struct lpfc_vport *); extern void lpfc_debugfs_terminate(struct lpfc_vport *); extern void lpfc_debugfs_disc_trc(struct lpfc_vport *, int, char *, uint32_t, - uint32_t, uint32_t); + uint32_t, uint32_t); extern void lpfc_debugfs_slow_ring_trc(struct lpfc_hba *, char *, 
uint32_t, - uint32_t, uint32_t); + uint32_t, uint32_t); +extern void lpfc_debugfs_nvme_trc(struct lpfc_hba *phba, char *fmt, + uint16_t data1, uint16_t data2, uint32_t data3); extern struct lpfc_hbq_init *lpfc_hbq_defs[]; /* SLI4 if_type 2 externs. */ diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 8cfeffe312e0..2c051369857a 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -465,6 +465,10 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) ndlp = lpfc_setup_disc_node(vport, Did); if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "Parse GID_FTrsp: did:x%x flg:x%x x%x", + Did, ndlp->nlp_flag, vport->fc_flag); + /* By default, the driver expects to support FCP FC4 */ if (fc4_type == FC_TYPE_FCP) ndlp->nlp_fc4_type |= NLP_FC4_FCP; @@ -478,16 +482,24 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) ndlp->nlp_flag, ndlp->nlp_fc4_type, vport->fc_flag, vport->fc_rscn_id_cnt); - } else + } else { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "Skip1 GID_FTrsp: did:x%x flg:x%x cnt:%d", + Did, vport->fc_flag, vport->fc_rscn_id_cnt); + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0239 Skip x%06x NameServer Rsp " "Data: x%x x%x\n", Did, vport->fc_flag, vport->fc_rscn_id_cnt); - + } } else { if (!(vport->fc_flag & FC_RSCN_MODE) || lpfc_rscn_payload_check(vport, Did)) { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "Query GID_FTrsp: did:x%x flg:x%x cnt:%d", + Did, vport->fc_flag, vport->fc_rscn_id_cnt); + /* * This NPortID was previously a FCP target, * Don't even bother to send GFF_ID. @@ -509,12 +521,17 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) else lpfc_setup_disc_node(vport, Did); } - } else + } else { + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "Skip2 GID_FTrsp: did:x%x flg:x%x cnt:%d", + Did, vport->fc_flag, vport->fc_rscn_id_cnt); + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0245 Skip x%06x NameServer Rsp " "Data: x%x x%x\n", Did, vport->fc_flag, vport->fc_rscn_id_cnt); + } } } @@ -892,6 +909,10 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, did = ((struct lpfc_sli_ct_request *)inp->virt)->un.gft.PortId; did = be32_to_cpu(did); + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, + "GFT_ID cmpl: status:x%x/x%x did:x%x", + irsp->ulpStatus, irsp->un.ulpWord[4], did); + if (irsp->ulpStatus == IOSTAT_SUCCESS) { /* Good status, continue checking */ CTrsp = (struct lpfc_sli_ct_request *)outp->virt; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index d9bbebe8eb37..3fcba26623eb 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -34,6 +34,9 @@ #include #include #include +#include + +#include #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -41,8 +44,9 @@ #include "lpfc_sli4.h" #include "lpfc_nl.h" #include "lpfc_disc.h" -#include "lpfc_scsi.h" #include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -99,6 +103,12 @@ module_param(lpfc_debugfs_max_slow_ring_trc, int, S_IRUGO); MODULE_PARM_DESC(lpfc_debugfs_max_slow_ring_trc, "Set debugfs slow ring trace depth"); +/* This MUST be a power of 2 */ +static int lpfc_debugfs_max_nvmeio_trc; +module_param(lpfc_debugfs_max_nvmeio_trc, int, 0444); +MODULE_PARM_DESC(lpfc_debugfs_max_nvmeio_trc, + "Set debugfs NVME IO trace depth"); + static int 
lpfc_debugfs_mask_disc_trc; module_param(lpfc_debugfs_mask_disc_trc, int, S_IRUGO); MODULE_PARM_DESC(lpfc_debugfs_mask_disc_trc, @@ -535,6 +545,10 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct lpfc_nodelist *ndlp; unsigned char *statep; + struct nvme_fc_local_port *localport; + struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport; + struct nvme_fc_remote_port *nrport; cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); @@ -612,6 +626,358 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) } spin_unlock_irq(shost->host_lock); + len += snprintf(buf + len, size - len, + "\nNVME Lport/Rport Entries ...\n"); + + localport = vport->localport; + if (!localport) + goto out_exit; + + spin_lock_irq(shost->host_lock); + lport = (struct lpfc_nvme_lport *)localport->private; + + /* Port state is only one of two values for now. */ + if (localport->port_id) + statep = "ONLINE"; + else + statep = "UNKNOWN "; + + len += snprintf(buf + len, size - len, + "Lport DID x%06x PortState %s\n", + localport->port_id, statep); + + len += snprintf(buf + len, size - len, "\tRport List:\n"); + list_for_each_entry(rport, &lport->rport_list, list) { + /* local short-hand pointer. */ + nrport = rport->remoteport; + + /* Port state is only one of two values for now. */ + switch (nrport->port_state) { + case FC_OBJSTATE_ONLINE: + statep = "ONLINE"; + break; + case FC_OBJSTATE_UNKNOWN: + statep = "UNKNOWN "; + break; + default: + statep = "UNSUPPORTED"; + break; + } + + /* Tab in to show lport ownership. */ + len += snprintf(buf + len, size - len, + "\t%s Port ID:x%06x ", + statep, nrport->port_id); + len += snprintf(buf + len, size - len, "WWPN x%llx ", + nrport->port_name); + len += snprintf(buf + len, size - len, "WWNN x%llx ", + nrport->node_name); + switch (nrport->port_role) { + case FC_PORT_ROLE_NVME_INITIATOR: + len += snprintf(buf + len, size - len, + "NVME INITIATOR "); + break; + case FC_PORT_ROLE_NVME_TARGET: + len += snprintf(buf + len, size - len, + "NVME TARGET "); + break; + case FC_PORT_ROLE_NVME_DISCOVERY: + len += snprintf(buf + len, size - len, + "NVME DISCOVERY "); + break; + default: + len += snprintf(buf + len, size - len, + "UNKNOWN ROLE x%x", + nrport->port_role); + break; + } + + /* Terminate the string. */ + len += snprintf(buf + len, size - len, "\n"); + } + + spin_unlock_irq(shost->host_lock); + out_exit: + return len; +} + +/** + * lpfc_debugfs_nvmestat_data - Dump target node list to a buffer + * @vport: The vport to gather target node info from. + * @buf: The buffer to dump log into. + * @size: The maximum amount of data to process. + * + * Description: + * This routine dumps the NVME statistics associated with @vport + * + * Return Value: + * This routine returns the amount of bytes that were dumped into @buf and will + * not exceed @size. 
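+ * Statistics are reported only when the adapter runs as an NVME
+ * initiator (nvmet_support is zero) and NVME is an enabled FC4 type.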
+ **/ +static int +lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) +{ + struct lpfc_hba *phba = vport->phba; + int len = 0; + + if (phba->nvmet_support == 0) { + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return len; + + len += snprintf(buf + len, size - len, + "\nNVME Lport Statistics\n"); + + len += snprintf(buf + len, size - len, + "LS: Xmt %016llx Cmpl %016llx\n", + phba->fc4NvmeLsRequests, + phba->fc4NvmeLsCmpls); + + len += snprintf(buf + len, size - len, + "FCP: Rd %016llx Wr %016llx IO %016llx\n", + phba->fc4NvmeInputRequests, + phba->fc4NvmeOutputRequests, + phba->fc4NvmeControlRequests); + + len += snprintf(buf + len, size - len, + " Cmpl %016llx\n", phba->fc4NvmeIoCmpls); + } + + return len; +} + + +/** + * lpfc_debugfs_nvmektime_data - Dump target node list to a buffer + * @vport: The vport to gather target node info from. + * @buf: The buffer to dump log into. + * @size: The maximum amount of data to process. + * + * Description: + * This routine dumps the NVME statistics associated with @vport + * + * Return Value: + * This routine returns the amount of bytes that were dumped into @buf and will + * not exceed @size. + **/ +static int +lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) +{ + struct lpfc_hba *phba = vport->phba; + int len = 0; + + if (phba->nvmet_support == 0) { + /* NVME Initiator */ + len += snprintf(buf + len, PAGE_SIZE - len, + "ktime %s: Total Samples: %lld\n", + (phba->ktime_on ? "Enabled" : "Disabled"), + phba->ktime_data_samples); + if (phba->ktime_data_samples == 0) + return len; + + len += snprintf( + buf + len, PAGE_SIZE - len, + "Segment 1: Last NVME Cmd cmpl " + "done -to- Start of next NVME cnd (in driver)\n"); + len += snprintf( + buf + len, PAGE_SIZE - len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg1_total / + phba->ktime_data_samples, + phba->ktime_seg1_min, + phba->ktime_seg1_max); + len += snprintf( + buf + len, PAGE_SIZE - len, + "Segment 2: Driver start of NVME cmd " + "-to- Firmware WQ doorbell\n"); + len += snprintf( + buf + len, PAGE_SIZE - len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg2_total / + phba->ktime_data_samples, + phba->ktime_seg2_min, + phba->ktime_seg2_max); + len += snprintf( + buf + len, PAGE_SIZE - len, + "Segment 3: Firmware WQ doorbell -to- " + "MSI-X ISR cmpl\n"); + len += snprintf( + buf + len, PAGE_SIZE - len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg3_total / + phba->ktime_data_samples, + phba->ktime_seg3_min, + phba->ktime_seg3_max); + len += snprintf( + buf + len, PAGE_SIZE - len, + "Segment 4: MSI-X ISR cmpl -to- " + "NVME cmpl done\n"); + len += snprintf( + buf + len, PAGE_SIZE - len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg4_total / + phba->ktime_data_samples, + phba->ktime_seg4_min, + phba->ktime_seg4_max); + len += snprintf( + buf + len, PAGE_SIZE - len, + "Total IO avg time: %08lld\n", + ((phba->ktime_seg1_total + + phba->ktime_seg2_total + + phba->ktime_seg3_total + + phba->ktime_seg4_total) / + phba->ktime_data_samples)); + return len; + } + + return len; +} + +/** + * lpfc_debugfs_nvmeio_trc_data - Dump NVME IO trace list to a buffer + * @phba: The phba to gather target node info from. + * @buf: The buffer to dump log into. + * @size: The maximum amount of data to process. + * + * Description: + * This routine dumps the NVME IO trace associated with @phba + * + * Return Value: + * This routine returns the amount of bytes that were dumped into @buf and will + * not exceed @size. 
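+ * The trace can only be dumped while tracing is disabled
+ * (nvmeio_trc_on cleared); large traces continue across successive
+ * reads, resuming from nvmeio_trc_output_idx.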
+ **/ +static int +lpfc_debugfs_nvmeio_trc_data(struct lpfc_hba *phba, char *buf, int size) +{ + struct lpfc_debugfs_nvmeio_trc *dtp; + int i, state, index, skip; + int len = 0; + + state = phba->nvmeio_trc_on; + + index = (atomic_read(&phba->nvmeio_trc_cnt) + 1) & + (phba->nvmeio_trc_size - 1); + skip = phba->nvmeio_trc_output_idx; + + len += snprintf(buf + len, size - len, + "%s IO Trace %s: next_idx %d skip %d size %d\n", + (phba->nvmet_support ? "NVME" : "NVMET"), + (state ? "Enabled" : "Disabled"), + index, skip, phba->nvmeio_trc_size); + + if (!phba->nvmeio_trc || state) + return len; + + /* trace MUST bhe off to continue */ + + for (i = index; i < phba->nvmeio_trc_size; i++) { + if (skip) { + skip--; + continue; + } + dtp = phba->nvmeio_trc + i; + phba->nvmeio_trc_output_idx++; + + if (!dtp->fmt) + continue; + + len += snprintf(buf + len, size - len, dtp->fmt, + dtp->data1, dtp->data2, dtp->data3); + + if (phba->nvmeio_trc_output_idx >= phba->nvmeio_trc_size) { + phba->nvmeio_trc_output_idx = 0; + len += snprintf(buf + len, size - len, + "Trace Complete\n"); + goto out; + } + + if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ)) { + len += snprintf(buf + len, size - len, + "Trace Continue (%d of %d)\n", + phba->nvmeio_trc_output_idx, + phba->nvmeio_trc_size); + goto out; + } + } + for (i = 0; i < index; i++) { + if (skip) { + skip--; + continue; + } + dtp = phba->nvmeio_trc + i; + phba->nvmeio_trc_output_idx++; + + if (!dtp->fmt) + continue; + + len += snprintf(buf + len, size - len, dtp->fmt, + dtp->data1, dtp->data2, dtp->data3); + + if (phba->nvmeio_trc_output_idx >= phba->nvmeio_trc_size) { + phba->nvmeio_trc_output_idx = 0; + len += snprintf(buf + len, size - len, + "Trace Complete\n"); + goto out; + } + + if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ)) { + len += snprintf(buf + len, size - len, + "Trace Continue (%d of %d)\n", + phba->nvmeio_trc_output_idx, + phba->nvmeio_trc_size); + goto out; + } + } + + len += snprintf(buf + len, size - len, + "Trace Done\n"); +out: + return len; +} + +/** + * lpfc_debugfs_cpucheck_data - Dump target node list to a buffer + * @vport: The vport to gather target node info from. + * @buf: The buffer to dump log into. + * @size: The maximum amount of data to process. + * + * Description: + * This routine dumps the NVME statistics associated with @vport + * + * Return Value: + * This routine returns the amount of bytes that were dumped into @buf and will + * not exceed @size. + **/ +static int +lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) +{ + struct lpfc_hba *phba = vport->phba; + int i; + int len = 0; + uint32_t tot_xmt = 0; + uint32_t tot_cmpl = 0; + + if (phba->nvmet_support == 0) { + /* NVME Initiator */ + len += snprintf(buf + len, PAGE_SIZE - len, + "CPUcheck %s\n", + (phba->cpucheck_on & LPFC_CHECK_NVME_IO ? + "Enabled" : "Disabled")); + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + if (i >= LPFC_CHECK_CPU_CNT) + break; + len += snprintf(buf + len, PAGE_SIZE - len, + "%02d: xmit x%08x cmpl x%08x\n", + i, phba->cpucheck_xmt_io[i], + phba->cpucheck_cmpl_io[i]); + tot_xmt += phba->cpucheck_xmt_io[i]; + tot_cmpl += phba->cpucheck_cmpl_io[i]; + } + len += snprintf(buf + len, PAGE_SIZE - len, + "tot:xmit x%08x cmpl x%08x\n", + tot_xmt, tot_cmpl); + return len; + } + return len; } @@ -699,6 +1065,40 @@ lpfc_debugfs_slow_ring_trc(struct lpfc_hba *phba, char *fmt, return; } +/** + * lpfc_debugfs_nvme_trc - Store NVME/NVMET trace log + * @phba: The phba to associate this trace string with for retrieval. 
+ * @fmt: Format string to be displayed when dumping the log. + * @data1: 1st data parameter to be applied to @fmt. + * @data2: 2nd data parameter to be applied to @fmt. + * @data3: 3rd data parameter to be applied to @fmt. + * + * Description: + * This routine is used by the driver code to add a debugfs log entry to the + * nvme trace buffer associated with @phba. @fmt, @data1, @data2, and + * @data3 are used like printf when displaying the log. + **/ +inline void +lpfc_debugfs_nvme_trc(struct lpfc_hba *phba, char *fmt, + uint16_t data1, uint16_t data2, uint32_t data3) +{ +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + struct lpfc_debugfs_nvmeio_trc *dtp; + int index; + + if (!phba->nvmeio_trc_on || !phba->nvmeio_trc) + return; + + index = atomic_inc_return(&phba->nvmeio_trc_cnt) & + (phba->nvmeio_trc_size - 1); + dtp = phba->nvmeio_trc + index; + dtp->fmt = fmt; + dtp->data1 = data1; + dtp->data2 = data2; + dtp->data3 = data3; +#endif +} + #ifdef CONFIG_SCSI_LPFC_DEBUG_FS /** * lpfc_debugfs_disc_trc_open - Open the discovery trace log @@ -1231,6 +1631,356 @@ lpfc_debugfs_dumpDataDif_release(struct inode *inode, struct file *file) return 0; } + +static int +lpfc_debugfs_nvmestat_open(struct inode *inode, struct file *file) +{ + struct lpfc_vport *vport = inode->i_private; + struct lpfc_debug *debug; + int rc = -ENOMEM; + + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + /* Round to page boundary */ + debug->buffer = kmalloc(LPFC_NVMESTAT_SIZE, GFP_KERNEL); + if (!debug->buffer) { + kfree(debug); + goto out; + } + + debug->len = lpfc_debugfs_nvmestat_data(vport, debug->buffer, + LPFC_NVMESTAT_SIZE); + + debug->i_private = inode->i_private; + file->private_data = debug; + + rc = 0; +out: + return rc; +} + +static int +lpfc_debugfs_nvmektime_open(struct inode *inode, struct file *file) +{ + struct lpfc_vport *vport = inode->i_private; + struct lpfc_debug *debug; + int rc = -ENOMEM; + + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + /* Round to page boundary */ + debug->buffer = kmalloc(LPFC_NVMEKTIME_SIZE, GFP_KERNEL); + if (!debug->buffer) { + kfree(debug); + goto out; + } + + debug->len = lpfc_debugfs_nvmektime_data(vport, debug->buffer, + LPFC_NVMEKTIME_SIZE); + + debug->i_private = inode->i_private; + file->private_data = debug; + + rc = 0; +out: + return rc; +} + +static ssize_t +lpfc_debugfs_nvmektime_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct lpfc_debug *debug = file->private_data; + struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private; + struct lpfc_hba *phba = vport->phba; + char mybuf[64]; + char *pbuf; + + if (nbytes > 64) + nbytes = 64; + + /* Protect copy from user */ + if (!access_ok(VERIFY_READ, buf, nbytes)) + return -EFAULT; + + memset(mybuf, 0, sizeof(mybuf)); + + if (copy_from_user(mybuf, buf, nbytes)) + return -EFAULT; + pbuf = &mybuf[0]; + + if ((strncmp(pbuf, "on", sizeof("on") - 1) == 0)) { + phba->ktime_data_samples = 0; + phba->ktime_status_samples = 0; + phba->ktime_seg1_total = 0; + phba->ktime_seg1_max = 0; + phba->ktime_seg1_min = 0xffffffff; + phba->ktime_seg2_total = 0; + phba->ktime_seg2_max = 0; + phba->ktime_seg2_min = 0xffffffff; + phba->ktime_seg3_total = 0; + phba->ktime_seg3_max = 0; + phba->ktime_seg3_min = 0xffffffff; + phba->ktime_seg4_total = 0; + phba->ktime_seg4_max = 0; + phba->ktime_seg4_min = 0xffffffff; + phba->ktime_seg5_total = 0; + phba->ktime_seg5_max = 0; + phba->ktime_seg5_min = 0xffffffff; + phba->ktime_seg6_total = 0; + 
phba->ktime_seg6_max = 0; + phba->ktime_seg6_min = 0xffffffff; + phba->ktime_seg7_total = 0; + phba->ktime_seg7_max = 0; + phba->ktime_seg7_min = 0xffffffff; + phba->ktime_seg8_total = 0; + phba->ktime_seg8_max = 0; + phba->ktime_seg8_min = 0xffffffff; + phba->ktime_seg9_total = 0; + phba->ktime_seg9_max = 0; + phba->ktime_seg9_min = 0xffffffff; + phba->ktime_seg10_total = 0; + phba->ktime_seg10_max = 0; + phba->ktime_seg10_min = 0xffffffff; + + phba->ktime_on = 1; + return strlen(pbuf); + } else if ((strncmp(pbuf, "off", + sizeof("off") - 1) == 0)) { + phba->ktime_on = 0; + return strlen(pbuf); + } else if ((strncmp(pbuf, "zero", + sizeof("zero") - 1) == 0)) { + phba->ktime_data_samples = 0; + phba->ktime_status_samples = 0; + phba->ktime_seg1_total = 0; + phba->ktime_seg1_max = 0; + phba->ktime_seg1_min = 0xffffffff; + phba->ktime_seg2_total = 0; + phba->ktime_seg2_max = 0; + phba->ktime_seg2_min = 0xffffffff; + phba->ktime_seg3_total = 0; + phba->ktime_seg3_max = 0; + phba->ktime_seg3_min = 0xffffffff; + phba->ktime_seg4_total = 0; + phba->ktime_seg4_max = 0; + phba->ktime_seg4_min = 0xffffffff; + phba->ktime_seg5_total = 0; + phba->ktime_seg5_max = 0; + phba->ktime_seg5_min = 0xffffffff; + phba->ktime_seg6_total = 0; + phba->ktime_seg6_max = 0; + phba->ktime_seg6_min = 0xffffffff; + phba->ktime_seg7_total = 0; + phba->ktime_seg7_max = 0; + phba->ktime_seg7_min = 0xffffffff; + phba->ktime_seg8_total = 0; + phba->ktime_seg8_max = 0; + phba->ktime_seg8_min = 0xffffffff; + phba->ktime_seg9_total = 0; + phba->ktime_seg9_max = 0; + phba->ktime_seg9_min = 0xffffffff; + phba->ktime_seg10_total = 0; + phba->ktime_seg10_max = 0; + phba->ktime_seg10_min = 0xffffffff; + return strlen(pbuf); + } + return -EINVAL; +} + +static int +lpfc_debugfs_nvmeio_trc_open(struct inode *inode, struct file *file) +{ + struct lpfc_hba *phba = inode->i_private; + struct lpfc_debug *debug; + int rc = -ENOMEM; + + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + /* Round to page boundary */ + debug->buffer = kmalloc(LPFC_NVMEIO_TRC_SIZE, GFP_KERNEL); + if (!debug->buffer) { + kfree(debug); + goto out; + } + + debug->len = lpfc_debugfs_nvmeio_trc_data(phba, debug->buffer, + LPFC_NVMEIO_TRC_SIZE); + + debug->i_private = inode->i_private; + file->private_data = debug; + + rc = 0; +out: + return rc; +} + +static ssize_t +lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct lpfc_debug *debug = file->private_data; + struct lpfc_hba *phba = (struct lpfc_hba *)debug->i_private; + int i; + unsigned long sz; + char mybuf[64]; + char *pbuf; + + if (nbytes > 64) + nbytes = 64; + + /* Protect copy from user */ + if (!access_ok(VERIFY_READ, buf, nbytes)) + return -EFAULT; + + memset(mybuf, 0, sizeof(mybuf)); + + if (copy_from_user(mybuf, buf, nbytes)) + return -EFAULT; + pbuf = &mybuf[0]; + + if ((strncmp(pbuf, "off", sizeof("off") - 1) == 0)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0570 nvmeio_trc_off\n"); + phba->nvmeio_trc_output_idx = 0; + phba->nvmeio_trc_on = 0; + return strlen(pbuf); + } else if ((strncmp(pbuf, "on", sizeof("on") - 1) == 0)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0571 nvmeio_trc_on\n"); + phba->nvmeio_trc_output_idx = 0; + phba->nvmeio_trc_on = 1; + return strlen(pbuf); + } + + /* We must be off to allocate the trace buffer */ + if (phba->nvmeio_trc_on != 0) + return -EINVAL; + + /* If not on or off, the parameter is the trace buffer size */ + i = kstrtoul(pbuf, 0, &sz); + if (i) + return -EINVAL; + 
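+	/* Entries are indexed with (count & (size - 1)), so the trace buffer
+	 * size must be a power of two; round the request down if necessary.
+	 */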
phba->nvmeio_trc_size = (uint32_t)sz; + + /* It must be a power of 2 - round down */ + i = 0; + while (sz > 1) { + sz = sz >> 1; + i++; + } + sz = (1 << i); + if (phba->nvmeio_trc_size != sz) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0572 nvmeio_trc_size changed to %ld\n", + sz); + phba->nvmeio_trc_size = (uint32_t)sz; + + /* If one previously exists, free it */ + kfree(phba->nvmeio_trc); + + /* Allocate new trace buffer and initialize */ + phba->nvmeio_trc = kmalloc((sizeof(struct lpfc_debugfs_nvmeio_trc) * + sz), GFP_KERNEL); + if (!phba->nvmeio_trc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0573 Cannot create debugfs " + "nvmeio_trc buffer\n"); + return -ENOMEM; + } + memset(phba->nvmeio_trc, 0, + (sizeof(struct lpfc_debugfs_nvmeio_trc) * sz)); + atomic_set(&phba->nvmeio_trc_cnt, 0); + phba->nvmeio_trc_on = 0; + phba->nvmeio_trc_output_idx = 0; + + return strlen(pbuf); +} + +static int +lpfc_debugfs_cpucheck_open(struct inode *inode, struct file *file) +{ + struct lpfc_vport *vport = inode->i_private; + struct lpfc_debug *debug; + int rc = -ENOMEM; + + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + /* Round to page boundary */ + debug->buffer = kmalloc(LPFC_CPUCHECK_SIZE, GFP_KERNEL); + if (!debug->buffer) { + kfree(debug); + goto out; + } + + debug->len = lpfc_debugfs_cpucheck_data(vport, debug->buffer, + LPFC_NVMEKTIME_SIZE); + + debug->i_private = inode->i_private; + file->private_data = debug; + + rc = 0; +out: + return rc; +} + +static ssize_t +lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct lpfc_debug *debug = file->private_data; + struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private; + struct lpfc_hba *phba = vport->phba; + char mybuf[64]; + char *pbuf; + int i; + + if (nbytes > 64) + nbytes = 64; + + /* Protect copy from user */ + if (!access_ok(VERIFY_READ, buf, nbytes)) + return -EFAULT; + + memset(mybuf, 0, sizeof(mybuf)); + + if (copy_from_user(mybuf, buf, nbytes)) + return -EFAULT; + pbuf = &mybuf[0]; + + if ((strncmp(pbuf, "on", sizeof("on") - 1) == 0)) { + phba->cpucheck_on |= LPFC_CHECK_NVME_IO; + return strlen(pbuf); + } else if ((strncmp(pbuf, "rcv", + sizeof("rcv") - 1) == 0)) { + return -EINVAL; + } else if ((strncmp(pbuf, "off", + sizeof("off") - 1) == 0)) { + phba->cpucheck_on = LPFC_CHECK_OFF; + return strlen(pbuf); + } else if ((strncmp(pbuf, "zero", + sizeof("zero") - 1) == 0)) { + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + if (i >= LPFC_CHECK_CPU_CNT) + break; + phba->cpucheck_rcv_io[i] = 0; + phba->cpucheck_xmt_io[i] = 0; + phba->cpucheck_cmpl_io[i] = 0; + phba->cpucheck_ccmpl_io[i] = 0; + } + return strlen(pbuf); + } + return -EINVAL; +} + /* * --------------------------------- * iDiag debugfs file access methods @@ -2581,6 +3331,17 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.nvmels_cq; goto pass_check; } + /* NVME LS complete queue */ + if (phba->sli4_hba.nvmels_cq && + phba->sli4_hba.nvmels_cq->queue_id == queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvmels_cq, index, count); + if (rc) + goto error_out; + idiag.ptr_private = phba->sli4_hba.nvmels_cq; + goto pass_check; + } /* FCP complete queue */ if (phba->sli4_hba.fcp_cq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; @@ -2597,6 +3358,25 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, } } } + /* NVME complete queue */ + if (phba->sli4_hba.nvme_cq) { + 
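+		/* Walk the NVME completion queues until the requested queue_id matches. */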
qidx = 0; + do { + if (phba->sli4_hba.nvme_cq[qidx] && + phba->sli4_hba.nvme_cq[qidx]->queue_id == + queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvme_cq[qidx], + index, count); + if (rc) + goto error_out; + idiag.ptr_private = + phba->sli4_hba.nvme_cq[qidx]; + goto pass_check; + } + } while (++qidx < phba->cfg_nvme_io_channel); + } goto error_out; break; case LPFC_IDIAG_MQ: @@ -2636,6 +3416,17 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.nvmels_wq; goto pass_check; } + /* NVME LS work queue */ + if (phba->sli4_hba.nvmels_wq && + phba->sli4_hba.nvmels_wq->queue_id == queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvmels_wq, index, count); + if (rc) + goto error_out; + idiag.ptr_private = phba->sli4_hba.nvmels_wq; + goto pass_check; + } /* FCP work queue */ if (phba->sli4_hba.fcp_wq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; @@ -2668,6 +3459,27 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, } } } + + /* NVME work queues */ + if (phba->sli4_hba.nvme_wq) { + for (qidx = 0; qidx < phba->cfg_nvme_io_channel; + qidx++) { + if (!phba->sli4_hba.nvme_wq[qidx]) + continue; + if (phba->sli4_hba.nvme_wq[qidx]->queue_id == + queid) { + /* Sanity check */ + rc = lpfc_idiag_que_param_check( + phba->sli4_hba.nvme_wq[qidx], + index, count); + if (rc) + goto error_out; + idiag.ptr_private = + phba->sli4_hba.nvme_wq[qidx]; + goto pass_check; + } + } + } goto error_out; break; case LPFC_IDIAG_RQ: @@ -3651,6 +4463,45 @@ static const struct file_operations lpfc_debugfs_op_dumpHostSlim = { .release = lpfc_debugfs_release, }; +#undef lpfc_debugfs_op_nvmestat +static const struct file_operations lpfc_debugfs_op_nvmestat = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_nvmestat_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .release = lpfc_debugfs_release, +}; + +#undef lpfc_debugfs_op_nvmektime +static const struct file_operations lpfc_debugfs_op_nvmektime = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_nvmektime_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .write = lpfc_debugfs_nvmektime_write, + .release = lpfc_debugfs_release, +}; + +#undef lpfc_debugfs_op_nvmeio_trc +static const struct file_operations lpfc_debugfs_op_nvmeio_trc = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_nvmeio_trc_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .write = lpfc_debugfs_nvmeio_trc_write, + .release = lpfc_debugfs_release, +}; + +#undef lpfc_debugfs_op_cpucheck +static const struct file_operations lpfc_debugfs_op_cpucheck = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_cpucheck_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .write = lpfc_debugfs_cpucheck_write, + .release = lpfc_debugfs_release, +}; + #undef lpfc_debugfs_op_dumpData static const struct file_operations lpfc_debugfs_op_dumpData = { .owner = THIS_MODULE, @@ -4237,6 +5088,60 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) lpfc_debugfs_max_slow_ring_trc)); } + snprintf(name, sizeof(name), "nvmeio_trc"); + phba->debug_nvmeio_trc = + debugfs_create_file(name, 0644, + phba->hba_debugfs_root, + phba, &lpfc_debugfs_op_nvmeio_trc); + if (!phba->debug_nvmeio_trc) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, + "0574 No create debugfs nvmeio_trc\n"); + goto debug_failed; + } + + atomic_set(&phba->nvmeio_trc_cnt, 0); + if (lpfc_debugfs_max_nvmeio_trc) { + num = lpfc_debugfs_max_nvmeio_trc - 1; + if (num & 
lpfc_debugfs_max_disc_trc) { + /* Change to be a power of 2 */ + num = lpfc_debugfs_max_nvmeio_trc; + i = 0; + while (num > 1) { + num = num >> 1; + i++; + } + lpfc_debugfs_max_nvmeio_trc = (1 << i); + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0575 lpfc_debugfs_max_nvmeio_trc " + "changed to %d\n", + lpfc_debugfs_max_nvmeio_trc); + } + phba->nvmeio_trc_size = lpfc_debugfs_max_nvmeio_trc; + + /* Allocate trace buffer and initialize */ + phba->nvmeio_trc = kmalloc( + (sizeof(struct lpfc_debugfs_nvmeio_trc) * + phba->nvmeio_trc_size), GFP_KERNEL); + + if (!phba->nvmeio_trc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0576 Cannot create debugfs " + "nvmeio_trc buffer\n"); + goto nvmeio_off; + } + memset(phba->nvmeio_trc, 0, + (sizeof(struct lpfc_debugfs_nvmeio_trc) * + phba->nvmeio_trc_size)); + phba->nvmeio_trc_on = 1; + phba->nvmeio_trc_output_idx = 0; + phba->nvmeio_trc = NULL; + } else { +nvmeio_off: + phba->nvmeio_trc_size = 0; + phba->nvmeio_trc_on = 0; + phba->nvmeio_trc_output_idx = 0; + phba->nvmeio_trc = NULL; + } } snprintf(name, sizeof(name), "vport%d", vport->vpi); @@ -4301,6 +5206,39 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) goto debug_failed; } + snprintf(name, sizeof(name), "nvmestat"); + vport->debug_nvmestat = + debugfs_create_file(name, 0644, + vport->vport_debugfs_root, + vport, &lpfc_debugfs_op_nvmestat); + if (!vport->debug_nvmestat) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, + "0811 Cannot create debugfs nvmestat\n"); + goto debug_failed; + } + + snprintf(name, sizeof(name), "nvmektime"); + vport->debug_nvmektime = + debugfs_create_file(name, 0644, + vport->vport_debugfs_root, + vport, &lpfc_debugfs_op_nvmektime); + if (!vport->debug_nvmektime) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, + "0815 Cannot create debugfs nvmektime\n"); + goto debug_failed; + } + + snprintf(name, sizeof(name), "cpucheck"); + vport->debug_cpucheck = + debugfs_create_file(name, 0644, + vport->vport_debugfs_root, + vport, &lpfc_debugfs_op_cpucheck); + if (!vport->debug_cpucheck) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, + "0819 Cannot create debugfs cpucheck\n"); + goto debug_failed; + } + /* * The following section is for additional directories/files for the * physical port. 
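For a sense of how the new control files are driven from user space, a minimal
sketch follows. The /sys/kernel/debug/lpfc/fn0/vport0 path is an assumption
(only the lpfc debugfs root and the vport%d directory naming appear in this
patch), and the keywords written below are the ones accepted by the nvmektime
write handler.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical location of the nvmektime file created above. */
#define KTIME_PATH "/sys/kernel/debug/lpfc/fn0/vport0/nvmektime"

static void write_keyword(const char *path, const char *kw)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		exit(1);
	}
	/* The write handler recognizes "on", "off" and "zero". */
	if (write(fd, kw, strlen(kw)) < 0)
		perror("write");
	close(fd);
}

int main(void)
{
	char buf[8192];
	ssize_t n;
	int fd;

	write_keyword(KTIME_PATH, "on");	/* reset and start sampling */

	/* ... run NVME traffic against the port here ... */

	fd = open(KTIME_PATH, O_RDONLY);
	if (fd < 0) {
		perror(KTIME_PATH);
		return 1;
	}
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);		/* per-segment latency summary */
	}
	close(fd);

	write_keyword(KTIME_PATH, "off");	/* stop sampling */
	return 0;
}

The cpucheck and nvmeio_trc files follow the same on/off convention, with
nvmeio_trc additionally accepting a numeric buffer size while tracing is off.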
@@ -4465,140 +5403,126 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) kfree(vport->disc_trc); vport->disc_trc = NULL; } - if (vport->debug_disc_trc) { - debugfs_remove(vport->debug_disc_trc); /* discovery_trace */ - vport->debug_disc_trc = NULL; - } - if (vport->debug_nodelist) { - debugfs_remove(vport->debug_nodelist); /* nodelist */ - vport->debug_nodelist = NULL; - } + + debugfs_remove(vport->debug_disc_trc); /* discovery_trace */ + vport->debug_disc_trc = NULL; + + debugfs_remove(vport->debug_nodelist); /* nodelist */ + vport->debug_nodelist = NULL; + + debugfs_remove(vport->debug_nvmestat); /* nvmestat */ + vport->debug_nvmestat = NULL; + + debugfs_remove(vport->debug_nvmektime); /* nvmektime */ + vport->debug_nvmektime = NULL; + + debugfs_remove(vport->debug_cpucheck); /* cpucheck */ + vport->debug_cpucheck = NULL; + if (vport->vport_debugfs_root) { debugfs_remove(vport->vport_debugfs_root); /* vportX */ vport->vport_debugfs_root = NULL; atomic_dec(&phba->debugfs_vport_count); } + if (atomic_read(&phba->debugfs_vport_count) == 0) { - if (phba->debug_hbqinfo) { - debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */ - phba->debug_hbqinfo = NULL; - } - if (phba->debug_dumpHBASlim) { - debugfs_remove(phba->debug_dumpHBASlim); /* HBASlim */ - phba->debug_dumpHBASlim = NULL; - } - if (phba->debug_dumpHostSlim) { - debugfs_remove(phba->debug_dumpHostSlim); /* HostSlim */ - phba->debug_dumpHostSlim = NULL; - } - if (phba->debug_dumpData) { - debugfs_remove(phba->debug_dumpData); /* dumpData */ - phba->debug_dumpData = NULL; - } + debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */ + phba->debug_hbqinfo = NULL; - if (phba->debug_dumpDif) { - debugfs_remove(phba->debug_dumpDif); /* dumpDif */ - phba->debug_dumpDif = NULL; - } - if (phba->debug_InjErrLBA) { - debugfs_remove(phba->debug_InjErrLBA); /* InjErrLBA */ - phba->debug_InjErrLBA = NULL; - } - if (phba->debug_InjErrNPortID) { /* InjErrNPortID */ - debugfs_remove(phba->debug_InjErrNPortID); - phba->debug_InjErrNPortID = NULL; - } - if (phba->debug_InjErrWWPN) { - debugfs_remove(phba->debug_InjErrWWPN); /* InjErrWWPN */ - phba->debug_InjErrWWPN = NULL; - } - if (phba->debug_writeGuard) { - debugfs_remove(phba->debug_writeGuard); /* writeGuard */ - phba->debug_writeGuard = NULL; - } - if (phba->debug_writeApp) { - debugfs_remove(phba->debug_writeApp); /* writeApp */ - phba->debug_writeApp = NULL; - } - if (phba->debug_writeRef) { - debugfs_remove(phba->debug_writeRef); /* writeRef */ - phba->debug_writeRef = NULL; - } - if (phba->debug_readGuard) { - debugfs_remove(phba->debug_readGuard); /* readGuard */ - phba->debug_readGuard = NULL; - } - if (phba->debug_readApp) { - debugfs_remove(phba->debug_readApp); /* readApp */ - phba->debug_readApp = NULL; - } - if (phba->debug_readRef) { - debugfs_remove(phba->debug_readRef); /* readRef */ - phba->debug_readRef = NULL; - } + debugfs_remove(phba->debug_dumpHBASlim); /* HBASlim */ + phba->debug_dumpHBASlim = NULL; + + debugfs_remove(phba->debug_dumpHostSlim); /* HostSlim */ + phba->debug_dumpHostSlim = NULL; + + debugfs_remove(phba->debug_dumpData); /* dumpData */ + phba->debug_dumpData = NULL; + + debugfs_remove(phba->debug_dumpDif); /* dumpDif */ + phba->debug_dumpDif = NULL; + + debugfs_remove(phba->debug_InjErrLBA); /* InjErrLBA */ + phba->debug_InjErrLBA = NULL; + + debugfs_remove(phba->debug_InjErrNPortID); + phba->debug_InjErrNPortID = NULL; + + debugfs_remove(phba->debug_InjErrWWPN); /* InjErrWWPN */ + phba->debug_InjErrWWPN = NULL; + + debugfs_remove(phba->debug_writeGuard); /* 
writeGuard */ + phba->debug_writeGuard = NULL; + + debugfs_remove(phba->debug_writeApp); /* writeApp */ + phba->debug_writeApp = NULL; + + debugfs_remove(phba->debug_writeRef); /* writeRef */ + phba->debug_writeRef = NULL; + + debugfs_remove(phba->debug_readGuard); /* readGuard */ + phba->debug_readGuard = NULL; + + debugfs_remove(phba->debug_readApp); /* readApp */ + phba->debug_readApp = NULL; + + debugfs_remove(phba->debug_readRef); /* readRef */ + phba->debug_readRef = NULL; if (phba->slow_ring_trc) { kfree(phba->slow_ring_trc); phba->slow_ring_trc = NULL; } - if (phba->debug_slow_ring_trc) { - /* slow_ring_trace */ - debugfs_remove(phba->debug_slow_ring_trc); - phba->debug_slow_ring_trc = NULL; - } + + /* slow_ring_trace */ + debugfs_remove(phba->debug_slow_ring_trc); + phba->debug_slow_ring_trc = NULL; + + debugfs_remove(phba->debug_nvmeio_trc); + phba->debug_nvmeio_trc = NULL; + + kfree(phba->nvmeio_trc); + phba->nvmeio_trc = NULL; /* * iDiag release */ if (phba->sli_rev == LPFC_SLI_REV4) { - if (phba->idiag_ext_acc) { - /* iDiag extAcc */ - debugfs_remove(phba->idiag_ext_acc); - phba->idiag_ext_acc = NULL; - } - if (phba->idiag_mbx_acc) { - /* iDiag mbxAcc */ - debugfs_remove(phba->idiag_mbx_acc); - phba->idiag_mbx_acc = NULL; - } - if (phba->idiag_ctl_acc) { - /* iDiag ctlAcc */ - debugfs_remove(phba->idiag_ctl_acc); - phba->idiag_ctl_acc = NULL; - } - if (phba->idiag_drb_acc) { - /* iDiag drbAcc */ - debugfs_remove(phba->idiag_drb_acc); - phba->idiag_drb_acc = NULL; - } - if (phba->idiag_que_acc) { - /* iDiag queAcc */ - debugfs_remove(phba->idiag_que_acc); - phba->idiag_que_acc = NULL; - } - if (phba->idiag_que_info) { - /* iDiag queInfo */ - debugfs_remove(phba->idiag_que_info); - phba->idiag_que_info = NULL; - } - if (phba->idiag_bar_acc) { - /* iDiag barAcc */ - debugfs_remove(phba->idiag_bar_acc); - phba->idiag_bar_acc = NULL; - } - if (phba->idiag_pci_cfg) { - /* iDiag pciCfg */ - debugfs_remove(phba->idiag_pci_cfg); - phba->idiag_pci_cfg = NULL; - } + /* iDiag extAcc */ + debugfs_remove(phba->idiag_ext_acc); + phba->idiag_ext_acc = NULL; + + /* iDiag mbxAcc */ + debugfs_remove(phba->idiag_mbx_acc); + phba->idiag_mbx_acc = NULL; + + /* iDiag ctlAcc */ + debugfs_remove(phba->idiag_ctl_acc); + phba->idiag_ctl_acc = NULL; + + /* iDiag drbAcc */ + debugfs_remove(phba->idiag_drb_acc); + phba->idiag_drb_acc = NULL; + + /* iDiag queAcc */ + debugfs_remove(phba->idiag_que_acc); + phba->idiag_que_acc = NULL; + + /* iDiag queInfo */ + debugfs_remove(phba->idiag_que_info); + phba->idiag_que_info = NULL; + + /* iDiag barAcc */ + debugfs_remove(phba->idiag_bar_acc); + phba->idiag_bar_acc = NULL; + + /* iDiag pciCfg */ + debugfs_remove(phba->idiag_pci_cfg); + phba->idiag_pci_cfg = NULL; /* Finally remove the iDiag debugfs root */ - if (phba->idiag_root) { - /* iDiag root */ - debugfs_remove(phba->idiag_root); - phba->idiag_root = NULL; - } + debugfs_remove(phba->idiag_root); + phba->idiag_root = NULL; } if (phba->hba_debugfs_root) { @@ -4607,10 +5531,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) atomic_dec(&lpfc_debugfs_hba_count); } - if (atomic_read(&lpfc_debugfs_hba_count) == 0) { - debugfs_remove(lpfc_debugfs_root); /* lpfc */ - lpfc_debugfs_root = NULL; - } + debugfs_remove(lpfc_debugfs_root); /* lpfc */ + lpfc_debugfs_root = NULL; } #endif return; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 98814d651239..5312e0f9deb6 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -50,6 +50,14 @@ enum { 
DUMP_NVMELS, }; +/* nvmestat output buffer size */ +#define LPFC_NVMESTAT_SIZE 8192 +#define LPFC_NVMEKTIME_SIZE 8192 +#define LPFC_CPUCHECK_SIZE 8192 +#define LPFC_NVMEIO_TRC_SIZE 8192 + +#define LPFC_DEBUG_OUT_LINE_SZ 80 + /* * For SLI4 iDiag debugfs diagnostics tool */ @@ -196,6 +204,12 @@ enum { #define SIZE_U16 sizeof(uint16_t) #define SIZE_U32 sizeof(uint32_t) +#define lpfc_nvmeio_data(phba, fmt, arg...) \ + { \ + if (phba->nvmeio_trc_on) \ + lpfc_debugfs_nvme_trc(phba, fmt, ##arg); \ + } + struct lpfc_debug { char *i_private; char op; @@ -214,6 +228,13 @@ struct lpfc_debugfs_trc { unsigned long jif; }; +struct lpfc_debugfs_nvmeio_trc { + char *fmt; + uint16_t data1; + uint16_t data2; + uint32_t data3; +}; + struct lpfc_idiag_offset { uint32_t last_rd; }; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 729803dacf15..56b4b94a372e 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -50,6 +50,7 @@ #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" +#include "lpfc_debugfs.h" /* NVME initiator-based functions */ @@ -222,6 +223,9 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, cmdwqe->sli4_xritag, status, cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp); + lpfc_nvmeio_data(phba, "NVME LS CMPL: xri x%x stat x%x parm x%x\n", + cmdwqe->sli4_xritag, status, wcqe->parameter); + if (cmdwqe->context3) { buf_ptr = (struct lpfc_dmabuf *)cmdwqe->context3; lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); @@ -355,6 +359,9 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, genwqe->vport = vport; genwqe->retry = retry; + lpfc_nvmeio_data(phba, "NVME LS XMIT: xri x%x iotag x%x to x%06x\n", + genwqe->sli4_xritag, genwqe->iotag, ndlp->nlp_DID); + rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, genwqe); if (rc == WQE_ERROR) { lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, @@ -637,6 +644,79 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport, *wptr = *dptr; /* Word 23 */ } +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS +static void +lpfc_nvme_ktime(struct lpfc_hba *phba, + struct lpfc_nvme_buf *lpfc_ncmd) +{ + uint64_t seg1, seg2, seg3, seg4; + + if (!phba->ktime_on) + return; + if (!lpfc_ncmd->ts_last_cmd || + !lpfc_ncmd->ts_cmd_start || + !lpfc_ncmd->ts_cmd_wqput || + !lpfc_ncmd->ts_isr_cmpl || + !lpfc_ncmd->ts_data_nvme) + return; + if (lpfc_ncmd->ts_cmd_start < lpfc_ncmd->ts_last_cmd) + return; + if (lpfc_ncmd->ts_cmd_wqput < lpfc_ncmd->ts_cmd_start) + return; + if (lpfc_ncmd->ts_isr_cmpl < lpfc_ncmd->ts_cmd_wqput) + return; + if (lpfc_ncmd->ts_data_nvme < lpfc_ncmd->ts_isr_cmpl) + return; + /* + * Segment 1 - Time from Last FCP command cmpl is handed + * off to NVME Layer to start of next command. + * Segment 2 - Time from Driver receives a IO cmd start + * from NVME Layer to WQ put is done on IO cmd. + * Segment 3 - Time from Driver WQ put is done on IO cmd + * to MSI-X ISR for IO cmpl. + * Segment 4 - Time from MSI-X ISR for IO cmpl to when + * cmpl is handled off to the NVME Layer. 
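+	 * All four segments are measured in nanoseconds from ktime_get_ns()
+	 * timestamps and accumulate into the totals, minimums and maximums
+	 * reported through the nvmektime debugfs file.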
+ */ + seg1 = lpfc_ncmd->ts_cmd_start - lpfc_ncmd->ts_last_cmd; + if (seg1 > 5000000) /* 5 ms - for sequential IOs */ + return; + + /* Calculate times relative to start of IO */ + seg2 = (lpfc_ncmd->ts_cmd_wqput - lpfc_ncmd->ts_cmd_start); + seg3 = (lpfc_ncmd->ts_isr_cmpl - + lpfc_ncmd->ts_cmd_start) - seg2; + seg4 = (lpfc_ncmd->ts_data_nvme - + lpfc_ncmd->ts_cmd_start) - seg2 - seg3; + phba->ktime_data_samples++; + phba->ktime_seg1_total += seg1; + if (seg1 < phba->ktime_seg1_min) + phba->ktime_seg1_min = seg1; + else if (seg1 > phba->ktime_seg1_max) + phba->ktime_seg1_max = seg1; + phba->ktime_seg2_total += seg2; + if (seg2 < phba->ktime_seg2_min) + phba->ktime_seg2_min = seg2; + else if (seg2 > phba->ktime_seg2_max) + phba->ktime_seg2_max = seg2; + phba->ktime_seg3_total += seg3; + if (seg3 < phba->ktime_seg3_min) + phba->ktime_seg3_min = seg3; + else if (seg3 > phba->ktime_seg3_max) + phba->ktime_seg3_max = seg3; + phba->ktime_seg4_total += seg4; + if (seg4 < phba->ktime_seg4_min) + phba->ktime_seg4_min = seg4; + else if (seg4 > phba->ktime_seg4_max) + phba->ktime_seg4_max = seg4; + + lpfc_ncmd->ts_last_cmd = 0; + lpfc_ncmd->ts_cmd_start = 0; + lpfc_ncmd->ts_cmd_wqput = 0; + lpfc_ncmd->ts_isr_cmpl = 0; + lpfc_ncmd->ts_data_nvme = 0; +} +#endif + /** * lpfc_nvme_io_cmd_wqe_cmpl - Complete an NVME-over-FCP IO * @lpfc_pnvme: Pointer to the driver's nvme instance data @@ -680,6 +760,9 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, nCmd = lpfc_ncmd->nvmeCmd; rport = lpfc_ncmd->nrport; + lpfc_nvmeio_data(phba, "NVME FCP CMPL: xri x%x stat x%x parm x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, + bf_get(lpfc_wcqe_c_status, wcqe), wcqe->parameter); /* * Catch race where our node has transitioned, but the * transport is still transitioning. @@ -798,6 +881,23 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, * no need for dma unprep because the nvme_transport * owns the dma address. 
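+	 * When CONFIG_SCSI_LPFC_DEBUG_FS is enabled, the block below also records
+	 * the completion timestamps and the completing CPU for the nvmektime and
+	 * cpucheck statistics.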
*/ +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + lpfc_ncmd->ts_isr_cmpl = pwqeIn->isr_timestamp; + lpfc_ncmd->ts_data_nvme = ktime_get_ns(); + phba->ktime_last_cmd = lpfc_ncmd->ts_data_nvme; + lpfc_nvme_ktime(phba, lpfc_ncmd); + } + if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) { + if (lpfc_ncmd->cpu != smp_processor_id()) + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, + "6701 CPU Check cmpl: " + "cpu %d expect %d\n", + smp_processor_id(), lpfc_ncmd->cpu); + if (lpfc_ncmd->cpu < LPFC_CHECK_CPU_CNT) + phba->cpucheck_cmpl_io[lpfc_ncmd->cpu]++; + } +#endif nCmd->done(nCmd); spin_lock_irqsave(&phba->hbalock, flags); @@ -1103,11 +1203,18 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nvme_buf *lpfc_ncmd; struct lpfc_nvme_rport *rport; struct lpfc_nvme_qhandle *lpfc_queue_info; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint64_t start = 0; +#endif lport = (struct lpfc_nvme_lport *)pnvme_lport->private; vport = lport->vport; phba = vport->phba; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) + start = ktime_get_ns(); +#endif rport = (struct lpfc_nvme_rport *)pnvme_rport->private; lpfc_queue_info = (struct lpfc_nvme_qhandle *)hw_queue_handle; @@ -1161,6 +1268,12 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, ret = -ENOMEM; goto out_fail; } +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + lpfc_ncmd->ts_cmd_start = start; + lpfc_ncmd->ts_last_cmd = phba->ktime_last_cmd; + } +#endif /* * Store the data needed by the driver to issue, abort, and complete @@ -1192,6 +1305,10 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, */ lpfc_ncmd->cur_iocbq.hba_wqidx = lpfc_queue_info->index; + lpfc_nvmeio_data(phba, "NVME FCP XMIT: xri x%x idx %d to %06x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, + lpfc_queue_info->index, ndlp->nlp_DID); + ret = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, &lpfc_ncmd->cur_iocbq); if (ret) { atomic_dec(&ndlp->cmd_pending); @@ -1204,6 +1321,28 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, goto out_free_nvme_buf; } +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) + lpfc_ncmd->ts_cmd_wqput = ktime_get_ns(); + + if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) { + lpfc_ncmd->cpu = smp_processor_id(); + if (lpfc_ncmd->cpu != lpfc_queue_info->index) { + /* Check for admin queue */ + if (lpfc_queue_info->qidx) { + lpfc_printf_vlog(vport, + KERN_ERR, LOG_NVME_IOERR, + "6702 CPU Check cmd: " + "cpu %d wq %d\n", + lpfc_ncmd->cpu, + lpfc_queue_info->index); + } + lpfc_ncmd->cpu = lpfc_queue_info->index; + } + if (lpfc_ncmd->cpu < LPFC_CHECK_CPU_CNT) + phba->cpucheck_xmt_io[lpfc_ncmd->cpu]++; + } +#endif return 0; out_free_nvme_buf: @@ -1377,6 +1516,10 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, return; } + lpfc_nvmeio_data(phba, "NVME FCP ABORT: xri x%x idx %d to %06x\n", + nvmereq_wqe->sli4_xritag, + nvmereq_wqe->hba_wqidx, ndlp->nlp_DID); + /* Outstanding abort is in progress */ if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) { spin_unlock_irqrestore(&phba->hbalock, flags); diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 4cc51086a5f7..85961c1ddecf 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -91,4 +91,11 @@ struct lpfc_nvme_buf { wait_queue_head_t *waitq; unsigned long start_time; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint64_t ts_cmd_start; + uint64_t ts_last_cmd; + uint64_t ts_cmd_wqput; + uint64_t ts_isr_cmpl; + uint64_t ts_data_nvme; +#endif }; diff --git a/drivers/scsi/lpfc/lpfc_sli.c 
b/drivers/scsi/lpfc/lpfc_sli.c index ccda1f73ae84..5f6ad67d5a73 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13299,6 +13299,11 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) if (unlikely(!fpeq)) return IRQ_NONE; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) + fpeq->isr_timestamp = ktime_get_ns(); +#endif + if (lpfc_fcp_look_ahead) { if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) lpfc_sli4_eq_clr_intr(fpeq); -- GitLab From f358dd0ca26c152a5e0922e269996268dcb98a9d Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:34 -0800 Subject: [PATCH 0445/1084] scsi: lpfc: NVME Target: Base modifications NVME Target: Base modifications This set of patches adds the base modifications for NVME target support The base modifications consist of: - Additional module parameters or configuration tuning - Enablement of configuration mode for NVME target. Ties into the queueing model put into place by the initiator basemods patches. - Target-specific buffer pools, dma pools, sgl pools [mkp: fixed space at end of file] Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 6 + drivers/scsi/lpfc/lpfc_attr.c | 131 ++++++++++++++++- drivers/scsi/lpfc/lpfc_crtn.h | 10 ++ drivers/scsi/lpfc/lpfc_debugfs.c | 7 +- drivers/scsi/lpfc/lpfc_disc.h | 1 + drivers/scsi/lpfc/lpfc_hw4.h | 45 ++++++ drivers/scsi/lpfc/lpfc_init.c | 244 ++++++++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_mem.c | 167 +++++++++++++++++++++ drivers/scsi/lpfc/lpfc_nvmet.h | 101 +++++++++++++ drivers/scsi/lpfc/lpfc_sli.c | 107 +++++++++++++- drivers/scsi/lpfc/lpfc_sli.h | 2 + drivers/scsi/lpfc/lpfc_sli4.h | 6 + 12 files changed, 807 insertions(+), 20 deletions(-) create mode 100644 drivers/scsi/lpfc/lpfc_nvmet.h diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index fdb314b3282d..b7361474880e 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -741,6 +741,7 @@ struct lpfc_hba { uint8_t fcp_embed_io; uint8_t nvme_support; /* Firmware supports NVME */ uint8_t nvmet_support; /* driver supports NVMET */ +#define LPFC_NVMET_MAX_PORTS 32 uint8_t mds_diags_support; /* HBA Config Parameters */ @@ -766,8 +767,10 @@ struct lpfc_hba { uint32_t cfg_fcp_imax; uint32_t cfg_fcp_cpu_map; uint32_t cfg_fcp_io_channel; + uint32_t cfg_suppress_rsp; uint32_t cfg_nvme_oas; uint32_t cfg_nvme_io_channel; + uint32_t cfg_enable_nvmet; uint32_t cfg_nvme_enable_fb; uint32_t cfg_total_seg_cnt; uint32_t cfg_sg_seg_cnt; @@ -820,6 +823,7 @@ struct lpfc_hba { #define LPFC_ENABLE_NVME 2 #define LPFC_ENABLE_BOTH 3 uint32_t io_channel_irqs; /* number of irqs for io channels */ + struct nvmet_fc_target_port *targetport; lpfc_vpd_t vpd; /* vital product data */ struct pci_dev *pcidev; @@ -1103,6 +1107,8 @@ struct lpfc_hba { uint16_t cpucheck_on; #define LPFC_CHECK_OFF 0 #define LPFC_CHECK_NVME_IO 1 +#define LPFC_CHECK_NVMET_RCV 2 +#define LPFC_CHECK_NVMET_IO 4 uint16_t ktime_on; uint64_t ktime_data_samples; uint64_t ktime_status_samples; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index b91b4bb1062d..835c6c1f4cd4 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -46,6 +46,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_version.h" #include "lpfc_compat.h" @@ -139,6 +140,7 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute 
*attr, struct Scsi_Host *shost = class_to_shost(dev); struct lpfc_vport *vport = shost_priv(shost); struct lpfc_hba *phba = vport->phba; + struct lpfc_nvmet_tgtport *tgtp; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport; @@ -150,6 +152,92 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, len += snprintf(buf, PAGE_SIZE, "NVME Disabled\n"); return len; } + if (phba->nvmet_support) { + if (!phba->targetport) { + len = snprintf(buf, PAGE_SIZE, + "NVME Target: x%llx is not allocated\n", + wwn_to_u64(vport->fc_portname.u.wwn)); + return len; + } + /* Port state is only one of two values for now. */ + if (phba->targetport->port_id) + statep = "REGISTERED"; + else + statep = "INIT"; + len += snprintf(buf + len, PAGE_SIZE - len, + "NVME Target: Enabled State %s\n", + statep); + len += snprintf(buf + len, PAGE_SIZE - len, + "%s%d WWPN x%llx WWNN x%llx DID x%06x\n", + "NVME Target: lpfc", + phba->brd_no, + wwn_to_u64(vport->fc_portname.u.wwn), + wwn_to_u64(vport->fc_nodename.u.wwn), + phba->targetport->port_id); + + len += snprintf(buf + len, PAGE_SIZE, + "\nNVME Target: Statistics\n"); + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + len += snprintf(buf+len, PAGE_SIZE-len, + "LS: Rcv %08x Drop %08x Abort %08x\n", + atomic_read(&tgtp->rcv_ls_req_in), + atomic_read(&tgtp->rcv_ls_req_drop), + atomic_read(&tgtp->xmt_ls_abort)); + if (atomic_read(&tgtp->rcv_ls_req_in) != + atomic_read(&tgtp->rcv_ls_req_out)) { + len += snprintf(buf+len, PAGE_SIZE-len, + "Rcv LS: in %08x != out %08x\n", + atomic_read(&tgtp->rcv_ls_req_in), + atomic_read(&tgtp->rcv_ls_req_out)); + } + + len += snprintf(buf+len, PAGE_SIZE-len, + "LS: Xmt %08x Drop %08x Cmpl %08x Err %08x\n", + atomic_read(&tgtp->xmt_ls_rsp), + atomic_read(&tgtp->xmt_ls_drop), + atomic_read(&tgtp->xmt_ls_rsp_cmpl), + atomic_read(&tgtp->xmt_ls_rsp_error)); + + len += snprintf(buf+len, PAGE_SIZE-len, + "FCP: Rcv %08x Drop %08x\n", + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_drop)); + + if (atomic_read(&tgtp->rcv_fcp_cmd_in) != + atomic_read(&tgtp->rcv_fcp_cmd_out)) { + len += snprintf(buf+len, PAGE_SIZE-len, + "Rcv FCP: in %08x != out %08x\n", + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out)); + } + + len += snprintf(buf+len, PAGE_SIZE-len, + "FCP Rsp: RD %08x rsp %08x WR %08x rsp %08x\n", + atomic_read(&tgtp->xmt_fcp_read), + atomic_read(&tgtp->xmt_fcp_read_rsp), + atomic_read(&tgtp->xmt_fcp_write), + atomic_read(&tgtp->xmt_fcp_rsp)); + + len += snprintf(buf+len, PAGE_SIZE-len, + "FCP Rsp: abort %08x drop %08x\n", + atomic_read(&tgtp->xmt_fcp_abort), + atomic_read(&tgtp->xmt_fcp_drop)); + + len += snprintf(buf+len, PAGE_SIZE-len, + "FCP Rsp Cmpl: %08x err %08x drop %08x\n", + atomic_read(&tgtp->xmt_fcp_rsp_cmpl), + atomic_read(&tgtp->xmt_fcp_rsp_error), + atomic_read(&tgtp->xmt_fcp_rsp_drop)); + + len += snprintf(buf+len, PAGE_SIZE-len, + "ABORT: Xmt %08x Err %08x Cmpl %08x", + atomic_read(&tgtp->xmt_abort_rsp), + atomic_read(&tgtp->xmt_abort_rsp_error), + atomic_read(&tgtp->xmt_abort_cmpl)); + + len += snprintf(buf+len, PAGE_SIZE-len, "\n"); + return len; + } localport = vport->localport; if (!localport) { @@ -2899,6 +2987,13 @@ lpfc_oas_lun_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(lpfc_xlane_lun, S_IRUGO | S_IWUSR, lpfc_oas_lun_show, lpfc_oas_lun_store); +int lpfc_enable_nvmet_cnt; +unsigned long long lpfc_enable_nvmet[LPFC_NVMET_MAX_PORTS] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +module_param_array(lpfc_enable_nvmet, ullong, &lpfc_enable_nvmet_cnt, 0444); +MODULE_PARM_DESC(lpfc_enable_nvmet, "Enable HBA port(s) WWPN as a NVME Target"); + static int lpfc_poll = 0; module_param(lpfc_poll, int, S_IRUGO); MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:" @@ -3177,6 +3272,15 @@ lpfc_vport_param_store(devloss_tmo) static DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR, lpfc_devloss_tmo_show, lpfc_devloss_tmo_store); +/* + * lpfc_suppress_rsp: Enable suppress rsp feature is firmware supports it + * lpfc_suppress_rsp = 0 Disable + * lpfc_suppress_rsp = 1 Enable (default) + * + */ +LPFC_ATTR_R(suppress_rsp, 1, 0, 1, + "Enable suppress rsp feature is firmware supports it"); + /* * lpfc_enable_fc4_type: Defines what FC4 types are supported. * Supported Values: 1 - register just FCP @@ -3190,7 +3294,8 @@ LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, /* * lpfc_xri_split: Defines the division of XRI resources between SCSI and NVME * This parameter is only used if: - * lpfc_enable_fc4_type is 3 - register both FCP and NVME + * lpfc_enable_fc4_type is 3 - register both FCP and NVME and + * port is not configured for NVMET. * * ELS/CT always get 10% of XRIs, up to a maximum of 250 * The remaining XRIs get split up based on lpfc_xri_split per port: @@ -4754,7 +4859,7 @@ LPFC_ATTR_R(use_msi, 2, 0, 2, "Use Message Signaled Interrupts (1) or " "MSI-X (2), if possible"); /* - * lpfc_nvme_oas: Use the oas bit when sending NVME IOs + * lpfc_nvme_oas: Use the oas bit when sending NVME/NVMET IOs * * 0 = NVME OAS disabled * 1 = NVME OAS enabled @@ -4992,6 +5097,7 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_fcp_imax, &dev_attr_lpfc_fcp_cpu_map, &dev_attr_lpfc_fcp_io_channel, + &dev_attr_lpfc_suppress_rsp, &dev_attr_lpfc_nvme_io_channel, &dev_attr_lpfc_nvme_enable_fb, &dev_attr_lpfc_enable_bg, @@ -6027,6 +6133,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) phba->cfg_poll = 0; else phba->cfg_poll = lpfc_poll; + lpfc_suppress_rsp_init(phba, lpfc_suppress_rsp); lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type); @@ -6046,17 +6153,17 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) } /* A value of 0 means use the number of CPUs found in the system */ - if (phba->cfg_nvme_io_channel == 0) - phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu; if (phba->cfg_fcp_io_channel == 0) phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu; - - if (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) - phba->cfg_nvme_io_channel = 0; + if (phba->cfg_nvme_io_channel == 0) + phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu; if (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME) phba->cfg_fcp_io_channel = 0; + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) + phba->cfg_nvme_io_channel = 0; + if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) phba->io_channel_irqs = phba->cfg_fcp_io_channel; else @@ -6088,12 +6195,20 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) { - phba->nvmet_support = 0; if (phba->cfg_nvme_io_channel > phba->sli4_hba.num_present_cpu) phba->cfg_nvme_io_channel = phba->sli4_hba.num_present_cpu; + if (phba->cfg_fcp_io_channel > phba->sli4_hba.num_present_cpu) phba->cfg_fcp_io_channel = phba->sli4_hba.num_present_cpu; + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME && + phba->nvmet_support) { + phba->cfg_enable_fc4_type &= ~LPFC_ENABLE_FCP; + phba->cfg_fcp_io_channel = 0; + } else + /* Not NVME Target mode. 
Turn off Target parameters. */ + phba->nvmet_support = 0; + if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) phba->io_channel_irqs = phba->cfg_fcp_io_channel; else diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 4a0db3895993..1e97b1a106c5 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -240,6 +240,8 @@ struct hbq_dmabuf *lpfc_els_hbq_alloc(struct lpfc_hba *); void lpfc_els_hbq_free(struct lpfc_hba *, struct hbq_dmabuf *); struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *); void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *); +struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba); +void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab); void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *, uint16_t); int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, @@ -304,6 +306,8 @@ int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t, int lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t rnum, struct lpfc_iocbq *iocbq); struct lpfc_sglq *__lpfc_clear_active_sglq(struct lpfc_hba *phba, uint16_t xri); +struct lpfc_sglq *__lpfc_sli_get_nvmet_sglq(struct lpfc_hba *phba, + struct lpfc_iocbq *piocbq); void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t); void lpfc_sli_bemem_bcopy(void *, void *, uint32_t); void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *); @@ -353,6 +357,9 @@ void lpfc_sli_free_hbq(struct lpfc_hba *, struct hbq_dmabuf *); void *lpfc_mbuf_alloc(struct lpfc_hba *, int, dma_addr_t *); void __lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t); void lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t); +void *lpfc_nvmet_buf_alloc(struct lpfc_hba *phba, int flags, + dma_addr_t *handle); +void lpfc_nvmet_buf_free(struct lpfc_hba *phba, void *virtp, dma_addr_t dma); void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *); /* Function prototypes. 
*/ @@ -492,6 +499,7 @@ int lpfc_selective_reset(struct lpfc_hba *); int lpfc_sli4_read_config(struct lpfc_hba *); void lpfc_sli4_node_prep(struct lpfc_hba *); int lpfc_sli4_els_sgl_update(struct lpfc_hba *phba); +int lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba); int lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba); int lpfc_sli4_nvme_sgl_update(struct lpfc_hba *phba); void lpfc_free_sgl_list(struct lpfc_hba *, struct list_head *); @@ -531,3 +539,5 @@ void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_wcqe_complete *abts_cmpl); +extern int lpfc_enable_nvmet_cnt; +extern unsigned long long lpfc_enable_nvmet[]; diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 3fcba26623eb..e54307dff8be 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -828,7 +828,6 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) phba->ktime_data_samples)); return len; } - return len; } @@ -1961,7 +1960,11 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf, return strlen(pbuf); } else if ((strncmp(pbuf, "rcv", sizeof("rcv") - 1) == 0)) { - return -EINVAL; + if (phba->nvmet_support) + phba->cpucheck_on |= LPFC_CHECK_NVMET_RCV; + else + return -EINVAL; + return strlen(pbuf); } else if ((strncmp(pbuf, "off", sizeof("off") - 1) == 0)) { phba->cpucheck_on = LPFC_CHECK_OFF; diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 13cc1d19b336..e305e97e05a9 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -148,6 +148,7 @@ struct lpfc_node_rrq { /* Defines for nlp_flag (uint32) */ #define NLP_IGNR_REG_CMPL 0x00000001 /* Rcvd rscn before we cmpl reg login */ #define NLP_REG_LOGIN_SEND 0x00000002 /* sent reglogin to adapter */ +#define NLP_SUPPRESS_RSP 0x00000010 /* Remote NPort supports suppress rsp */ #define NLP_PLOGI_SND 0x00000020 /* sent PLOGI request for this entry */ #define NLP_PRLI_SND 0x00000040 /* sent PRLI request for this entry */ #define NLP_ADISC_SND 0x00000080 /* sent ADISC request for this entry */ diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index fcc083cc00e0..9bee888b7dc4 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -3742,9 +3742,18 @@ struct wqe_common { #define LPFC_ELS_ID_FDISC 2 #define LPFC_ELS_ID_LOGO 1 #define LPFC_ELS_ID_DEFAULT 0 +#define wqe_irsp_SHIFT 4 +#define wqe_irsp_MASK 0x00000001 +#define wqe_irsp_WORD word11 +#define wqe_sup_SHIFT 6 +#define wqe_sup_MASK 0x00000001 +#define wqe_sup_WORD word11 #define wqe_wqec_SHIFT 7 #define wqe_wqec_MASK 0x00000001 #define wqe_wqec_WORD word11 +#define wqe_irsplen_SHIFT 8 +#define wqe_irsplen_MASK 0x0000000f +#define wqe_irsplen_WORD word11 #define wqe_cqid_SHIFT 16 #define wqe_cqid_MASK 0x0000ffff #define wqe_cqid_WORD word11 @@ -4037,6 +4046,35 @@ struct fcp_icmnd64_wqe { uint32_t rsvd_12_15[4]; /* word 12-15 */ }; +struct fcp_trsp64_wqe { + struct ulp_bde64 bde; + uint32_t response_len; + uint32_t rsvd_4_5[2]; + struct wqe_common wqe_com; /* words 6-11 */ + uint32_t rsvd_12_15[4]; /* word 12-15 */ +}; + +struct fcp_tsend64_wqe { + struct ulp_bde64 bde; + uint32_t payload_offset_len; + uint32_t relative_offset; + uint32_t reserved; + struct wqe_common wqe_com; /* words 6-11 */ + uint32_t fcp_data_len; /* word 12 */ + uint32_t rsvd_13_15[3]; /* word 13-15 */ +}; + +struct fcp_treceive64_wqe { + struct ulp_bde64 bde; + 
uint32_t payload_offset_len; + uint32_t relative_offset; + uint32_t reserved; + struct wqe_common wqe_com; /* words 6-11 */ + uint32_t fcp_data_len; /* word 12 */ + uint32_t rsvd_13_15[3]; /* word 13-15 */ +}; +#define TXRDY_PAYLOAD_LEN 12 + union lpfc_wqe { uint32_t words[16]; @@ -4052,6 +4090,10 @@ union lpfc_wqe { struct xmit_els_rsp64_wqe xmit_els_rsp; struct els_request64_wqe els_req; struct gen_req64_wqe gen_req; + struct fcp_trsp64_wqe fcp_trsp; + struct fcp_tsend64_wqe fcp_tsend; + struct fcp_treceive64_wqe fcp_treceive; + }; union lpfc_wqe128 { @@ -4060,6 +4102,9 @@ union lpfc_wqe128 { struct fcp_icmnd64_wqe fcp_icmd; struct fcp_iread64_wqe fcp_iread; struct fcp_iwrite64_wqe fcp_iwrite; + struct fcp_trsp64_wqe fcp_trsp; + struct fcp_tsend64_wqe fcp_tsend; + struct fcp_treceive64_wqe fcp_treceive; struct xmit_seq64_wqe xmit_sequence; struct gen_req64_wqe gen_req; }; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 474bafc63055..03fa3f02e33e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -73,6 +73,7 @@ static int lpfc_create_bootstrap_mbox(struct lpfc_hba *); static int lpfc_setup_endian_order(struct lpfc_hba *); static void lpfc_destroy_bootstrap_mbox(struct lpfc_hba *); static void lpfc_free_els_sgl_list(struct lpfc_hba *); +static void lpfc_free_nvmet_sgl_list(struct lpfc_hba *); static void lpfc_init_sgl_list(struct lpfc_hba *); static int lpfc_init_active_sgl_array(struct lpfc_hba *); static void lpfc_free_active_sgl(struct lpfc_hba *); @@ -88,6 +89,7 @@ static void lpfc_sli4_oas_verify(struct lpfc_hba *phba); static struct scsi_transport_template *lpfc_transport_template = NULL; static struct scsi_transport_template *lpfc_vport_transport_template = NULL; static DEFINE_IDR(lpfc_hba_index); +#define LPFC_NVMET_BUF_POST 254 /** * lpfc_config_port_prep - Perform lpfc initialization prior to config port @@ -1023,10 +1025,17 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_for_each_entry(sglq_entry, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) sglq_entry->state = SGL_FREED; + list_for_each_entry(sglq_entry, + &phba->sli4_hba.lpfc_abts_nvmet_sgl_list, list) + sglq_entry->state = SGL_FREED; list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list, &phba->sli4_hba.lpfc_els_sgl_list); + if (phba->sli4_hba.nvme_wq) + list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list, + &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); /* abts_scsi_buf_list_lock required because worker thread uses this * list. @@ -3320,6 +3329,128 @@ lpfc_sli4_els_sgl_update(struct lpfc_hba *phba) return rc; } +/** + * lpfc_sli4_nvmet_sgl_update - update xri-sgl sizing and mapping + * @phba: pointer to lpfc hba data structure. + * + * This routine first calculates the sizes of the current els and allocated + * scsi sgl lists, and then goes through all sgls to updates the physical + * XRIs assigned due to port function reset. During port initialization, the + * current els and allocated scsi sgl lists are 0s. 
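+ * Here the same bookkeeping is applied to the driver's nvmet xri-sgl list:
+ * the list is grown or shrunk to the new target count and every remaining
+ * entry is assigned a freshly allocated XRI.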
+ * + * Return codes + * 0 - successful (for now, it always returns 0) + **/ +int +lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba) +{ + struct lpfc_sglq *sglq_entry = NULL, *sglq_entry_next = NULL; + uint16_t i, lxri, xri_cnt, els_xri_cnt; + uint16_t nvmet_xri_cnt, tot_cnt; + LIST_HEAD(nvmet_sgl_list); + int rc; + + /* + * update on pci function's nvmet xri-sgl list + */ + els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); + nvmet_xri_cnt = 0; + tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; + + if (nvmet_xri_cnt > phba->sli4_hba.nvmet_xri_cnt) { + /* els xri-sgl expanded */ + xri_cnt = nvmet_xri_cnt - phba->sli4_hba.nvmet_xri_cnt; + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6302 NVMET xri-sgl cnt grew from %d to %d\n", + phba->sli4_hba.nvmet_xri_cnt, nvmet_xri_cnt); + /* allocate the additional nvmet sgls */ + for (i = 0; i < xri_cnt; i++) { + sglq_entry = kzalloc(sizeof(struct lpfc_sglq), + GFP_KERNEL); + if (sglq_entry == NULL) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6303 Failure to allocate an " + "NVMET sgl entry:%d\n", i); + rc = -ENOMEM; + goto out_free_mem; + } + sglq_entry->buff_type = NVMET_BUFF_TYPE; + sglq_entry->virt = lpfc_nvmet_buf_alloc(phba, 0, + &sglq_entry->phys); + if (sglq_entry->virt == NULL) { + kfree(sglq_entry); + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6304 Failure to allocate an " + "NVMET buf:%d\n", i); + rc = -ENOMEM; + goto out_free_mem; + } + sglq_entry->sgl = sglq_entry->virt; + memset(sglq_entry->sgl, 0, + phba->cfg_sg_dma_buf_size); + sglq_entry->state = SGL_FREED; + list_add_tail(&sglq_entry->list, &nvmet_sgl_list); + } + spin_lock_irq(&phba->hbalock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&nvmet_sgl_list, + &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); + spin_unlock_irq(&phba->hbalock); + } else if (nvmet_xri_cnt < phba->sli4_hba.nvmet_xri_cnt) { + /* nvmet xri-sgl shrunk */ + xri_cnt = phba->sli4_hba.nvmet_xri_cnt - nvmet_xri_cnt; + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6305 NVMET xri-sgl count decreased from " + "%d to %d\n", phba->sli4_hba.nvmet_xri_cnt, + nvmet_xri_cnt); + spin_lock_irq(&phba->hbalock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_nvmet_sgl_list, + &nvmet_sgl_list); + /* release extra nvmet sgls from list */ + for (i = 0; i < xri_cnt; i++) { + list_remove_head(&nvmet_sgl_list, + sglq_entry, struct lpfc_sglq, list); + if (sglq_entry) { + lpfc_nvmet_buf_free(phba, sglq_entry->virt, + sglq_entry->phys); + kfree(sglq_entry); + } + } + list_splice_init(&nvmet_sgl_list, + &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); + spin_unlock_irq(&phba->hbalock); + } else + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6306 NVMET xri-sgl count unchanged: %d\n", + nvmet_xri_cnt); + phba->sli4_hba.nvmet_xri_cnt = nvmet_xri_cnt; + + /* update xris to nvmet sgls on the list */ + sglq_entry = NULL; + sglq_entry_next = NULL; + list_for_each_entry_safe(sglq_entry, sglq_entry_next, + &phba->sli4_hba.lpfc_nvmet_sgl_list, list) { + lxri = lpfc_sli4_next_xritag(phba); + if (lxri == NO_XRI) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6307 Failed to allocate xri for " + "NVMET sgl\n"); + rc = -ENOMEM; + goto out_free_mem; + } + sglq_entry->sli4_lxritag = lxri; + sglq_entry->sli4_xritag = phba->sli4_hba.xri_ids[lxri]; + } + return 0; + +out_free_mem: + lpfc_free_nvmet_sgl_list(phba); + return rc; +} + /** * lpfc_sli4_scsi_sgl_update - update xri-sgl sizing and mapping * @phba: pointer to 
lpfc hba data structure. @@ -5228,11 +5359,12 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) init_waitqueue_head(&phba->work_waitq); lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "1403 Protocols supported %s %s\n", + "1403 Protocols supported %s %s %s\n", ((phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) ? "SCSI" : " "), ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) ? - "NVME" : " ")); + "NVME" : " "), + (phba->nvmet_support ? "NVMET" : " ")); if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { /* Initialize the scsi buffer list used by driver for scsi IO */ @@ -5447,11 +5579,13 @@ static int lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) { LPFC_MBOXQ_t *mboxq; + MAILBOX_t *mb; int rc, i, max_buf_size; uint8_t pn_page[LPFC_MAX_SUPPORTED_PAGES] = {0}; struct lpfc_mqe *mqe; int longs; int fof_vectors = 0; + uint64_t wwn; phba->sli4_hba.num_online_cpu = num_online_cpus(); phba->sli4_hba.num_present_cpu = lpfc_present_cpu; @@ -5597,6 +5731,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* This abort list used by worker thread */ spin_lock_init(&phba->sli4_hba.sgl_list_lock); + spin_lock_init(&phba->sli4_hba.nvmet_io_lock); /* * Initialize driver internal slow-path work queues @@ -5673,7 +5808,43 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) goto out_free_bsmbx; } + /* Check for NVMET being configured */ phba->nvmet_support = 0; + if (lpfc_enable_nvmet_cnt) { + + /* First get WWN of HBA instance */ + lpfc_read_nv(phba, mboxq); + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6016 Mailbox failed , mbxCmd x%x " + "READ_NV, mbxStatus x%x\n", + bf_get(lpfc_mqe_command, &mboxq->u.mqe), + bf_get(lpfc_mqe_status, &mboxq->u.mqe)); + rc = -EIO; + goto out_free_bsmbx; + } + mb = &mboxq->u.mb; + memcpy(&wwn, (char *)mb->un.varRDnvp.nodename, + sizeof(uint64_t)); + wwn = cpu_to_be64(wwn); + phba->sli4_hba.wwnn.u.name = wwn; + memcpy(&wwn, (char *)mb->un.varRDnvp.portname, + sizeof(uint64_t)); + /* wwn is WWPN of HBA instance */ + wwn = cpu_to_be64(wwn); + phba->sli4_hba.wwpn.u.name = wwn; + + /* Check to see if it matches any module parameter */ + for (i = 0; i < lpfc_enable_nvmet_cnt; i++) { + if (wwn == lpfc_enable_nvmet[i]) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6017 NVME Target %016llx\n", + wwn); + phba->nvmet_support = 1; /* a match */ + } + } + } lpfc_nvme_mod_param_dep(phba); @@ -5869,6 +6040,7 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) /* Free the ELS sgl list */ lpfc_free_active_sgl(phba); lpfc_free_els_sgl_list(phba); + lpfc_free_nvmet_sgl_list(phba); /* Free the completion queue EQ event pool */ lpfc_sli4_cq_event_release_all(phba); @@ -6089,6 +6261,33 @@ lpfc_free_els_sgl_list(struct lpfc_hba *phba) lpfc_free_sgl_list(phba, &sglq_list); } +/** + * lpfc_free_nvmet_sgl_list - Free nvmet sgl list. + * @phba: pointer to lpfc hba data structure. + * + * This routine is invoked to free the driver's nvmet sgl list and memory. 
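+ * The list is detached from lpfc_nvmet_sgl_list under hbalock and
+ * sgl_list_lock, and the detached entries are then freed outside the
+ * locks through lpfc_nvmet_buf_free().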
+ **/ +static void +lpfc_free_nvmet_sgl_list(struct lpfc_hba *phba) +{ + struct lpfc_sglq *sglq_entry = NULL, *sglq_next = NULL; + LIST_HEAD(sglq_list); + + /* Retrieve all nvmet sgls from driver list */ + spin_lock_irq(&phba->hbalock); + spin_lock(&phba->sli4_hba.sgl_list_lock); + list_splice_init(&phba->sli4_hba.lpfc_nvmet_sgl_list, &sglq_list); + spin_unlock(&phba->sli4_hba.sgl_list_lock); + spin_unlock_irq(&phba->hbalock); + + /* Now free the sgl list */ + list_for_each_entry_safe(sglq_entry, sglq_next, &sglq_list, list) { + list_del(&sglq_entry->list); + lpfc_nvmet_buf_free(phba, sglq_entry->virt, sglq_entry->phys); + kfree(sglq_entry); + } +} + /** * lpfc_init_active_sgl_array - Allocate the buf to track active ELS XRIs. * @phba: pointer to lpfc hba data structure. @@ -6138,6 +6337,8 @@ lpfc_init_sgl_list(struct lpfc_hba *phba) /* Initialize and populate the sglq list per host/VF. */ INIT_LIST_HEAD(&phba->sli4_hba.lpfc_els_sgl_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_sgl_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); /* els xri-sgl book keeping */ phba->sli4_hba.els_xri_cnt = 0; @@ -6416,6 +6617,22 @@ lpfc_create_shost(struct lpfc_hba *phba) shost = lpfc_shost_from_vport(vport); phba->pport = vport; + if (phba->nvmet_support) { + /* Only 1 vport (pport) will support NVME target */ + if (phba->txrdy_payload_pool == NULL) { + phba->txrdy_payload_pool = pci_pool_create( + "txrdy_pool", phba->pcidev, + TXRDY_PAYLOAD_LEN, 16, 0); + if (phba->txrdy_payload_pool) { + phba->targetport = NULL; + phba->cfg_enable_fc4_type = LPFC_ENABLE_NVME; + lpfc_printf_log(phba, KERN_INFO, + LOG_INIT | LOG_NVME_DISC, + "6076 NVME Target Found\n"); + } + } + } + lpfc_debugfs_initialize(vport); /* Put reference to SCSI host to driver's device private data */ pci_set_drvdata(phba->pcidev, shost); @@ -7459,7 +7676,7 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) phba->cfg_nvme_io_channel = io_channel; lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2574 IRQs: %d, IO Channels: fcp %d nvme %d\n", + "2574 IO channels: irqs %d fcp %d nvme %d\n", phba->io_channel_irqs, phba->cfg_fcp_io_channel, phba->cfg_nvme_io_channel); @@ -9164,8 +9381,9 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) if (phba->cfg_fof) vectors++; - rc = pci_alloc_irq_vectors(phba->pcidev, 2, vectors, - PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); + rc = pci_alloc_irq_vectors(phba->pcidev, + (phba->nvmet_support) ? 
1 : 2, + vectors, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0484 PCI enable MSI-X failed (%d)\n", rc); @@ -9447,6 +9665,8 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) int nvme_xri_cmpl = 1; int fcp_xri_cmpl = 1; int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); + int nvmet_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) fcp_xri_cmpl = @@ -9455,7 +9675,8 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) nvme_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_nvme_buf_list); - while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl) { + while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl || + !nvmet_xri_cmpl) { if (wait_time > LPFC_XRI_EXCH_BUSY_WAIT_TMO) { if (!nvme_xri_cmpl) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -9488,6 +9709,9 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); + + nvmet_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); } } @@ -9725,6 +9949,9 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP; } + if (bf_get(cfg_xib, mbx_sli4_parameters) && phba->cfg_suppress_rsp) + phba->sli.sli_flag |= LPFC_SLI_SUPPRESS_RSP; + /* Make sure that sge_supp_len can be handled by the driver */ if (sli4_params->sge_supp_len > LPFC_MAX_SGE_SIZE) sli4_params->sge_supp_len = LPFC_MAX_SGE_SIZE; @@ -10376,13 +10603,15 @@ lpfc_sli4_get_els_iocb_cnt(struct lpfc_hba *phba) * lpfc_sli4_get_iocb_cnt - Calculate the # of total IOCBs to reserve * @phba: pointer to lpfc hba data structure. * - * returns the number of ELS/CT + * returns the number of ELS/CT + NVMET IOCBs to reserve **/ int lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba) { int max_xri = lpfc_sli4_get_els_iocb_cnt(phba); + if (phba->nvmet_support) + max_xri += LPFC_NVMET_BUF_POST; return max_xri; } @@ -10755,6 +10984,7 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) /* Remove FC host and then SCSI host with the physical port */ fc_remove_host(shost); scsi_remove_host(shost); + /* todo: tgt: remove targetport */ /* Perform ndlp cleanup on the physical port. The nvme localport * is destroyed after to ensure all rports are io-disabled. diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index c65a1ec3d2e4..32db255f5216 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -40,6 +40,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" @@ -441,6 +442,44 @@ lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma) return; } +/** + * lpfc_nvmet_buf_alloc - Allocate an nvmet_buf from the + * lpfc_sg_dma_buf_pool PCI pool + * @phba: HBA which owns the pool to allocate from + * @mem_flags: indicates if this is a priority (MEM_PRI) allocation + * @handle: used to return the DMA-mapped address of the nvmet_buf + * + * Description: Allocates a DMA-mapped buffer from the lpfc_sg_dma_buf_pool + * PCI pool. Allocates from generic pci_pool_alloc function. 
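+ * Note: @mem_flags is part of the interface but is not examined on this
+ * path; the allocation is always made with GFP_KERNEL.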
+ * + * Returns: + * pointer to the allocated nvmet_buf on success + * NULL on failure + **/ +void * +lpfc_nvmet_buf_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle) +{ + void *ret; + + ret = pci_pool_alloc(phba->lpfc_sg_dma_buf_pool, GFP_KERNEL, handle); + return ret; +} + +/** + * lpfc_nvmet_buf_free - Free an nvmet_buf from the lpfc_sg_dma_buf_pool + * PCI pool + * @phba: HBA which owns the pool to return to + * @virt: nvmet_buf to free + * @dma: the DMA-mapped address of the lpfc_sg_dma_buf_pool to be freed + * + * Returns: None + **/ +void +lpfc_nvmet_buf_free(struct lpfc_hba *phba, void *virt, dma_addr_t dma) +{ + pci_pool_free(phba->lpfc_sg_dma_buf_pool, virt, dma); +} + /** * lpfc_els_hbq_alloc - Allocate an HBQ buffer * @phba: HBA to allocate HBQ buffer for @@ -553,6 +592,134 @@ lpfc_sli4_rb_free(struct lpfc_hba *phba, struct hbq_dmabuf *dmab) kfree(dmab); } +/** + * lpfc_sli4_nvmet_alloc - Allocate an SLI4 Receive buffer + * @phba: HBA to allocate a receive buffer for + * + * Description: Allocates a DMA-mapped receive buffer from the lpfc_hrb_pool PCI + * pool along a non-DMA-mapped container for it. + * + * Notes: Not interrupt-safe. Must be called with no locks held. + * + * Returns: + * pointer to HBQ on success + * NULL on failure + **/ +struct rqb_dmabuf * +lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) +{ + struct rqb_dmabuf *dma_buf; + struct lpfc_iocbq *nvmewqe; + union lpfc_wqe128 *wqe; + + dma_buf = kzalloc(sizeof(struct rqb_dmabuf), GFP_KERNEL); + if (!dma_buf) + return NULL; + + dma_buf->hbuf.virt = pci_pool_alloc(phba->lpfc_hrb_pool, GFP_KERNEL, + &dma_buf->hbuf.phys); + if (!dma_buf->hbuf.virt) { + kfree(dma_buf); + return NULL; + } + dma_buf->dbuf.virt = pci_pool_alloc(phba->lpfc_drb_pool, GFP_KERNEL, + &dma_buf->dbuf.phys); + if (!dma_buf->dbuf.virt) { + pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, + dma_buf->hbuf.phys); + kfree(dma_buf); + return NULL; + } + dma_buf->total_size = LPFC_DATA_BUF_SIZE; + + dma_buf->context = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), + GFP_KERNEL); + if (!dma_buf->context) { + pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, + dma_buf->dbuf.phys); + pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, + dma_buf->hbuf.phys); + kfree(dma_buf); + return NULL; + } + + dma_buf->iocbq = lpfc_sli_get_iocbq(phba); + dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET; + if (!dma_buf->iocbq) { + kfree(dma_buf->context); + pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, + dma_buf->dbuf.phys); + pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, + dma_buf->hbuf.phys); + kfree(dma_buf); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "2621 Ran out of nvmet iocb/WQEs\n"); + return NULL; + } + nvmewqe = dma_buf->iocbq; + wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; + /* Initialize WQE */ + memset(wqe, 0, sizeof(union lpfc_wqe)); + /* Word 7 */ + bf_set(wqe_ct, &wqe->generic.wqe_com, SLI4_CT_RPI); + bf_set(wqe_class, &wqe->generic.wqe_com, CLASS3); + bf_set(wqe_pu, &wqe->generic.wqe_com, 1); + /* Word 10 */ + bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_ebde_cnt, &wqe->generic.wqe_com, 0); + bf_set(wqe_qosd, &wqe->generic.wqe_com, 0); + + dma_buf->iocbq->context1 = NULL; + spin_lock(&phba->sli4_hba.sgl_list_lock); + dma_buf->sglq = __lpfc_sli_get_nvmet_sglq(phba, dma_buf->iocbq); + spin_unlock(&phba->sli4_hba.sgl_list_lock); + if (!dma_buf->sglq) { + lpfc_sli_release_iocbq(phba, dma_buf->iocbq); + kfree(dma_buf->context); + pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, + dma_buf->dbuf.phys); + 
pci_pool_free(phba->lpfc_hrb_pool, dma_buf->hbuf.virt, + dma_buf->hbuf.phys); + kfree(dma_buf); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6132 Ran out of nvmet XRIs\n"); + return NULL; + } + return dma_buf; +} + +/** + * lpfc_sli4_nvmet_free - Frees a receive buffer + * @phba: HBA buffer was allocated for + * @dmab: DMA Buffer container returned by lpfc_sli4_rbq_alloc + * + * Description: Frees both the container and the DMA-mapped buffers returned by + * lpfc_sli4_nvmet_alloc. + * + * Notes: Can be called with or without locks held. + * + * Returns: None + **/ +void +lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab) +{ + unsigned long flags; + + __lpfc_clear_active_sglq(phba, dmab->sglq->sli4_lxritag); + dmab->sglq->state = SGL_FREED; + dmab->sglq->ndlp = NULL; + + spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, flags); + list_add_tail(&dmab->sglq->list, &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock_irqrestore(&phba->sli4_hba.sgl_list_lock, flags); + + lpfc_sli_release_iocbq(phba, dmab->iocbq); + kfree(dmab->context); + pci_pool_free(phba->lpfc_hrb_pool, dmab->hbuf.virt, dmab->hbuf.phys); + pci_pool_free(phba->lpfc_drb_pool, dmab->dbuf.virt, dmab->dbuf.phys); + kfree(dmab); +} + /** * lpfc_in_buf_free - Free a DMA buffer * @phba: HBA buffer is associated with diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h new file mode 100644 index 000000000000..f7b6a3f374a4 --- /dev/null +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -0,0 +1,101 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2004-2016 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. 
* + ********************************************************************/ + +#define LPFC_NVMET_MIN_SEGS 16 +#define LPFC_NVMET_DEFAULT_SEGS 64 /* 256K IOs */ +#define LPFC_NVMET_MAX_SEGS 510 +#define LPFC_NVMET_SUCCESS_LEN 12 + +/* Used for NVME Target */ +struct lpfc_nvmet_tgtport { + struct lpfc_hba *phba; + struct completion tport_unreg_done; + + /* Stats counters - lpfc_nvmet_unsol_ls_buffer */ + atomic_t rcv_ls_req_in; + atomic_t rcv_ls_req_out; + atomic_t rcv_ls_req_drop; + atomic_t xmt_ls_abort; + + /* Stats counters - lpfc_nvmet_xmt_ls_rsp */ + atomic_t xmt_ls_rsp; + atomic_t xmt_ls_drop; + + /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */ + atomic_t xmt_ls_rsp_error; + atomic_t xmt_ls_rsp_cmpl; + + /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */ + atomic_t rcv_fcp_cmd_in; + atomic_t rcv_fcp_cmd_out; + atomic_t rcv_fcp_cmd_drop; + + /* Stats counters - lpfc_nvmet_xmt_fcp_op */ + atomic_t xmt_fcp_abort; + atomic_t xmt_fcp_drop; + atomic_t xmt_fcp_read_rsp; + atomic_t xmt_fcp_read; + atomic_t xmt_fcp_write; + atomic_t xmt_fcp_rsp; + + /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */ + atomic_t xmt_fcp_rsp_cmpl; + atomic_t xmt_fcp_rsp_error; + atomic_t xmt_fcp_rsp_drop; + + + /* Stats counters - lpfc_nvmet_unsol_issue_abort */ + atomic_t xmt_abort_rsp; + atomic_t xmt_abort_rsp_error; + + /* Stats counters - lpfc_nvmet_xmt_abort_cmp */ + atomic_t xmt_abort_cmpl; +}; + +struct lpfc_nvmet_rcv_ctx { + union { + struct nvmefc_tgt_ls_req ls_req; + struct nvmefc_tgt_fcp_req fcp_req; + } ctx; + struct lpfc_hba *phba; + struct lpfc_iocbq *wqeq; + struct lpfc_iocbq *abort_wqeq; + dma_addr_t txrdy_phys; + uint32_t *txrdy; + uint32_t sid; + uint32_t offset; + uint16_t oxid; + uint16_t size; + uint16_t entry_cnt; + uint16_t cpu; + uint16_t state; + /* States */ +#define LPFC_NVMET_STE_FREE 0 +#define LPFC_NVMET_STE_RCV 1 +#define LPFC_NVMET_STE_DATA 2 +#define LPFC_NVMET_STE_ABORT 3 +#define LPFC_NVMET_STE_RSP 4 +#define LPFC_NVMET_STE_DONE 5 + uint16_t flag; +#define LPFC_NVMET_IO_INP 1 +#define LPFC_NVMET_ABORT_OP 2 + struct rqb_dmabuf *rqb_buffer; +}; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 5f6ad67d5a73..f2cd0f37e03f 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -45,6 +45,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" #include "lpfc_compat.h" @@ -975,6 +976,34 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) return sglq; } +/** + * __lpfc_sli_get_nvmet_sglq - Allocates an iocb object from sgl pool + * @phba: Pointer to HBA context object. + * @piocb: Pointer to the iocbq. + * + * This function is called with the sgl_list lock held. This function + * gets a new driver sglq object from the sglq list. If the + * list is not empty then it is successful, it returns pointer to the newly + * allocated sglq object else it returns NULL. 
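+ * On success the sglq is also recorded in lpfc_sglq_active_list at its
+ * logical XRI index and its state is set to SGL_ALLOCATED.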
+ **/ +struct lpfc_sglq * +__lpfc_sli_get_nvmet_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) +{ + struct list_head *lpfc_nvmet_sgl_list; + struct lpfc_sglq *sglq = NULL; + + lpfc_nvmet_sgl_list = &phba->sli4_hba.lpfc_nvmet_sgl_list; + + lockdep_assert_held(&phba->sli4_hba.sgl_list_lock); + + list_remove_head(lpfc_nvmet_sgl_list, sglq, struct lpfc_sglq, list); + if (!sglq) + return NULL; + phba->sli4_hba.lpfc_sglq_active_list[sglq->sli4_lxritag] = sglq; + sglq->state = SGL_ALLOCATED; + return sglq; +} + /** * lpfc_sli_get_iocbq - Allocates an iocb object from iocb pool * @phba: Pointer to HBA context object. @@ -1031,6 +1060,18 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) if (sglq) { + if (iocbq->iocb_flag & LPFC_IO_NVMET) { + spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock, + iflag); + sglq->state = SGL_FREED; + sglq->ndlp = NULL; + list_add_tail(&sglq->list, + &phba->sli4_hba.lpfc_nvmet_sgl_list); + spin_unlock_irqrestore( + &phba->sli4_hba.sgl_list_lock, iflag); + goto out; + } + pring = phba->sli4_hba.els_wq->pring; if ((iocbq->iocb_flag & LPFC_EXCHANGE_BUSY) && (sglq->state != SGL_XRI_ABORTED)) { @@ -1056,13 +1097,15 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) } } +out: /* * Clean all volatile data fields, preserve iotag and node struct. */ memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean); iocbq->sli4_lxritag = NO_XRI; iocbq->sli4_xritag = NO_XRI; - iocbq->iocb_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVME_LS); + iocbq->iocb_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVMET | + LPFC_IO_NVME_LS); list_add_tail(&iocbq->list, &phba->lpfc_iocb_list); } @@ -2450,6 +2493,14 @@ lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, { int i; + switch (fch_type) { + case FC_TYPE_NVME: + /* todo: tgt: forward NVME LS to transport */ + return 1; + default: + break; + } + /* unSolicited Responses */ if (pring->prt[0].profile) { if (pring->prt[0].lpfc_sli_rcv_unsol_event) @@ -6761,7 +6812,31 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) } phba->sli4_hba.els_xri_cnt = rc; - if (phba->nvmet_support == 0) { + if (phba->nvmet_support) { + /* update host nvmet xri-sgl sizes and mappings */ + rc = lpfc_sli4_nvmet_sgl_update(phba); + if (unlikely(rc)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "6308 Failed to update nvmet-sgl size " + "and mapping: %d\n", rc); + goto out_destroy_queue; + } + + /* register the nvmet sgl pool to the port */ + rc = lpfc_sli4_repost_sgl_list( + phba, + &phba->sli4_hba.lpfc_nvmet_sgl_list, + phba->sli4_hba.nvmet_xri_cnt); + if (unlikely(rc < 0)) { + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, + "3117 Error %d during nvmet " + "sgl post\n", rc); + rc = -ENODEV; + goto out_destroy_queue; + } + phba->sli4_hba.nvmet_xri_cnt = rc; + /* todo: tgt: create targetport */ + } else { /* update host scsi xri-sgl sizes and mappings */ rc = lpfc_sli4_scsi_sgl_update(phba); if (unlikely(rc)) { @@ -13006,7 +13081,7 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, if (phba->sli4_hba.nvme_cq_map && (cqid == phba->sli4_hba.nvme_cq_map[qidx])) { - /* Process NVME command completion */ + /* Process NVME / NVMET command completion */ cq = phba->sli4_hba.nvme_cq[qidx]; goto process_cq; } @@ -17912,6 +17987,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number, struct lpfc_iocbq *pwqe) { union lpfc_wqe *wqe = &pwqe->wqe; + struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_queue *wq; struct lpfc_sglq *sglq; struct lpfc_sli_ring *pring; @@ 
-17961,5 +18037,30 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number, return 0; } + /* NVMET requests */ + if (pwqe->iocb_flag & LPFC_IO_NVMET) { + /* Get the IO distribution (hba_wqidx) for WQ assignment. */ + pring = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]->pring; + + spin_lock_irqsave(&pring->ring_lock, iflags); + ctxp = pwqe->context2; + sglq = ctxp->rqb_buffer->sglq; + if (pwqe->sli4_xritag == NO_XRI) { + pwqe->sli4_lxritag = sglq->sli4_lxritag; + pwqe->sli4_xritag = sglq->sli4_xritag; + } + bf_set(wqe_xri_tag, &pwqe->wqe.xmit_bls_rsp.wqe_com, + pwqe->sli4_xritag); + wq = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]; + bf_set(wqe_cqid, &wqe->generic.wqe_com, + phba->sli4_hba.nvme_cq[pwqe->hba_wqidx]->queue_id); + if (lpfc_sli4_wq_put(wq, wqe)) { + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return WQE_ERROR; + } + lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); + spin_unlock_irqrestore(&pring->ring_lock, iflags); + return 0; + } return WQE_ERROR; } diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 72520125251b..f6cea02adc7c 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -93,6 +93,7 @@ struct lpfc_iocbq { #define LPFC_PRLI_FCP_REQ 0x100000 /* This is an NVME PRLI. */ #define LPFC_IO_NVME 0x200000 /* NVME FCP command */ #define LPFC_IO_NVME_LS 0x400000 /* NVME LS command */ +#define LPFC_IO_NVMET 0x800000 /* NVMET command */ uint32_t drvrTimeout; /* driver timeout in seconds */ struct lpfc_vport *vport;/* virtual port pointer */ @@ -317,6 +318,7 @@ struct lpfc_sli { #define LPFC_BLOCK_MGMT_IO 0x800 /* Don't allow mgmt mbx or iocb cmds */ #define LPFC_MENLO_MAINT 0x1000 /* need for menl fw download */ #define LPFC_SLI_ASYNC_MBX_BLK 0x2000 /* Async mailbox is blocked */ +#define LPFC_SLI_SUPPRESS_RSP 0x4000 /* Suppress RSP feature is supported */ struct lpfc_sli_ring *sli3_ring; diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 9cb8508b4b4b..99546eaef087 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -109,6 +109,7 @@ enum lpfc_sli4_queue_subtype { LPFC_FCP, LPFC_ELS, LPFC_NVME, + LPFC_NVMET, LPFC_NVME_LS, LPFC_USOL }; @@ -610,8 +611,11 @@ struct lpfc_sli4_hba { uint16_t scsi_xri_cnt; uint16_t scsi_xri_start; uint16_t els_xri_cnt; + uint16_t nvmet_xri_cnt; struct list_head lpfc_els_sgl_list; struct list_head lpfc_abts_els_sgl_list; + struct list_head lpfc_nvmet_sgl_list; + struct list_head lpfc_abts_nvmet_sgl_list; struct list_head lpfc_abts_scsi_buf_list; struct list_head lpfc_abts_nvme_buf_list; struct lpfc_sglq **lpfc_sglq_active_list; @@ -643,6 +647,7 @@ struct lpfc_sli4_hba { spinlock_t abts_nvme_buf_list_lock; /* list of aborted SCSI IOs */ spinlock_t abts_scsi_buf_list_lock; /* list of aborted SCSI IOs */ spinlock_t sgl_list_lock; /* list of aborted els IOs */ + spinlock_t nvmet_io_lock; uint32_t physical_port; /* CPU to vector mapping information */ @@ -655,6 +660,7 @@ struct lpfc_sli4_hba { enum lpfc_sge_type { GEN_BUFF_TYPE, SCSI_BUFF_TYPE, + NVMET_BUFF_TYPE }; enum lpfc_sgl_state { -- GitLab From 2d7dbc4c2775eb30df97be00090adbfcc7fc5086 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:35 -0800 Subject: [PATCH 0446/1084] scsi: lpfc: NVME Target: Receive buffer updates NVME Target: Receive buffer updates Allocates buffer pools and configures adapter interfaces to handle receive buffer (asynchronous FCP CMD ius, first burst data) from the adapter. Splits by protocol, etc. 
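The receive-side pool is sized from two module parameters: lpfc_nvmet_mrq
(number of RQ pairs) and lpfc_nvmet_mrq_post (buffers posted per MRQ), capped
by the XRIs left after the ELS reservation. The sketch below restates that
sizing rule outside the driver for illustration only; the standalone helper
and its name are not part of the patch.

    #include <stdint.h>

    /*
     * Illustrative only: mirrors the clamp applied in
     * lpfc_sli4_nvmet_sgl_update() when mrq * mrq_post exceeds the
     * XRIs remaining after the ELS allocation.
     */
    static uint32_t nvmet_xri_budget(uint32_t max_xri, uint32_t els_xri_cnt,
                                     uint32_t nvmet_mrq,
                                     uint32_t *nvmet_mrq_post)
    {
            uint32_t tot_cnt = max_xri - els_xri_cnt;
            uint32_t nvmet_xri_cnt = nvmet_mrq * *nvmet_mrq_post;

            if (nvmet_xri_cnt > tot_cnt) {
                    /* shrink the per-MRQ post count so the pool fits */
                    *nvmet_mrq_post = tot_cnt / nvmet_mrq;
                    nvmet_xri_cnt = nvmet_mrq * *nvmet_mrq_post;
            }
            return nvmet_xri_cnt;
    }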
Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 3 + drivers/scsi/lpfc/lpfc_attr.c | 81 +++- drivers/scsi/lpfc/lpfc_crtn.h | 1 + drivers/scsi/lpfc/lpfc_debugfs.c | 25 +- drivers/scsi/lpfc/lpfc_hw4.h | 347 +++++++++++++++++- drivers/scsi/lpfc/lpfc_init.c | 233 +++++++++++- drivers/scsi/lpfc/lpfc_mbox.c | 87 +++++ drivers/scsi/lpfc/lpfc_sli.c | 608 ++++++++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_sli4.h | 11 + 9 files changed, 1374 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index b7361474880e..8a4090c6771c 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -770,8 +770,11 @@ struct lpfc_hba { uint32_t cfg_suppress_rsp; uint32_t cfg_nvme_oas; uint32_t cfg_nvme_io_channel; + uint32_t cfg_nvmet_mrq; + uint32_t cfg_nvmet_mrq_post; uint32_t cfg_enable_nvmet; uint32_t cfg_nvme_enable_fb; + uint32_t cfg_nvmet_fb_size; uint32_t cfg_total_seg_cnt; uint32_t cfg_sg_seg_cnt; uint32_t cfg_sg_dma_buf_size; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 835c6c1f4cd4..700a68f303f3 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -58,6 +58,10 @@ #define LPFC_MIN_DEVLOSS_TMO 1 #define LPFC_MAX_DEVLOSS_TMO 255 +#define LPFC_DEF_MRQ_POST 256 +#define LPFC_MIN_MRQ_POST 32 +#define LPFC_MAX_MRQ_POST 512 + /* * Write key size should be multiple of 4. If write key is changed * make sure that library write key is also changed. @@ -3281,6 +3285,24 @@ static DEVICE_ATTR(lpfc_devloss_tmo, S_IRUGO | S_IWUSR, LPFC_ATTR_R(suppress_rsp, 1, 0, 1, "Enable suppress rsp feature is firmware supports it"); +/* + * lpfc_nvmet_mrq: Specify number of RQ pairs for processing NVMET cmds + * lpfc_nvmet_mrq = 1 use a single RQ pair + * lpfc_nvmet_mrq >= 2 use specified RQ pairs for MRQ + * + */ +LPFC_ATTR_R(nvmet_mrq, + 1, 1, 16, + "Specify number of RQ pairs for processing NVMET cmds"); + +/* + * lpfc_nvmet_mrq_post: Specify number buffers to post on every MRQ + * + */ +LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST, + LPFC_MIN_MRQ_POST, LPFC_MAX_MRQ_POST, + "Specify number of buffers to post on every MRQ"); + /* * lpfc_enable_fc4_type: Defines what FC4 types are supported. * Supported Values: 1 - register just FCP @@ -4657,13 +4679,28 @@ LPFC_VPORT_ATTR_RW(first_burst_size, 0, 0, 65536, "First burst size for Targets that support first burst"); /* -* lpfc_nvme_enable_fb: Enable NVME first burst on I and T functions. -* For the Initiator (I), enabling this parameter means that an NVME -* PRLI response with FBA enabled and an FB_SIZE set to a nonzero value -* will be processed by the initiator for subsequent NVME FCP IO. +* lpfc_nvmet_fb_size: NVME Target mode supported first burst size. +* When the driver is configured as an NVME target, this value is +* communicated to the NVME initiator in the PRLI response. It is +* used only when the lpfc_nvme_enable_fb and lpfc_nvmet_support +* parameters are set and the target is sending the PRLI RSP. * Parameter supported on physical port only - no NPIV support. -* Value range is [0,1]. Default value is 0 (disabled). +* Value range is [0,65536]. Default value is 0. */ +LPFC_ATTR_RW(nvmet_fb_size, 0, 0, 65536, + "NVME Target mode first burst size in 512B increments."); + +/* + * lpfc_nvme_enable_fb: Enable NVME first burst on I and T functions. 
+ * For the Initiator (I), enabling this parameter means that an NVMET + * PRLI response with FBA enabled and an FB_SIZE set to a nonzero value will be + * processed by the initiator for subsequent NVME FCP IO. For the target + * function (T), enabling this parameter qualifies the lpfc_nvmet_fb_size + * driver parameter as the target function's first burst size returned to the + * initiator in the target's NVME PRLI response. Parameter supported on physical + * port only - no NPIV support. + * Value range is [0,1]. Default value is 0 (disabled). + */ LPFC_ATTR_RW(nvme_enable_fb, 0, 0, 1, "Enable First Burst feature on I and T functions."); @@ -5099,7 +5136,10 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_fcp_io_channel, &dev_attr_lpfc_suppress_rsp, &dev_attr_lpfc_nvme_io_channel, + &dev_attr_lpfc_nvmet_mrq, + &dev_attr_lpfc_nvmet_mrq_post, &dev_attr_lpfc_nvme_enable_fb, + &dev_attr_lpfc_nvmet_fb_size, &dev_attr_lpfc_enable_bg, &dev_attr_lpfc_soft_wwnn, &dev_attr_lpfc_soft_wwpn, @@ -6136,9 +6176,12 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) lpfc_suppress_rsp_init(phba, lpfc_suppress_rsp); lpfc_enable_fc4_type_init(phba, lpfc_enable_fc4_type); + lpfc_nvmet_mrq_init(phba, lpfc_nvmet_mrq); + lpfc_nvmet_mrq_post_init(phba, lpfc_nvmet_mrq_post); /* Initialize first burst. Target vs Initiator are different. */ lpfc_nvme_enable_fb_init(phba, lpfc_nvme_enable_fb); + lpfc_nvmet_fb_size_init(phba, lpfc_nvmet_fb_size); lpfc_fcp_io_channel_init(phba, lpfc_fcp_io_channel); lpfc_nvme_io_channel_init(phba, lpfc_nvme_io_channel); @@ -6205,9 +6248,35 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) phba->nvmet_support) { phba->cfg_enable_fc4_type &= ~LPFC_ENABLE_FCP; phba->cfg_fcp_io_channel = 0; - } else + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6013 %s x%x fb_size x%x, fb_max x%x\n", + "NVME Target PRLI ACC enable_fb ", + phba->cfg_nvme_enable_fb, + phba->cfg_nvmet_fb_size, + LPFC_NVMET_FB_SZ_MAX); + + if (phba->cfg_nvme_enable_fb == 0) + phba->cfg_nvmet_fb_size = 0; + else { + if (phba->cfg_nvmet_fb_size > LPFC_NVMET_FB_SZ_MAX) + phba->cfg_nvmet_fb_size = LPFC_NVMET_FB_SZ_MAX; + } + + /* Adjust lpfc_nvmet_mrq to avoid running out of WQE slots */ + if (phba->cfg_nvmet_mrq > phba->cfg_nvme_io_channel) { + phba->cfg_nvmet_mrq = phba->cfg_nvme_io_channel; + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6018 Adjust lpfc_nvmet_mrq to %d\n", + phba->cfg_nvmet_mrq); + } + } else { /* Not NVME Target mode. Turn off Target parameters. 
*/ phba->nvmet_support = 0; + phba->cfg_nvmet_mrq = 0; + phba->cfg_nvmet_mrq_post = 0; + phba->cfg_nvmet_fb_size = 0; + } if (phba->cfg_fcp_io_channel > phba->cfg_nvme_io_channel) phba->io_channel_irqs = phba->cfg_fcp_io_channel; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 1e97b1a106c5..2e7a7f7f43b2 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -229,6 +229,7 @@ void lpfc_reg_vfi(struct lpfcMboxq *, struct lpfc_vport *, dma_addr_t); void lpfc_init_vpi(struct lpfc_hba *, struct lpfcMboxq *, uint16_t); void lpfc_unreg_vfi(struct lpfcMboxq *, struct lpfc_vport *); void lpfc_reg_fcfi(struct lpfc_hba *, struct lpfcMboxq *); +void lpfc_reg_fcfi_mrq(struct lpfc_hba *phba, struct lpfcMboxq *mbox, int mode); void lpfc_unreg_fcfi(struct lpfcMboxq *, uint16_t); void lpfc_resume_rpi(struct lpfcMboxq *, struct lpfc_nodelist *); int lpfc_check_pending_fcoe_event(struct lpfc_hba *, uint8_t); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index e54307dff8be..abc39f605960 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2837,7 +2837,7 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, static int lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, - int *len, int max_cnt, int eq_id) + int *len, int max_cnt, int eqidx, int eq_id) { struct lpfc_queue *qp; int qidx, rc; @@ -2880,6 +2880,27 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, return 1; } + if (phba->cfg_nvmet_mrq > eqidx) { + /* NVMET CQset */ + qp = phba->sli4_hba.nvmet_cqset[eqidx]; + *len = __lpfc_idiag_print_cq(qp, "NVMET CQset", pbuffer, *len); + + /* Reset max counter */ + qp->CQ_max_cqe = 0; + + if (*len >= max_cnt) + return 1; + + /* RQ header */ + qp = phba->sli4_hba.nvmet_mrq_hdr[eqidx]; + *len = __lpfc_idiag_print_rqpair(qp, + phba->sli4_hba.nvmet_mrq_data[eqidx], + "NVMET MRQ", pbuffer, *len); + + if (*len >= max_cnt) + return 1; + } + return 0; } @@ -2977,7 +2998,7 @@ lpfc_idiag_queinfo_read(struct file *file, char __user *buf, size_t nbytes, /* will dump both fcp and nvme cqs/wqs for the eq */ rc = lpfc_idiag_cqs_for_eq(phba, pbuffer, &len, - max_cnt, qp->queue_id); + max_cnt, x, qp->queue_id); if (rc) goto too_big; diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 9bee888b7dc4..0fddb2317875 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -962,6 +962,7 @@ struct mbox_header { #define LPFC_MBOX_OPCODE_FCOE_DELETE_FCF 0x0A #define LPFC_MBOX_OPCODE_FCOE_POST_HDR_TEMPLATE 0x0B #define LPFC_MBOX_OPCODE_FCOE_REDISCOVER_FCF 0x10 +#define LPFC_MBOX_OPCODE_FCOE_CQ_CREATE_SET 0x1D #define LPFC_MBOX_OPCODE_FCOE_SET_FCLINK_SETTINGS 0x21 #define LPFC_MBOX_OPCODE_FCOE_LINK_DIAG_STATE 0x22 #define LPFC_MBOX_OPCODE_FCOE_LINK_DIAG_LOOPBACK 0x23 @@ -1143,6 +1144,116 @@ struct lpfc_mbx_cq_create { } u; }; +struct lpfc_mbx_cq_create_set { + union lpfc_sli4_cfg_shdr cfg_shdr; + union { + struct { + uint32_t word0; +#define lpfc_mbx_cq_create_set_page_size_SHIFT 16 /* Version 2 Only */ +#define lpfc_mbx_cq_create_set_page_size_MASK 0x000000FF +#define lpfc_mbx_cq_create_set_page_size_WORD word0 +#define lpfc_mbx_cq_create_set_num_pages_SHIFT 0 +#define lpfc_mbx_cq_create_set_num_pages_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_num_pages_WORD word0 + uint32_t word1; +#define lpfc_mbx_cq_create_set_evt_SHIFT 31 +#define lpfc_mbx_cq_create_set_evt_MASK 0x00000001 +#define lpfc_mbx_cq_create_set_evt_WORD 
word1 +#define lpfc_mbx_cq_create_set_valid_SHIFT 29 +#define lpfc_mbx_cq_create_set_valid_MASK 0x00000001 +#define lpfc_mbx_cq_create_set_valid_WORD word1 +#define lpfc_mbx_cq_create_set_cqe_cnt_SHIFT 27 +#define lpfc_mbx_cq_create_set_cqe_cnt_MASK 0x00000003 +#define lpfc_mbx_cq_create_set_cqe_cnt_WORD word1 +#define lpfc_mbx_cq_create_set_cqe_size_SHIFT 25 +#define lpfc_mbx_cq_create_set_cqe_size_MASK 0x00000003 +#define lpfc_mbx_cq_create_set_cqe_size_WORD word1 +#define lpfc_mbx_cq_create_set_auto_SHIFT 15 +#define lpfc_mbx_cq_create_set_auto_MASK 0x0000001 +#define lpfc_mbx_cq_create_set_auto_WORD word1 +#define lpfc_mbx_cq_create_set_nodelay_SHIFT 14 +#define lpfc_mbx_cq_create_set_nodelay_MASK 0x00000001 +#define lpfc_mbx_cq_create_set_nodelay_WORD word1 +#define lpfc_mbx_cq_create_set_clswm_SHIFT 12 +#define lpfc_mbx_cq_create_set_clswm_MASK 0x00000003 +#define lpfc_mbx_cq_create_set_clswm_WORD word1 + uint32_t word2; +#define lpfc_mbx_cq_create_set_arm_SHIFT 31 +#define lpfc_mbx_cq_create_set_arm_MASK 0x00000001 +#define lpfc_mbx_cq_create_set_arm_WORD word2 +#define lpfc_mbx_cq_create_set_num_cq_SHIFT 0 +#define lpfc_mbx_cq_create_set_num_cq_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_num_cq_WORD word2 + uint32_t word3; +#define lpfc_mbx_cq_create_set_eq_id1_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id1_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id1_WORD word3 +#define lpfc_mbx_cq_create_set_eq_id0_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id0_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id0_WORD word3 + uint32_t word4; +#define lpfc_mbx_cq_create_set_eq_id3_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id3_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id3_WORD word4 +#define lpfc_mbx_cq_create_set_eq_id2_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id2_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id2_WORD word4 + uint32_t word5; +#define lpfc_mbx_cq_create_set_eq_id5_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id5_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id5_WORD word5 +#define lpfc_mbx_cq_create_set_eq_id4_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id4_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id4_WORD word5 + uint32_t word6; +#define lpfc_mbx_cq_create_set_eq_id7_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id7_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id7_WORD word6 +#define lpfc_mbx_cq_create_set_eq_id6_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id6_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id6_WORD word6 + uint32_t word7; +#define lpfc_mbx_cq_create_set_eq_id9_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id9_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id9_WORD word7 +#define lpfc_mbx_cq_create_set_eq_id8_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id8_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id8_WORD word7 + uint32_t word8; +#define lpfc_mbx_cq_create_set_eq_id11_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id11_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id11_WORD word8 +#define lpfc_mbx_cq_create_set_eq_id10_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id10_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id10_WORD word8 + uint32_t word9; +#define lpfc_mbx_cq_create_set_eq_id13_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id13_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id13_WORD word9 +#define lpfc_mbx_cq_create_set_eq_id12_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id12_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id12_WORD word9 + uint32_t word10; +#define 
lpfc_mbx_cq_create_set_eq_id15_SHIFT 16 +#define lpfc_mbx_cq_create_set_eq_id15_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id15_WORD word10 +#define lpfc_mbx_cq_create_set_eq_id14_SHIFT 0 +#define lpfc_mbx_cq_create_set_eq_id14_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_eq_id14_WORD word10 + struct dma_address page[1]; + } request; + struct { + uint32_t word0; +#define lpfc_mbx_cq_create_set_num_alloc_SHIFT 16 +#define lpfc_mbx_cq_create_set_num_alloc_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_num_alloc_WORD word0 +#define lpfc_mbx_cq_create_set_base_id_SHIFT 0 +#define lpfc_mbx_cq_create_set_base_id_MASK 0x0000FFFF +#define lpfc_mbx_cq_create_set_base_id_WORD word0 + } response; + } u; +}; + struct lpfc_mbx_cq_destroy { struct mbox_header header; union { @@ -1252,10 +1363,10 @@ struct rq_context { #define LPFC_RQ_RING_SIZE_1024 10 /* 1024 entries */ #define LPFC_RQ_RING_SIZE_2048 11 /* 2048 entries */ #define LPFC_RQ_RING_SIZE_4096 12 /* 4096 entries */ -#define lpfc_rq_context_rqe_count_1_SHIFT 16 /* Version 1 Only */ +#define lpfc_rq_context_rqe_count_1_SHIFT 16 /* Version 1-2 Only */ #define lpfc_rq_context_rqe_count_1_MASK 0x0000FFFF #define lpfc_rq_context_rqe_count_1_WORD word0 -#define lpfc_rq_context_rqe_size_SHIFT 8 /* Version 1 Only */ +#define lpfc_rq_context_rqe_size_SHIFT 8 /* Version 1-2 Only */ #define lpfc_rq_context_rqe_size_MASK 0x0000000F #define lpfc_rq_context_rqe_size_WORD word0 #define LPFC_RQE_SIZE_8 2 @@ -1267,7 +1378,13 @@ struct rq_context { #define lpfc_rq_context_page_size_MASK 0x000000FF #define lpfc_rq_context_page_size_WORD word0 #define LPFC_RQ_PAGE_SIZE_4096 0x1 - uint32_t reserved1; + uint32_t word1; +#define lpfc_rq_context_data_size_SHIFT 16 /* Version 2 Only */ +#define lpfc_rq_context_data_size_MASK 0x0000FFFF +#define lpfc_rq_context_data_size_WORD word1 +#define lpfc_rq_context_hdr_size_SHIFT 0 /* Version 2 Only */ +#define lpfc_rq_context_hdr_size_MASK 0x0000FFFF +#define lpfc_rq_context_hdr_size_WORD word1 uint32_t word2; #define lpfc_rq_context_cq_id_SHIFT 16 #define lpfc_rq_context_cq_id_MASK 0x000003FF @@ -1275,6 +1392,9 @@ struct rq_context { #define lpfc_rq_context_buf_size_SHIFT 0 #define lpfc_rq_context_buf_size_MASK 0x0000FFFF #define lpfc_rq_context_buf_size_WORD word2 +#define lpfc_rq_context_base_cq_SHIFT 0 /* Version 2 Only */ +#define lpfc_rq_context_base_cq_MASK 0x0000FFFF +#define lpfc_rq_context_base_cq_WORD word2 uint32_t buffer_size; /* Version 1 Only */ }; @@ -1296,10 +1416,65 @@ struct lpfc_mbx_rq_create { #define lpfc_mbx_rq_create_ulp_num_MASK 0x000000FF #define lpfc_mbx_rq_create_ulp_num_WORD word0 struct rq_context context; - struct dma_address page[LPFC_MAX_WQ_PAGE]; + struct dma_address page[LPFC_MAX_RQ_PAGE]; } request; struct { uint32_t word0; +#define lpfc_mbx_rq_create_q_cnt_v2_SHIFT 16 +#define lpfc_mbx_rq_create_q_cnt_v2_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_cnt_v2_WORD word0 +#define lpfc_mbx_rq_create_q_id_SHIFT 0 +#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_rq_create_bar_set_SHIFT 0 +#define lpfc_mbx_rq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_bar_set_WORD word2 +#define lpfc_mbx_rq_create_db_format_SHIFT 16 +#define lpfc_mbx_rq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_db_format_WORD word2 + } response; + } u; +}; + +struct lpfc_mbx_rq_create_v2 { + union lpfc_sli4_cfg_shdr cfg_shdr; + union { + struct { + uint32_t word0; +#define 
lpfc_mbx_rq_create_num_pages_SHIFT 0 +#define lpfc_mbx_rq_create_num_pages_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_num_pages_WORD word0 +#define lpfc_mbx_rq_create_rq_cnt_SHIFT 16 +#define lpfc_mbx_rq_create_rq_cnt_MASK 0x000000FF +#define lpfc_mbx_rq_create_rq_cnt_WORD word0 +#define lpfc_mbx_rq_create_dua_SHIFT 16 +#define lpfc_mbx_rq_create_dua_MASK 0x00000001 +#define lpfc_mbx_rq_create_dua_WORD word0 +#define lpfc_mbx_rq_create_bqu_SHIFT 17 +#define lpfc_mbx_rq_create_bqu_MASK 0x00000001 +#define lpfc_mbx_rq_create_bqu_WORD word0 +#define lpfc_mbx_rq_create_ulp_num_SHIFT 24 +#define lpfc_mbx_rq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_rq_create_ulp_num_WORD word0 +#define lpfc_mbx_rq_create_dim_SHIFT 29 +#define lpfc_mbx_rq_create_dim_MASK 0x00000001 +#define lpfc_mbx_rq_create_dim_WORD word0 +#define lpfc_mbx_rq_create_dfd_SHIFT 30 +#define lpfc_mbx_rq_create_dfd_MASK 0x00000001 +#define lpfc_mbx_rq_create_dfd_WORD word0 +#define lpfc_mbx_rq_create_dnb_SHIFT 31 +#define lpfc_mbx_rq_create_dnb_MASK 0x00000001 +#define lpfc_mbx_rq_create_dnb_WORD word0 + struct rq_context context; + struct dma_address page[1]; + } request; + struct { + uint32_t word0; +#define lpfc_mbx_rq_create_q_cnt_v2_SHIFT 16 +#define lpfc_mbx_rq_create_q_cnt_v2_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_cnt_v2_WORD word0 #define lpfc_mbx_rq_create_q_id_SHIFT 0 #define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF #define lpfc_mbx_rq_create_q_id_WORD word0 @@ -2213,6 +2388,160 @@ struct lpfc_mbx_reg_fcfi { #define lpfc_reg_fcfi_vlan_tag_WORD word8 }; +struct lpfc_mbx_reg_fcfi_mrq { + uint32_t word1; +#define lpfc_reg_fcfi_mrq_info_index_SHIFT 0 +#define lpfc_reg_fcfi_mrq_info_index_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_info_index_WORD word1 +#define lpfc_reg_fcfi_mrq_fcfi_SHIFT 16 +#define lpfc_reg_fcfi_mrq_fcfi_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_fcfi_WORD word1 + uint32_t word2; +#define lpfc_reg_fcfi_mrq_rq_id1_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rq_id1_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_rq_id1_WORD word2 +#define lpfc_reg_fcfi_mrq_rq_id0_SHIFT 16 +#define lpfc_reg_fcfi_mrq_rq_id0_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_rq_id0_WORD word2 + uint32_t word3; +#define lpfc_reg_fcfi_mrq_rq_id3_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rq_id3_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_rq_id3_WORD word3 +#define lpfc_reg_fcfi_mrq_rq_id2_SHIFT 16 +#define lpfc_reg_fcfi_mrq_rq_id2_MASK 0x0000FFFF +#define lpfc_reg_fcfi_mrq_rq_id2_WORD word3 + uint32_t word4; +#define lpfc_reg_fcfi_mrq_type_match0_SHIFT 24 +#define lpfc_reg_fcfi_mrq_type_match0_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_match0_WORD word4 +#define lpfc_reg_fcfi_mrq_type_mask0_SHIFT 16 +#define lpfc_reg_fcfi_mrq_type_mask0_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_mask0_WORD word4 +#define lpfc_reg_fcfi_mrq_rctl_match0_SHIFT 8 +#define lpfc_reg_fcfi_mrq_rctl_match0_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_match0_WORD word4 +#define lpfc_reg_fcfi_mrq_rctl_mask0_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rctl_mask0_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_mask0_WORD word4 + uint32_t word5; +#define lpfc_reg_fcfi_mrq_type_match1_SHIFT 24 +#define lpfc_reg_fcfi_mrq_type_match1_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_match1_WORD word5 +#define lpfc_reg_fcfi_mrq_type_mask1_SHIFT 16 +#define lpfc_reg_fcfi_mrq_type_mask1_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_mask1_WORD word5 +#define lpfc_reg_fcfi_mrq_rctl_match1_SHIFT 8 +#define lpfc_reg_fcfi_mrq_rctl_match1_MASK 0x000000FF +#define 
lpfc_reg_fcfi_mrq_rctl_match1_WORD word5 +#define lpfc_reg_fcfi_mrq_rctl_mask1_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rctl_mask1_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_mask1_WORD word5 + uint32_t word6; +#define lpfc_reg_fcfi_mrq_type_match2_SHIFT 24 +#define lpfc_reg_fcfi_mrq_type_match2_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_match2_WORD word6 +#define lpfc_reg_fcfi_mrq_type_mask2_SHIFT 16 +#define lpfc_reg_fcfi_mrq_type_mask2_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_mask2_WORD word6 +#define lpfc_reg_fcfi_mrq_rctl_match2_SHIFT 8 +#define lpfc_reg_fcfi_mrq_rctl_match2_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_match2_WORD word6 +#define lpfc_reg_fcfi_mrq_rctl_mask2_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rctl_mask2_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_mask2_WORD word6 + uint32_t word7; +#define lpfc_reg_fcfi_mrq_type_match3_SHIFT 24 +#define lpfc_reg_fcfi_mrq_type_match3_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_match3_WORD word7 +#define lpfc_reg_fcfi_mrq_type_mask3_SHIFT 16 +#define lpfc_reg_fcfi_mrq_type_mask3_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_type_mask3_WORD word7 +#define lpfc_reg_fcfi_mrq_rctl_match3_SHIFT 8 +#define lpfc_reg_fcfi_mrq_rctl_match3_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_match3_WORD word7 +#define lpfc_reg_fcfi_mrq_rctl_mask3_SHIFT 0 +#define lpfc_reg_fcfi_mrq_rctl_mask3_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_rctl_mask3_WORD word7 + uint32_t word8; +#define lpfc_reg_fcfi_mrq_ptc7_SHIFT 31 +#define lpfc_reg_fcfi_mrq_ptc7_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc7_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc6_SHIFT 30 +#define lpfc_reg_fcfi_mrq_ptc6_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc6_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc5_SHIFT 29 +#define lpfc_reg_fcfi_mrq_ptc5_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc5_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc4_SHIFT 28 +#define lpfc_reg_fcfi_mrq_ptc4_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc4_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc3_SHIFT 27 +#define lpfc_reg_fcfi_mrq_ptc3_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc3_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc2_SHIFT 26 +#define lpfc_reg_fcfi_mrq_ptc2_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc2_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc1_SHIFT 25 +#define lpfc_reg_fcfi_mrq_ptc1_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc1_WORD word8 +#define lpfc_reg_fcfi_mrq_ptc0_SHIFT 24 +#define lpfc_reg_fcfi_mrq_ptc0_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_ptc0_WORD word8 +#define lpfc_reg_fcfi_mrq_pt7_SHIFT 23 +#define lpfc_reg_fcfi_mrq_pt7_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt7_WORD word8 +#define lpfc_reg_fcfi_mrq_pt6_SHIFT 22 +#define lpfc_reg_fcfi_mrq_pt6_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt6_WORD word8 +#define lpfc_reg_fcfi_mrq_pt5_SHIFT 21 +#define lpfc_reg_fcfi_mrq_pt5_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt5_WORD word8 +#define lpfc_reg_fcfi_mrq_pt4_SHIFT 20 +#define lpfc_reg_fcfi_mrq_pt4_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt4_WORD word8 +#define lpfc_reg_fcfi_mrq_pt3_SHIFT 19 +#define lpfc_reg_fcfi_mrq_pt3_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt3_WORD word8 +#define lpfc_reg_fcfi_mrq_pt2_SHIFT 18 +#define lpfc_reg_fcfi_mrq_pt2_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt2_WORD word8 +#define lpfc_reg_fcfi_mrq_pt1_SHIFT 17 +#define lpfc_reg_fcfi_mrq_pt1_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt1_WORD word8 +#define lpfc_reg_fcfi_mrq_pt0_SHIFT 16 +#define lpfc_reg_fcfi_mrq_pt0_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_pt0_WORD word8 +#define 
lpfc_reg_fcfi_mrq_xmv_SHIFT 15 +#define lpfc_reg_fcfi_mrq_xmv_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_xmv_WORD word8 +#define lpfc_reg_fcfi_mrq_mode_SHIFT 13 +#define lpfc_reg_fcfi_mrq_mode_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_mode_WORD word8 +#define lpfc_reg_fcfi_mrq_vv_SHIFT 12 +#define lpfc_reg_fcfi_mrq_vv_MASK 0x00000001 +#define lpfc_reg_fcfi_mrq_vv_WORD word8 +#define lpfc_reg_fcfi_mrq_vlan_tag_SHIFT 0 +#define lpfc_reg_fcfi_mrq_vlan_tag_MASK 0x00000FFF +#define lpfc_reg_fcfi_mrq_vlan_tag_WORD word8 + uint32_t word9; +#define lpfc_reg_fcfi_mrq_policy_SHIFT 12 +#define lpfc_reg_fcfi_mrq_policy_MASK 0x0000000F +#define lpfc_reg_fcfi_mrq_policy_WORD word9 +#define lpfc_reg_fcfi_mrq_filter_SHIFT 8 +#define lpfc_reg_fcfi_mrq_filter_MASK 0x0000000F +#define lpfc_reg_fcfi_mrq_filter_WORD word9 +#define lpfc_reg_fcfi_mrq_npairs_SHIFT 0 +#define lpfc_reg_fcfi_mrq_npairs_MASK 0x000000FF +#define lpfc_reg_fcfi_mrq_npairs_WORD word9 + uint32_t word10; + uint32_t word11; + uint32_t word12; + uint32_t word13; + uint32_t word14; + uint32_t word15; + uint32_t word16; +}; + struct lpfc_mbx_unreg_fcfi { uint32_t word1_rsv; uint32_t word2; @@ -2392,6 +2721,9 @@ struct lpfc_mbx_request_features { #define lpfc_mbx_rq_ftr_rq_perfh_SHIFT 11 #define lpfc_mbx_rq_ftr_rq_perfh_MASK 0x00000001 #define lpfc_mbx_rq_ftr_rq_perfh_WORD word2 +#define lpfc_mbx_rq_ftr_rq_mrqp_SHIFT 16 +#define lpfc_mbx_rq_ftr_rq_mrqp_MASK 0x00000001 +#define lpfc_mbx_rq_ftr_rq_mrqp_WORD word2 uint32_t word3; #define lpfc_mbx_rq_ftr_rsp_iaab_SHIFT 0 #define lpfc_mbx_rq_ftr_rsp_iaab_MASK 0x00000001 @@ -2420,6 +2752,9 @@ struct lpfc_mbx_request_features { #define lpfc_mbx_rq_ftr_rsp_perfh_SHIFT 11 #define lpfc_mbx_rq_ftr_rsp_perfh_MASK 0x00000001 #define lpfc_mbx_rq_ftr_rsp_perfh_WORD word3 +#define lpfc_mbx_rq_ftr_rsp_mrqp_SHIFT 16 +#define lpfc_mbx_rq_ftr_rsp_mrqp_MASK 0x00000001 +#define lpfc_mbx_rq_ftr_rsp_mrqp_WORD word3 }; struct lpfc_mbx_supp_pages { @@ -3312,14 +3647,17 @@ struct lpfc_mqe { struct lpfc_mbx_del_fcf_tbl_entry del_fcf_entry; struct lpfc_mbx_redisc_fcf_tbl redisc_fcf_tbl; struct lpfc_mbx_reg_fcfi reg_fcfi; + struct lpfc_mbx_reg_fcfi_mrq reg_fcfi_mrq; struct lpfc_mbx_unreg_fcfi unreg_fcfi; struct lpfc_mbx_mq_create mq_create; struct lpfc_mbx_mq_create_ext mq_create_ext; struct lpfc_mbx_eq_create eq_create; struct lpfc_mbx_modify_eq_delay eq_delay; struct lpfc_mbx_cq_create cq_create; + struct lpfc_mbx_cq_create_set cq_create_set; struct lpfc_mbx_wq_create wq_create; struct lpfc_mbx_rq_create rq_create; + struct lpfc_mbx_rq_create_v2 rq_create_v2; struct lpfc_mbx_mq_destroy mq_destroy; struct lpfc_mbx_eq_destroy eq_destroy; struct lpfc_mbx_cq_destroy cq_destroy; @@ -3972,6 +4310,7 @@ struct lpfc_nvme_prli { #define prli_fb_sz_SHIFT 0 #define prli_fb_sz_MASK 0x0000ffff #define prli_fb_sz_WORD word5 +#define LPFC_NVMET_FB_SZ_MAX 65536 /* Driver target mode only. 
*/ }; struct create_xri_wqe { diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 03fa3f02e33e..e6cbc0bac029 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3354,8 +3354,15 @@ lpfc_sli4_nvmet_sgl_update(struct lpfc_hba *phba) * update on pci function's nvmet xri-sgl list */ els_xri_cnt = lpfc_sli4_get_els_iocb_cnt(phba); - nvmet_xri_cnt = 0; + nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post; tot_cnt = phba->sli4_hba.max_cfg_param.max_xri - els_xri_cnt; + if (nvmet_xri_cnt > tot_cnt) { + phba->cfg_nvmet_mrq_post = tot_cnt / phba->cfg_nvmet_mrq; + nvmet_xri_cnt = phba->cfg_nvmet_mrq * phba->cfg_nvmet_mrq_post; + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6301 NVMET post-sgl count changed to %d\n", + phba->cfg_nvmet_mrq_post); + } if (nvmet_xri_cnt > phba->sli4_hba.nvmet_xri_cnt) { /* els xri-sgl expanded */ @@ -7674,11 +7681,13 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) phba->cfg_fcp_io_channel = io_channel; if (phba->cfg_nvme_io_channel > io_channel) phba->cfg_nvme_io_channel = io_channel; + if (phba->cfg_nvme_io_channel < phba->cfg_nvmet_mrq) + phba->cfg_nvmet_mrq = phba->cfg_nvme_io_channel; lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2574 IO channels: irqs %d fcp %d nvme %d\n", + "2574 IO channels: irqs %d fcp %d nvme %d MRQ: %d\n", phba->io_channel_irqs, phba->cfg_fcp_io_channel, - phba->cfg_nvme_io_channel); + phba->cfg_nvme_io_channel, phba->cfg_nvmet_mrq); /* Get EQ depth from module parameter, fake the default for now */ phba->sli4_hba.eq_esize = LPFC_EQE_SIZE_4B; @@ -7768,7 +7777,7 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - int idx, io_channel; + int idx, io_channel, max; /* * Create HBA Record arrays. @@ -7845,7 +7854,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) goto out_error; } - phba->sli4_hba.nvme_wq = kcalloc(phba->cfg_nvme_io_channel, sizeof(struct lpfc_queue *), GFP_KERNEL); @@ -7870,6 +7878,39 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) "fast-path CQ map\n"); goto out_error; } + + if (phba->nvmet_support) { + phba->sli4_hba.nvmet_cqset = kcalloc( + phba->cfg_nvmet_mrq, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.nvmet_cqset) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3121 Fail allocate memory for " + "fast-path CQ set array\n"); + goto out_error; + } + phba->sli4_hba.nvmet_mrq_hdr = kcalloc( + phba->cfg_nvmet_mrq, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.nvmet_mrq_hdr) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3122 Fail allocate memory for " + "fast-path RQ set hdr array\n"); + goto out_error; + } + phba->sli4_hba.nvmet_mrq_data = kcalloc( + phba->cfg_nvmet_mrq, + sizeof(struct lpfc_queue *), + GFP_KERNEL); + if (!phba->sli4_hba.nvmet_mrq_data) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3124 Fail allocate memory for " + "fast-path RQ set data array\n"); + goto out_error; + } + } } INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list); @@ -7897,6 +7938,30 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) if (lpfc_alloc_nvme_wq_cq(phba, idx)) goto out_error; + /* allocate MRQ CQs */ + max = phba->cfg_nvme_io_channel; + if (max < phba->cfg_nvmet_mrq) + max = phba->cfg_nvmet_mrq; + + for (idx = 0; idx < max; idx++) + if (lpfc_alloc_nvme_wq_cq(phba, idx)) + goto out_error; + + if (phba->nvmet_support) { + for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { + qdesc = lpfc_sli4_queue_alloc(phba, + phba->sli4_hba.cq_esize, + phba->sli4_hba.cq_ecount); + if (!qdesc) { + 
lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3142 Failed allocate NVME " + "CQ Set (%d)\n", idx); + goto out_error; + } + phba->sli4_hba.nvmet_cqset[idx] = qdesc; + } + } + /* * Create Slow Path Completion Queues (CQs) */ @@ -7999,6 +8064,44 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) } phba->sli4_hba.dat_rq = qdesc; + if (phba->nvmet_support) { + for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { + /* Create NVMET Receive Queue for header */ + qdesc = lpfc_sli4_queue_alloc(phba, + phba->sli4_hba.rq_esize, + phba->sli4_hba.rq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3146 Failed allocate " + "receive HRQ\n"); + goto out_error; + } + phba->sli4_hba.nvmet_mrq_hdr[idx] = qdesc; + + /* Only needed for header of RQ pair */ + qdesc->rqbp = kzalloc(sizeof(struct lpfc_rqb), + GFP_KERNEL); + if (qdesc->rqbp == NULL) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6131 Failed allocate " + "Header RQBP\n"); + goto out_error; + } + + /* Create NVMET Receive Queue for data */ + qdesc = lpfc_sli4_queue_alloc(phba, + phba->sli4_hba.rq_esize, + phba->sli4_hba.rq_ecount); + if (!qdesc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3156 Failed allocate " + "receive DRQ\n"); + goto out_error; + } + phba->sli4_hba.nvmet_mrq_data[idx] = qdesc; + } + } + /* Create the Queues needed for Flash Optimized Fabric operations */ if (phba->cfg_fof) lpfc_fof_queue_create(phba); @@ -8085,6 +8188,14 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) /* Release NVME CQ mapping array */ lpfc_sli4_release_queue_map(&phba->sli4_hba.nvme_cq_map); + lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_cqset, + phba->cfg_nvmet_mrq); + + lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_mrq_hdr, + phba->cfg_nvmet_mrq); + lpfc_sli4_release_queues(&phba->sli4_hba.nvmet_mrq_data, + phba->cfg_nvmet_mrq); + /* Release mailbox command work queue */ __lpfc_sli4_release_queue(&phba->sli4_hba.mbx_wq); @@ -8422,6 +8533,44 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) (uint32_t)rc); goto out_destroy; } + if (phba->nvmet_support) { + if (!phba->sli4_hba.nvmet_cqset) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3165 Fast-path NVME CQ Set " + "array not allocated\n"); + rc = -ENOMEM; + goto out_destroy; + } + if (phba->cfg_nvmet_mrq > 1) { + rc = lpfc_cq_create_set(phba, + phba->sli4_hba.nvmet_cqset, + phba->sli4_hba.hba_eq, + LPFC_WCQ, LPFC_NVMET); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3164 Failed setup of NVME CQ " + "Set, rc = 0x%x\n", + (uint32_t)rc); + goto out_destroy; + } + } else { + /* Set up NVMET Receive Complete Queue */ + rc = lpfc_cq_create(phba, phba->sli4_hba.nvmet_cqset[0], + phba->sli4_hba.hba_eq[0], + LPFC_WCQ, LPFC_NVMET); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6089 Failed setup NVMET CQ: " + "rc = 0x%x\n", (uint32_t)rc); + goto out_destroy; + } + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "6090 NVMET CQ setup: cq-id=%d, " + "parent eq-id=%d\n", + phba->sli4_hba.nvmet_cqset[0]->queue_id, + phba->sli4_hba.hba_eq[0]->queue_id); + } + } /* Set up slow-path ELS WQ/CQ */ if (!phba->sli4_hba.els_cq || !phba->sli4_hba.els_wq) { @@ -8473,6 +8622,58 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) phba->sli4_hba.nvmels_cq->queue_id); } + /* + * Create NVMET Receive Queue (RQ) + */ + if (phba->nvmet_support) { + if ((!phba->sli4_hba.nvmet_cqset) || + (!phba->sli4_hba.nvmet_mrq_hdr) || + (!phba->sli4_hba.nvmet_mrq_data)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6130 MRQ CQ Queues not " + "allocated\n"); + rc = -ENOMEM; + goto out_destroy; + } + if 
(phba->cfg_nvmet_mrq > 1) { + rc = lpfc_mrq_create(phba, + phba->sli4_hba.nvmet_mrq_hdr, + phba->sli4_hba.nvmet_mrq_data, + phba->sli4_hba.nvmet_cqset, + LPFC_NVMET); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6098 Failed setup of NVMET " + "MRQ: rc = 0x%x\n", + (uint32_t)rc); + goto out_destroy; + } + + } else { + rc = lpfc_rq_create(phba, + phba->sli4_hba.nvmet_mrq_hdr[0], + phba->sli4_hba.nvmet_mrq_data[0], + phba->sli4_hba.nvmet_cqset[0], + LPFC_NVMET); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6057 Failed setup of NVMET " + "Receive Queue: rc = 0x%x\n", + (uint32_t)rc); + goto out_destroy; + } + + lpfc_printf_log( + phba, KERN_INFO, LOG_INIT, + "6099 NVMET RQ setup: hdr-rq-id=%d, " + "dat-rq-id=%d parent cq-id=%d\n", + phba->sli4_hba.nvmet_mrq_hdr[0]->queue_id, + phba->sli4_hba.nvmet_mrq_data[0]->queue_id, + phba->sli4_hba.nvmet_cqset[0]->queue_id); + + } + } + if (!phba->sli4_hba.hdr_rq || !phba->sli4_hba.dat_rq) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0540 Receive Queue not allocated\n"); @@ -8589,6 +8790,21 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba) for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) lpfc_cq_destroy(phba, phba->sli4_hba.nvme_cq[qidx]); + /* Unset NVMET MRQ queue */ + if (phba->sli4_hba.nvmet_mrq_hdr) { + for (qidx = 0; qidx < phba->cfg_nvmet_mrq; qidx++) + lpfc_rq_destroy(phba, + phba->sli4_hba.nvmet_mrq_hdr[qidx], + phba->sli4_hba.nvmet_mrq_data[qidx]); + } + + /* Unset NVMET CQ Set complete queue */ + if (phba->sli4_hba.nvmet_cqset) { + for (qidx = 0; qidx < phba->cfg_nvmet_mrq; qidx++) + lpfc_cq_destroy(phba, + phba->sli4_hba.nvmet_cqset[qidx]); + } + /* Unset FCP response complete queue */ if (phba->sli4_hba.fcp_cq) for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++) @@ -9935,6 +10151,7 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) !phba->nvme_support) { phba->nvme_support = 0; phba->nvmet_support = 0; + phba->cfg_nvmet_mrq = 0; phba->cfg_nvme_io_channel = 0; phba->io_channel_irqs = phba->cfg_fcp_io_channel; lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME, @@ -10875,12 +11092,14 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) if (phba->intr_type != MSIX) { if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) phba->cfg_fcp_io_channel = 1; - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { phba->cfg_nvme_io_channel = 1; + if (phba->nvmet_support) + phba->cfg_nvmet_mrq = 1; + } phba->io_channel_irqs = 1; } - /* Set up SLI-4 HBA */ if (lpfc_sli4_hba_setup(phba)) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index c20dc25a1e79..8f4bfdfd9910 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -2081,6 +2081,9 @@ lpfc_request_features(struct lpfc_hba *phba, struct lpfcMboxq *mboxq) if (phba->max_vpi && phba->cfg_enable_npiv) bf_set(lpfc_mbx_rq_ftr_rq_npiv, &mboxq->u.mqe.un.req_ftrs, 1); + if (phba->nvmet_support) + bf_set(lpfc_mbx_rq_ftr_rq_mrqp, &mboxq->u.mqe.un.req_ftrs, 1); + return; } @@ -2448,6 +2451,26 @@ lpfc_reg_fcfi(struct lpfc_hba *phba, struct lpfcMboxq *mbox) /* addr mode is bit wise inverted value of fcf addr_mode */ bf_set(lpfc_reg_fcfi_mam, reg_fcfi, (~phba->fcf.addr_mode) & 0x3); + } else { + /* This is ONLY for NVMET MRQ == 1 */ + if (phba->cfg_nvmet_mrq != 1) + return; + + bf_set(lpfc_reg_fcfi_rq_id0, reg_fcfi, + phba->sli4_hba.nvmet_mrq_hdr[0]->queue_id); + /* Match type FCP - rq_id0 */ 
+ bf_set(lpfc_reg_fcfi_type_match0, reg_fcfi, FC_TYPE_FCP); + bf_set(lpfc_reg_fcfi_type_mask0, reg_fcfi, 0xff); + bf_set(lpfc_reg_fcfi_rctl_match0, reg_fcfi, + FC_RCTL_DD_UNSOL_CMD); + + bf_set(lpfc_reg_fcfi_rq_id1, reg_fcfi, + phba->sli4_hba.hdr_rq->queue_id); + /* Match everything else - rq_id1 */ + bf_set(lpfc_reg_fcfi_type_match1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_type_mask1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_rctl_match1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_rctl_mask1, reg_fcfi, 0); } bf_set(lpfc_reg_fcfi_rq_id2, reg_fcfi, REG_FCF_INVALID_QID); bf_set(lpfc_reg_fcfi_rq_id3, reg_fcfi, REG_FCF_INVALID_QID); @@ -2460,6 +2483,70 @@ lpfc_reg_fcfi(struct lpfc_hba *phba, struct lpfcMboxq *mbox) } } +/** + * lpfc_reg_fcfi_mrq - Initialize the REG_FCFI_MRQ mailbox command + * @phba: pointer to the hba structure containing the FCF index and RQ ID. + * @mbox: pointer to lpfc mbox command to initialize. + * @mode: 0 to register FCFI, 1 to register MRQs + * + * The REG_FCFI_MRQ mailbox command supports Fibre Channel Forwarders (FCFs). + * The SLI Host uses the command to activate an FCF after it has acquired FCF + * information via a READ_FCF mailbox command. This mailbox command also is used + * to indicate where received unsolicited frames from this FCF will be sent. By + * default this routine will set up the FCF to forward all unsolicited frames + * the the RQ ID passed in the @phba. This can be overridden by the caller for + * more complicated setups. + **/ +void +lpfc_reg_fcfi_mrq(struct lpfc_hba *phba, struct lpfcMboxq *mbox, int mode) +{ + struct lpfc_mbx_reg_fcfi_mrq *reg_fcfi; + + /* This is ONLY for MRQ */ + if (phba->cfg_nvmet_mrq <= 1) + return; + + memset(mbox, 0, sizeof(*mbox)); + reg_fcfi = &mbox->u.mqe.un.reg_fcfi_mrq; + bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_REG_FCFI_MRQ); + if (mode == 0) { + bf_set(lpfc_reg_fcfi_mrq_info_index, reg_fcfi, + phba->fcf.current_rec.fcf_indx); + if (phba->fcf.current_rec.vlan_id != LPFC_FCOE_NULL_VID) { + bf_set(lpfc_reg_fcfi_mrq_vv, reg_fcfi, 1); + bf_set(lpfc_reg_fcfi_mrq_vlan_tag, reg_fcfi, + phba->fcf.current_rec.vlan_id); + } + return; + } + + bf_set(lpfc_reg_fcfi_mrq_rq_id0, reg_fcfi, + phba->sli4_hba.nvmet_mrq_hdr[0]->queue_id); + /* Match NVME frames of type FCP (protocol NVME) - rq_id0 */ + bf_set(lpfc_reg_fcfi_mrq_type_match0, reg_fcfi, FC_TYPE_FCP); + bf_set(lpfc_reg_fcfi_mrq_type_mask0, reg_fcfi, 0xff); + bf_set(lpfc_reg_fcfi_mrq_rctl_match0, reg_fcfi, FC_RCTL_DD_UNSOL_CMD); + bf_set(lpfc_reg_fcfi_mrq_rctl_mask0, reg_fcfi, 0xff); + bf_set(lpfc_reg_fcfi_mrq_ptc0, reg_fcfi, 1); + bf_set(lpfc_reg_fcfi_mrq_pt0, reg_fcfi, 1); + + bf_set(lpfc_reg_fcfi_mrq_policy, reg_fcfi, 3); /* NVME connection id */ + bf_set(lpfc_reg_fcfi_mrq_mode, reg_fcfi, 1); + bf_set(lpfc_reg_fcfi_mrq_filter, reg_fcfi, 1); /* rq_id0 */ + bf_set(lpfc_reg_fcfi_mrq_npairs, reg_fcfi, phba->cfg_nvmet_mrq); + + bf_set(lpfc_reg_fcfi_mrq_rq_id1, reg_fcfi, + phba->sli4_hba.hdr_rq->queue_id); + /* Match everything - rq_id1 */ + bf_set(lpfc_reg_fcfi_mrq_type_match1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_mrq_type_mask1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_mrq_rctl_match1, reg_fcfi, 0); + bf_set(lpfc_reg_fcfi_mrq_rctl_mask1, reg_fcfi, 0); + + bf_set(lpfc_reg_fcfi_mrq_rq_id2, reg_fcfi, REG_FCF_INVALID_QID); + bf_set(lpfc_reg_fcfi_mrq_rq_id3, reg_fcfi, REG_FCF_INVALID_QID); +} + /** * lpfc_unreg_fcfi - Initialize the UNREG_FCFI mailbox command * @mbox: pointer to lpfc mbox command to initialize. 
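The REG_FCFI / REG_FCFI_MRQ programming above sets up two receive-queue filters: rq_id0 matches unsolicited FCP-type command frames (type FC_TYPE_FCP, R_CTL FC_RCTL_DD_UNSOL_CMD) and steers them to the first NVMET MRQ header queue, while rq_id1 is left with all-zero match/mask values so every other frame falls through to the default hdr_rq. The standalone C sketch below only models that first-match-wins filtering; the queue ids, helper names and the locally defined constants are illustrative, not part of the driver.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FC_TYPE_FCP           0x08   /* FC-4 type carried in the frame header (illustrative) */
#define FC_RCTL_DD_UNSOL_CMD  0x06   /* unsolicited command R_CTL value (illustrative) */

struct rq_filter {
	uint8_t  type_match, type_mask;
	uint8_t  rctl_match, rctl_mask;
	uint16_t rq_id;                  /* header RQ the frame is steered to */
};

/* A mask of 0 turns the comparison into a wildcard, mirroring the all-zero rq_id1 entry. */
static bool rq_filter_hits(const struct rq_filter *f, uint8_t type, uint8_t rctl)
{
	return (type & f->type_mask) == (f->type_match & f->type_mask) &&
	       (rctl & f->rctl_mask) == (f->rctl_match & f->rctl_mask);
}

static uint16_t select_hdr_rq(const struct rq_filter *filters, int nfilters,
			      uint8_t type, uint8_t rctl)
{
	for (int i = 0; i < nfilters; i++)
		if (rq_filter_hits(&filters[i], type, rctl))
			return filters[i].rq_id;
	return 0xffff;                   /* nothing matched (REG_FCF_INVALID_QID style) */
}

int main(void)
{
	struct rq_filter filters[] = {
		{ FC_TYPE_FCP, 0xff, FC_RCTL_DD_UNSOL_CMD, 0xff, 100 }, /* -> nvmet_mrq_hdr[0] */
		{ 0x00,        0x00, 0x00,                 0x00, 50  }, /* -> default hdr_rq   */
	};

	printf("FCP command frame -> rq %u\n", select_hdr_rq(filters, 2, 0x08, 0x06)); /* 100 */
	printf("other frame       -> rq %u\n", select_hdr_rq(filters, 2, 0x01, 0x22)); /* 50  */
	return 0;
}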
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index f2cd0f37e03f..72164acb6656 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2027,6 +2027,29 @@ lpfc_sli_hbqbuf_get(struct list_head *rb_list) return container_of(d_buf, struct hbq_dmabuf, dbuf); } +/** + * lpfc_sli_rqbuf_get - Remove the first dma buffer off of an RQ list + * @phba: Pointer to HBA context object. + * @hbqno: HBQ number. + * + * This function removes the first RQ buffer on an RQ buffer list and returns a + * pointer to that buffer. If it finds no buffers on the list it returns NULL. + **/ +static struct rqb_dmabuf * +lpfc_sli_rqbuf_get(struct lpfc_hba *phba, struct lpfc_queue *hrq) +{ + struct lpfc_dmabuf *h_buf; + struct lpfc_rqb *rqbp; + + rqbp = hrq->rqbp; + list_remove_head(&rqbp->rqb_buffer_list, h_buf, + struct lpfc_dmabuf, list); + if (!h_buf) + return NULL; + rqbp->buffer_count--; + return container_of(h_buf, struct rqb_dmabuf, hbuf); +} + /** * lpfc_sli_hbqbuf_find - Find the hbq buffer associated with a tag * @phba: Pointer to HBA context object. @@ -5271,6 +5294,14 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) lpfc_sli4_eq_release(phba->sli4_hba.hba_eq[qidx], LPFC_QUEUE_REARM); + if (phba->nvmet_support) { + for (qidx = 0; qidx < phba->cfg_nvmet_mrq; qidx++) { + lpfc_sli4_cq_release( + phba->sli4_hba.nvmet_cqset[qidx], + LPFC_QUEUE_REARM); + } + } + if (phba->cfg_fof) lpfc_sli4_eq_release(phba->sli4_hba.fof_eq, LPFC_QUEUE_REARM); } @@ -6485,7 +6516,7 @@ lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox) int lpfc_sli4_hba_setup(struct lpfc_hba *phba) { - int rc; + int rc, i; LPFC_MBOXQ_t *mboxq; struct lpfc_mqe *mqe; uint8_t *vpd; @@ -6494,6 +6525,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) struct Scsi_Host *shost = lpfc_shost_from_vport(phba->pport); struct lpfc_vport *vport = phba->pport; struct lpfc_dmabuf *mp; + struct lpfc_rqb *rqbp; /* Perform a PCI function reset to start from clean */ rc = lpfc_pci_function_reset(phba); @@ -6856,6 +6888,29 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) } } + if (phba->nvmet_support && phba->cfg_nvmet_mrq) { + + /* Post initial buffers to all RQs created */ + for (i = 0; i < phba->cfg_nvmet_mrq; i++) { + rqbp = phba->sli4_hba.nvmet_mrq_hdr[i]->rqbp; + INIT_LIST_HEAD(&rqbp->rqb_buffer_list); + rqbp->rqb_alloc_buffer = lpfc_sli4_nvmet_alloc; + rqbp->rqb_free_buffer = lpfc_sli4_nvmet_free; + rqbp->entry_count = 256; + rqbp->buffer_count = 0; + + /* Divide by 4 and round down to multiple of 16 */ + rc = (phba->cfg_nvmet_mrq_post >> 2) & 0xfff8; + phba->sli4_hba.nvmet_mrq_hdr[i]->entry_repost = rc; + phba->sli4_hba.nvmet_mrq_data[i]->entry_repost = rc; + + lpfc_post_rq_buffer( + phba, phba->sli4_hba.nvmet_mrq_hdr[i], + phba->sli4_hba.nvmet_mrq_data[i], + phba->cfg_nvmet_mrq_post); + } + } + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { /* register the allocated scsi sgl pool to the port */ rc = lpfc_sli4_repost_scsi_sgl_list(phba); @@ -6898,7 +6953,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) lpfc_sli4_node_prep(phba); if (!(phba->hba_flag & HBA_FCOE_MODE)) { - if (phba->nvmet_support == 0) { + if ((phba->nvmet_support == 0) || (phba->cfg_nvmet_mrq == 1)) { /* * The FC Port needs to register FCFI (index 0) */ @@ -6910,6 +6965,26 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) rc = 0; phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_fcfi, &mboxq->u.mqe.un.reg_fcfi); + } else { + /* We are a NVME Target mode with MRQ > 1 */ + + /* First register the FCFI */ + lpfc_reg_fcfi_mrq(phba, mboxq, 0); + mboxq->vport 
= phba->pport; + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) + goto out_unset_queue; + rc = 0; + phba->fcf.fcfi = bf_get(lpfc_reg_fcfi_mrq_fcfi, + &mboxq->u.mqe.un.reg_fcfi_mrq); + + /* Next register the MRQs */ + lpfc_reg_fcfi_mrq(phba, mboxq, 1); + mboxq->vport = phba->pport; + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) + goto out_unset_queue; + rc = 0; } /* Check if the port is configured to be disabled */ lpfc_sli_read_link_ste(phba); @@ -12987,6 +13062,101 @@ lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, "miss-matched qid: wcqe-qid=x%x\n", hba_wqid); } +/** + * lpfc_sli4_nvmet_handle_rcqe - Process a receive-queue completion queue entry + * @phba: Pointer to HBA context object. + * @rcqe: Pointer to receive-queue completion queue entry. + * + * This routine process a receive-queue completion queue entry. + * + * Return: true if work posted to worker thread, otherwise false. + **/ +static bool +lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, + struct lpfc_rcqe *rcqe) +{ + bool workposted = false; + struct lpfc_queue *hrq; + struct lpfc_queue *drq; + struct rqb_dmabuf *dma_buf; + struct fc_frame_header *fc_hdr; + uint32_t status, rq_id; + unsigned long iflags; + uint32_t fctl, idx; + + if ((phba->nvmet_support == 0) || + (phba->sli4_hba.nvmet_cqset == NULL)) + return workposted; + + idx = cq->queue_id - phba->sli4_hba.nvmet_cqset[0]->queue_id; + hrq = phba->sli4_hba.nvmet_mrq_hdr[idx]; + drq = phba->sli4_hba.nvmet_mrq_data[idx]; + + /* sanity check on queue memory */ + if (unlikely(!hrq) || unlikely(!drq)) + return workposted; + + if (bf_get(lpfc_cqe_code, rcqe) == CQE_CODE_RECEIVE_V1) + rq_id = bf_get(lpfc_rcqe_rq_id_v1, rcqe); + else + rq_id = bf_get(lpfc_rcqe_rq_id, rcqe); + + if ((phba->nvmet_support == 0) || + (rq_id != hrq->queue_id)) + return workposted; + + status = bf_get(lpfc_rcqe_status, rcqe); + switch (status) { + case FC_STATUS_RQ_BUF_LEN_EXCEEDED: + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6126 Receive Frame Truncated!!\n"); + hrq->RQ_buf_trunc++; + break; + case FC_STATUS_RQ_SUCCESS: + lpfc_sli4_rq_release(hrq, drq); + spin_lock_irqsave(&phba->hbalock, iflags); + dma_buf = lpfc_sli_rqbuf_get(phba, hrq); + if (!dma_buf) { + hrq->RQ_no_buf_found++; + spin_unlock_irqrestore(&phba->hbalock, iflags); + goto out; + } + spin_unlock_irqrestore(&phba->hbalock, iflags); + hrq->RQ_rcv_buf++; + fc_hdr = (struct fc_frame_header *)dma_buf->hbuf.virt; + + /* Just some basic sanity checks on FCP Command frame */ + fctl = (fc_hdr->fh_f_ctl[0] << 16 | + fc_hdr->fh_f_ctl[1] << 8 | + fc_hdr->fh_f_ctl[2]); + if (((fctl & + (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) != + (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) || + (fc_hdr->fh_seq_cnt != 0)) /* 0 byte swapped is still 0 */ + goto drop; + + if (fc_hdr->fh_type == FC_TYPE_FCP) { + dma_buf->bytes_recv = bf_get(lpfc_rcqe_length, rcqe); + /* todo: tgt: forward cmd iu to transport */ + return false; + } +drop: + lpfc_in_buf_free(phba, &dma_buf->dbuf); + break; + case FC_STATUS_INSUFF_BUF_NEED_BUF: + case FC_STATUS_INSUFF_BUF_FRM_DISC: + hrq->RQ_no_posted_buf++; + /* Post more buffers if possible */ + spin_lock_irqsave(&phba->hbalock, iflags); + phba->hba_flag |= HBA_POST_RECEIVE_BUFFER; + spin_unlock_irqrestore(&phba->hbalock, iflags); + workposted = true; + break; + } +out: + return workposted; +} + /** * lpfc_sli4_fp_handle_cqe - Process fast-path work queue completion entry * @cq: Pointer to the completion 
queue. @@ -13035,6 +13205,10 @@ lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq, case CQE_CODE_RECEIVE_V1: case CQE_CODE_RECEIVE: phba->last_completion_time = jiffies; + if (cq->subtype == LPFC_NVMET) { + workposted = lpfc_sli4_nvmet_handle_rcqe( + phba, cq, (struct lpfc_rcqe *)&wcqe); + } break; default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, @@ -13064,7 +13238,7 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, struct lpfc_queue *cq = NULL; struct lpfc_cqe *cqe; bool workposted = false; - uint16_t cqid; + uint16_t cqid, id; int ecount = 0; if (unlikely(bf_get_le32(lpfc_eqe_major_code, eqe) != 0)) { @@ -13079,6 +13253,15 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe, /* Get the reference to the corresponding CQ */ cqid = bf_get_le32(lpfc_eqe_resource_id, eqe); + if (phba->cfg_nvmet_mrq && phba->sli4_hba.nvmet_cqset) { + id = phba->sli4_hba.nvmet_cqset[0]->queue_id; + if ((cqid >= id) && (cqid < (id + phba->cfg_nvmet_mrq))) { + /* Process NVMET unsol rcv */ + cq = phba->sli4_hba.nvmet_cqset[cqid - id]; + goto process_cq; + } + } + if (phba->sli4_hba.nvme_cq_map && (cqid == phba->sli4_hba.nvme_cq_map[qidx])) { /* Process NVME / NVMET command completion */ @@ -13962,6 +14145,234 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq, return status; } +/** + * lpfc_cq_create_set - Create a set of Completion Queues on the HBA for MRQ + * @phba: HBA structure that indicates port to create a queue on. + * @cqp: The queue structure array to use to create the completion queues. + * @eqp: The event queue array to bind these completion queues to. + * + * This function creates a set of completion queue, s to support MRQ + * as detailed in @cqp, on a port, + * described by @phba by sending a CREATE_CQ_SET mailbox command to the HBA. + * + * The @phba struct is used to send mailbox command to HBA. The @cq struct + * is used to get the entry count and entry size that are necessary to + * determine the number of pages to allocate and use for this queue. The @eq + * is used to indicate which event queue to bind this completion queue to. This + * function will send the CREATE_CQ_SET mailbox command to the HBA to setup the + * completion queue. This function is asynchronous and will wait for the mailbox + * command to finish before continuing. + * + * On success this function will return a zero. If unable to allocate enough + * memory this function will return -ENOMEM. If the queue create mailbox command + * fails this function will return -ENXIO. 
+ **/ +int +lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp, + struct lpfc_queue **eqp, uint32_t type, uint32_t subtype) +{ + struct lpfc_queue *cq; + struct lpfc_queue *eq; + struct lpfc_mbx_cq_create_set *cq_set; + struct lpfc_dmabuf *dmabuf; + LPFC_MBOXQ_t *mbox; + int rc, length, alloclen, status = 0; + int cnt, idx, numcq, page_idx = 0; + uint32_t shdr_status, shdr_add_status; + union lpfc_sli4_cfg_shdr *shdr; + uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; + + /* sanity check on queue memory */ + numcq = phba->cfg_nvmet_mrq; + if (!cqp || !eqp || !numcq) + return -ENODEV; + if (!phba->sli4_hba.pc_sli4_params.supported) + hw_page_size = SLI4_PAGE_SIZE; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) + return -ENOMEM; + + length = sizeof(struct lpfc_mbx_cq_create_set); + length += ((numcq * cqp[0]->page_count) * + sizeof(struct dma_address)); + alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE, + LPFC_MBOX_OPCODE_FCOE_CQ_CREATE_SET, length, + LPFC_SLI4_MBX_NEMBED); + if (alloclen < length) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3098 Allocated DMA memory size (%d) is " + "less than the requested DMA memory size " + "(%d)\n", alloclen, length); + status = -ENOMEM; + goto out; + } + cq_set = mbox->sge_array->addr[0]; + shdr = (union lpfc_sli4_cfg_shdr *)&cq_set->cfg_shdr; + bf_set(lpfc_mbox_hdr_version, &shdr->request, 0); + + for (idx = 0; idx < numcq; idx++) { + cq = cqp[idx]; + eq = eqp[idx]; + if (!cq || !eq) { + status = -ENOMEM; + goto out; + } + + switch (idx) { + case 0: + bf_set(lpfc_mbx_cq_create_set_page_size, + &cq_set->u.request, + (hw_page_size / SLI4_PAGE_SIZE)); + bf_set(lpfc_mbx_cq_create_set_num_pages, + &cq_set->u.request, cq->page_count); + bf_set(lpfc_mbx_cq_create_set_evt, + &cq_set->u.request, 1); + bf_set(lpfc_mbx_cq_create_set_valid, + &cq_set->u.request, 1); + bf_set(lpfc_mbx_cq_create_set_cqe_size, + &cq_set->u.request, 0); + bf_set(lpfc_mbx_cq_create_set_num_cq, + &cq_set->u.request, numcq); + switch (cq->entry_count) { + default: + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3118 Bad CQ count. 
(%d)\n", + cq->entry_count); + if (cq->entry_count < 256) { + status = -EINVAL; + goto out; + } + /* otherwise default to smallest (drop thru) */ + case 256: + bf_set(lpfc_mbx_cq_create_set_cqe_cnt, + &cq_set->u.request, LPFC_CQ_CNT_256); + break; + case 512: + bf_set(lpfc_mbx_cq_create_set_cqe_cnt, + &cq_set->u.request, LPFC_CQ_CNT_512); + break; + case 1024: + bf_set(lpfc_mbx_cq_create_set_cqe_cnt, + &cq_set->u.request, LPFC_CQ_CNT_1024); + break; + } + bf_set(lpfc_mbx_cq_create_set_eq_id0, + &cq_set->u.request, eq->queue_id); + break; + case 1: + bf_set(lpfc_mbx_cq_create_set_eq_id1, + &cq_set->u.request, eq->queue_id); + break; + case 2: + bf_set(lpfc_mbx_cq_create_set_eq_id2, + &cq_set->u.request, eq->queue_id); + break; + case 3: + bf_set(lpfc_mbx_cq_create_set_eq_id3, + &cq_set->u.request, eq->queue_id); + break; + case 4: + bf_set(lpfc_mbx_cq_create_set_eq_id4, + &cq_set->u.request, eq->queue_id); + break; + case 5: + bf_set(lpfc_mbx_cq_create_set_eq_id5, + &cq_set->u.request, eq->queue_id); + break; + case 6: + bf_set(lpfc_mbx_cq_create_set_eq_id6, + &cq_set->u.request, eq->queue_id); + break; + case 7: + bf_set(lpfc_mbx_cq_create_set_eq_id7, + &cq_set->u.request, eq->queue_id); + break; + case 8: + bf_set(lpfc_mbx_cq_create_set_eq_id8, + &cq_set->u.request, eq->queue_id); + break; + case 9: + bf_set(lpfc_mbx_cq_create_set_eq_id9, + &cq_set->u.request, eq->queue_id); + break; + case 10: + bf_set(lpfc_mbx_cq_create_set_eq_id10, + &cq_set->u.request, eq->queue_id); + break; + case 11: + bf_set(lpfc_mbx_cq_create_set_eq_id11, + &cq_set->u.request, eq->queue_id); + break; + case 12: + bf_set(lpfc_mbx_cq_create_set_eq_id12, + &cq_set->u.request, eq->queue_id); + break; + case 13: + bf_set(lpfc_mbx_cq_create_set_eq_id13, + &cq_set->u.request, eq->queue_id); + break; + case 14: + bf_set(lpfc_mbx_cq_create_set_eq_id14, + &cq_set->u.request, eq->queue_id); + break; + case 15: + bf_set(lpfc_mbx_cq_create_set_eq_id15, + &cq_set->u.request, eq->queue_id); + break; + } + + /* link the cq onto the parent eq child list */ + list_add_tail(&cq->list, &eq->child_list); + /* Set up completion queue's type and subtype */ + cq->type = type; + cq->subtype = subtype; + cq->assoc_qid = eq->queue_id; + cq->host_index = 0; + cq->hba_index = 0; + + rc = 0; + list_for_each_entry(dmabuf, &cq->page_list, list) { + memset(dmabuf->virt, 0, hw_page_size); + cnt = page_idx + dmabuf->buffer_tag; + cq_set->u.request.page[cnt].addr_lo = + putPaddrLow(dmabuf->phys); + cq_set->u.request.page[cnt].addr_hi = + putPaddrHigh(dmabuf->phys); + rc++; + } + page_idx += rc; + } + + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); + + /* The IOCTL status is embedded in the mailbox subheader. */ + shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); + shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); + if (shdr_status || shdr_add_status || rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3119 CQ_CREATE_SET mailbox failed with " + "status x%x add_status x%x, mbx status x%x\n", + shdr_status, shdr_add_status, rc); + status = -ENXIO; + goto out; + } + rc = bf_get(lpfc_mbx_cq_create_set_base_id, &cq_set->u.response); + if (rc == 0xFFFF) { + status = -ENXIO; + goto out; + } + + for (idx = 0; idx < numcq; idx++) { + cq = cqp[idx]; + cq->queue_id = rc + idx; + } + +out: + lpfc_sli4_mbox_cmd_free(phba, mbox); + return status; +} + /** * lpfc_mq_create_fb_init - Send MCC_CREATE without async events registration * @phba: HBA structure that indicates port to create a queue on. 
@@ -14692,6 +15103,197 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, return status; } +/** + * lpfc_mrq_create - Create MRQ Receive Queues on the HBA + * @phba: HBA structure that indicates port to create a queue on. + * @hrqp: The queue structure array to use to create the header receive queues. + * @drqp: The queue structure array to use to create the data receive queues. + * @cqp: The completion queue array to bind these receive queues to. + * + * This function creates a receive buffer queue pair , as detailed in @hrq and + * @drq, on a port, described by @phba by sending a RQ_CREATE mailbox command + * to the HBA. + * + * The @phba struct is used to send mailbox command to HBA. The @drq and @hrq + * struct is used to get the entry count that is necessary to determine the + * number of pages to use for this queue. The @cq is used to indicate which + * completion queue to bind received buffers that are posted to these queues to. + * This function will send the RQ_CREATE mailbox command to the HBA to setup the + * receive queue pair. This function is asynchronous and will wait for the + * mailbox command to finish before continuing. + * + * On success this function will return a zero. If unable to allocate enough + * memory this function will return -ENOMEM. If the queue create mailbox command + * fails this function will return -ENXIO. + **/ +int +lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, + struct lpfc_queue **drqp, struct lpfc_queue **cqp, + uint32_t subtype) +{ + struct lpfc_queue *hrq, *drq, *cq; + struct lpfc_mbx_rq_create_v2 *rq_create; + struct lpfc_dmabuf *dmabuf; + LPFC_MBOXQ_t *mbox; + int rc, length, alloclen, status = 0; + int cnt, idx, numrq, page_idx = 0; + uint32_t shdr_status, shdr_add_status; + union lpfc_sli4_cfg_shdr *shdr; + uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; + + numrq = phba->cfg_nvmet_mrq; + /* sanity check on array memory */ + if (!hrqp || !drqp || !cqp || !numrq) + return -ENODEV; + if (!phba->sli4_hba.pc_sli4_params.supported) + hw_page_size = SLI4_PAGE_SIZE; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) + return -ENOMEM; + + length = sizeof(struct lpfc_mbx_rq_create_v2); + length += ((2 * numrq * hrqp[0]->page_count) * + sizeof(struct dma_address)); + + alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE, + LPFC_MBOX_OPCODE_FCOE_RQ_CREATE, length, + LPFC_SLI4_MBX_NEMBED); + if (alloclen < length) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3099 Allocated DMA memory size (%d) is " + "less than the requested DMA memory size " + "(%d)\n", alloclen, length); + status = -ENOMEM; + goto out; + } + + + + rq_create = mbox->sge_array->addr[0]; + shdr = (union lpfc_sli4_cfg_shdr *)&rq_create->cfg_shdr; + + bf_set(lpfc_mbox_hdr_version, &shdr->request, LPFC_Q_CREATE_VERSION_2); + cnt = 0; + + for (idx = 0; idx < numrq; idx++) { + hrq = hrqp[idx]; + drq = drqp[idx]; + cq = cqp[idx]; + + if (hrq->entry_count != drq->entry_count) { + status = -EINVAL; + goto out; + } + + /* sanity check on queue memory */ + if (!hrq || !drq || !cq) { + status = -ENODEV; + goto out; + } + + if (idx == 0) { + bf_set(lpfc_mbx_rq_create_num_pages, + &rq_create->u.request, + hrq->page_count); + bf_set(lpfc_mbx_rq_create_rq_cnt, + &rq_create->u.request, (numrq * 2)); + bf_set(lpfc_mbx_rq_create_dnb, &rq_create->u.request, + 1); + bf_set(lpfc_rq_context_base_cq, + &rq_create->u.request.context, + cq->queue_id); + bf_set(lpfc_rq_context_data_size, + &rq_create->u.request.context, + 
LPFC_DATA_BUF_SIZE); + bf_set(lpfc_rq_context_hdr_size, + &rq_create->u.request.context, + LPFC_HDR_BUF_SIZE); + bf_set(lpfc_rq_context_rqe_count_1, + &rq_create->u.request.context, + hrq->entry_count); + bf_set(lpfc_rq_context_rqe_size, + &rq_create->u.request.context, + LPFC_RQE_SIZE_8); + bf_set(lpfc_rq_context_page_size, + &rq_create->u.request.context, + (PAGE_SIZE/SLI4_PAGE_SIZE)); + } + rc = 0; + list_for_each_entry(dmabuf, &hrq->page_list, list) { + memset(dmabuf->virt, 0, hw_page_size); + cnt = page_idx + dmabuf->buffer_tag; + rq_create->u.request.page[cnt].addr_lo = + putPaddrLow(dmabuf->phys); + rq_create->u.request.page[cnt].addr_hi = + putPaddrHigh(dmabuf->phys); + rc++; + } + page_idx += rc; + + rc = 0; + list_for_each_entry(dmabuf, &drq->page_list, list) { + memset(dmabuf->virt, 0, hw_page_size); + cnt = page_idx + dmabuf->buffer_tag; + rq_create->u.request.page[cnt].addr_lo = + putPaddrLow(dmabuf->phys); + rq_create->u.request.page[cnt].addr_hi = + putPaddrHigh(dmabuf->phys); + rc++; + } + page_idx += rc; + + hrq->db_format = LPFC_DB_RING_FORMAT; + hrq->db_regaddr = phba->sli4_hba.RQDBregaddr; + hrq->type = LPFC_HRQ; + hrq->assoc_qid = cq->queue_id; + hrq->subtype = subtype; + hrq->host_index = 0; + hrq->hba_index = 0; + + drq->db_format = LPFC_DB_RING_FORMAT; + drq->db_regaddr = phba->sli4_hba.RQDBregaddr; + drq->type = LPFC_DRQ; + drq->assoc_qid = cq->queue_id; + drq->subtype = subtype; + drq->host_index = 0; + drq->hba_index = 0; + + list_add_tail(&hrq->list, &cq->child_list); + list_add_tail(&drq->list, &cq->child_list); + } + + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); + /* The IOCTL status is embedded in the mailbox subheader. */ + shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); + shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); + if (shdr_status || shdr_add_status || rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3120 RQ_CREATE mailbox failed with " + "status x%x add_status x%x, mbx status x%x\n", + shdr_status, shdr_add_status, rc); + status = -ENXIO; + goto out; + } + rc = bf_get(lpfc_mbx_rq_create_q_id, &rq_create->u.response); + if (rc == 0xFFFF) { + status = -ENXIO; + goto out; + } + + /* Initialize all RQs with associated queue id */ + for (idx = 0; idx < numrq; idx++) { + hrq = hrqp[idx]; + hrq->queue_id = rc + (2 * idx); + drq = drqp[idx]; + drq->queue_id = rc + (2 * idx) + 1; + } + +out: + lpfc_sli4_mbox_cmd_free(phba, mbox); + return status; +} + /** * lpfc_eq_destroy - Destroy an event Queue on the HBA * @eq: The queue structure associated with the queue to destroy. 
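lpfc_mrq_create above assigns the header and data RQs of MRQ pair idx the ids base + 2*idx and base + 2*idx + 1, where base is the single id returned in the RQ_CREATE mailbox response, and the receive-completion handler earlier recovers the MRQ index by subtracting nvmet_cqset[0]->queue_id from the completing CQ id. The following is a minimal, self-contained sketch of that contiguous-id bookkeeping; the structure, helper names and demo ids are illustrative only, since the real base ids come from the CQ_CREATE_SET / RQ_CREATE responses.

#include <stdint.h>
#include <stdio.h>

struct mrq_map {
	uint16_t cq_base;   /* nvmet_cqset[0]->queue_id                 */
	uint16_t rq_base;   /* base id returned by the RQ_CREATE mailbox */
	uint16_t nmrq;      /* cfg_nvmet_mrq: number of MRQ pairs        */
};

/* CQ id from a receive completion -> MRQ index, as in the rcqe handler. */
static int mrq_index_from_cqid(const struct mrq_map *m, uint16_t cqid)
{
	if (cqid < m->cq_base || cqid >= m->cq_base + m->nmrq)
		return -1;            /* not a completion from the NVMET CQ set */
	return cqid - m->cq_base;
}

/* MRQ index -> header/data RQ ids, as assigned at the end of lpfc_mrq_create. */
static void mrq_rq_ids(const struct mrq_map *m, int idx,
		       uint16_t *hrq_id, uint16_t *drq_id)
{
	*hrq_id = m->rq_base + 2 * idx;
	*drq_id = m->rq_base + 2 * idx + 1;
}

int main(void)
{
	struct mrq_map m = { .cq_base = 40, .rq_base = 80, .nmrq = 4 };
	uint16_t h, d;
	int idx = mrq_index_from_cqid(&m, 42);   /* third CQ in the set */

	mrq_rq_ids(&m, idx, &h, &d);
	printf("cqid 42 -> mrq %d, hrq %u, drq %u\n", idx, h, d); /* mrq 2, hrq 84, drq 85 */
	return 0;
}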
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 99546eaef087..3bfdff1a4c53 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -550,6 +550,9 @@ struct lpfc_sli4_hba { struct lpfc_queue **hba_eq; /* Event queues for HBA */ struct lpfc_queue **fcp_cq; /* Fast-path FCP compl queue */ struct lpfc_queue **nvme_cq; /* Fast-path NVME compl queue */ + struct lpfc_queue **nvmet_cqset; /* Fast-path NVMET CQ Set queues */ + struct lpfc_queue **nvmet_mrq_hdr; /* Fast-path NVMET hdr MRQs */ + struct lpfc_queue **nvmet_mrq_data; /* Fast-path NVMET data MRQs */ struct lpfc_queue **fcp_wq; /* Fast-path FCP work queue */ struct lpfc_queue **nvme_wq; /* Fast-path NVME work queue */ uint16_t *fcp_cq_map; @@ -655,6 +658,8 @@ struct lpfc_sli4_hba { uint16_t num_online_cpu; uint16_t num_present_cpu; uint16_t curr_disp_cpu; + + uint16_t nvmet_mrq_post_idx; }; enum lpfc_sge_type { @@ -742,12 +747,18 @@ int lpfc_eq_create(struct lpfc_hba *, struct lpfc_queue *, uint32_t); int lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq); int lpfc_cq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, uint32_t, uint32_t); +int lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp, + struct lpfc_queue **eqp, uint32_t type, + uint32_t subtype); int32_t lpfc_mq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, uint32_t); int lpfc_wq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, uint32_t); int lpfc_rq_create(struct lpfc_hba *, struct lpfc_queue *, struct lpfc_queue *, struct lpfc_queue *, uint32_t); +int lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, + struct lpfc_queue **drqp, struct lpfc_queue **cqp, + uint32_t subtype); void lpfc_rq_adjust_repost(struct lpfc_hba *, struct lpfc_queue *, int); int lpfc_eq_destroy(struct lpfc_hba *, struct lpfc_queue *); int lpfc_cq_destroy(struct lpfc_hba *, struct lpfc_queue *); -- GitLab From 8c258641e01cfcc5620c4fb191300bea224bcc99 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:36 -0800 Subject: [PATCH 0447/1084] scsi: lpfc: NVME Target: Merge into FC discovery NVME Target: Merge into FC discovery Adds NVME PRLI handling and Nameserver registrations for NVME Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 8 +++- drivers/scsi/lpfc/lpfc_els.c | 61 ++++++++++++++++++++++-- drivers/scsi/lpfc/lpfc_hbadisc.c | 27 +++++++++-- drivers/scsi/lpfc/lpfc_hw.h | 10 +++- drivers/scsi/lpfc/lpfc_nportdisc.c | 75 ++++++++++++++++++++++++++++-- 5 files changed, 167 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 2c051369857a..d576da4d3afb 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1433,7 +1433,13 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && (context == FC_TYPE_NVME)) { - lpfc_nvme_update_localport(vport); + if ((vport == phba->pport) && phba->nvmet_support) { + CtReq->un.rff.fbits = (FC4_FEATURE_TARGET | + FC4_FEATURE_NVME_DISC); + /* todo: update targetport attributes */ + } else { + lpfc_nvme_update_localport(vport); + } CtReq->un.rff.type_code = context; } else if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 23546b3c950c..5ee3ae88a080 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2001,11 +2001,21 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry) sp->cmn.fcphHigh = FC_PH3; sp->cmn.valid_vendor_ver_level = 0; + memset(sp->un.vendorVersion, 0, sizeof(sp->un.vendorVersion)); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, "Issue PLOGI: did:x%x", did, 0, 0); + /* If our firmware supports this feature, convey that + * information to the target using the vendor specific field. + */ + if (phba->sli.sli_flag & LPFC_SLI_SUPPRESS_RSP) { + sp->cmn.valid_vendor_ver_level = 1; + sp->un.vv.vid = cpu_to_be32(LPFC_VV_EMLX_ID); + sp->un.vv.flags = cpu_to_be32(LPFC_VV_SUPPRESS_RSP); + } + phba->fc_stat.elsXmitPLOGI++; elsiocb->iocb_cmpl = lpfc_cmpl_els_plogi; ret = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, elsiocb, 0); @@ -2207,7 +2217,13 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, !phba->nvmet_support) bf_set(prli_fba, npr_nvme, 1); - bf_set(prli_init, npr_nvme, 1); + if (phba->nvmet_support) { + bf_set(prli_tgt, npr_nvme, 1); + bf_set(prli_disc, npr_nvme, 1); + + } else { + bf_set(prli_init, npr_nvme, 1); + } npr_nvme->word1 = cpu_to_be32(npr_nvme->word1); npr_nvme->word4 = cpu_to_be32(npr_nvme->word4); elsiocb->iocb_flag |= LPFC_PRLI_NVME_REQ; @@ -2619,8 +2635,11 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, phba->pport->fc_myDID = 0; if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) - lpfc_nvme_update_localport(phba->pport); + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if (!phba->nvmet_support) + lpfc_nvme_update_localport(phba->pport); + /* todo: tgt: update targetport attributes */ + } mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (mbox) { @@ -4074,10 +4093,25 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag, sizeof(struct lpfc_name)); memcpy(&sp->nodeName, &vport->fc_sparam.nodeName, sizeof(struct lpfc_name)); - } else + } else { memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm)); + sp->cmn.valid_vendor_ver_level = 0; + memset(sp->un.vendorVersion, 0, + sizeof(sp->un.vendorVersion)); + + /* If our firmware supports this feature, convey that + * info to the target using the vendor specific field. 
+ */ + if (phba->sli.sli_flag & LPFC_SLI_SUPPRESS_RSP) { + sp->cmn.valid_vendor_ver_level = 1; + sp->un.vv.vid = cpu_to_be32(LPFC_VV_EMLX_ID); + sp->un.vv.flags = + cpu_to_be32(LPFC_VV_SUPPRESS_RSP); + } + } + lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, "Issue ACC FLOGI/PLOGI: did:x%x flg:x%x", ndlp->nlp_DID, ndlp->nlp_flag, 0); @@ -4397,7 +4431,22 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, bf_set(prli_type_code, npr_nvme, PRLI_NVME_TYPE); bf_set(prli_estabImagePair, npr_nvme, 0); /* Should be 0 */ bf_set(prli_acc_rsp_code, npr_nvme, PRLI_REQ_EXECUTED); - bf_set(prli_init, npr_nvme, 1); + if (phba->nvmet_support) { + bf_set(prli_tgt, npr_nvme, 1); + bf_set(prli_disc, npr_nvme, 1); + if (phba->cfg_nvme_enable_fb) { + bf_set(prli_fba, npr_nvme, 1); + + /* TBD. Target mode needs to post buffers + * that support the configured first burst + * byte size. + */ + bf_set(prli_fb_sz, npr_nvme, + phba->cfg_nvmet_fb_size); + } + } else { + bf_set(prli_init, npr_nvme, 1); + } lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, "6015 NVME issue PRLI ACC word1 x%08x " @@ -5815,6 +5864,8 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport) (ndlp->nlp_state == NLP_STE_UNUSED_NODE) || !lpfc_rscn_payload_check(vport, ndlp->nlp_DID)) continue; + if (vport->phba->nvmet_support) + continue; lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); lpfc_cancel_retry_delay_tmo(vport, ndlp); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 8936f5d91c87..6bbb988dd8da 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -910,8 +910,11 @@ lpfc_linkdown(struct lpfc_hba *phba) vports[i]->fc_myDID = 0; if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) - lpfc_nvme_update_localport(vports[i]); + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if (!phba->nvmet_support) + lpfc_nvme_update_localport(vports[i]); + /* todo: tgt: update targetport attributes */ + } } } lpfc_destroy_vport_work_array(phba, vports); @@ -3583,8 +3586,11 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) vport->fc_myDID = 0; if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) - lpfc_nvme_update_localport(vport); + (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if (!phba->nvmet_support) + lpfc_nvme_update_localport(vport); + /* todo: update targetport attributes */ + } goto out; } @@ -4175,6 +4181,11 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, */ vport->phba->nport_event_cnt++; lpfc_nvme_register_port(vport, ndlp); + } else { + /* Just take an NDLP ref count since the + * target does not register rports. 
+ */ + lpfc_nlp_get(ndlp); } } } @@ -5096,6 +5107,8 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) return NULL; lpfc_nlp_init(vport, ndlp, did); lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + if (vport->phba->nvmet_support) + return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); @@ -5104,6 +5117,8 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_NPR_NODE); if (!ndlp) return NULL; + if (vport->phba->nvmet_support) + return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); @@ -5123,6 +5138,8 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) * delay timeout is not needed. */ lpfc_cancel_retry_delay_tmo(vport, ndlp); + if (vport->phba->nvmet_support) + return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); @@ -5138,6 +5155,8 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) ndlp->nlp_flag & NLP_RCV_PLOGI) return NULL; lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + if (vport->phba->nvmet_support) + return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 883e6d2a7bc7..c2221377e246 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -515,7 +515,15 @@ struct serv_parm { /* Structure is in Big Endian format */ struct class_parms cls2; struct class_parms cls3; struct class_parms cls4; - uint8_t vendorVersion[16]; + union { + uint8_t vendorVersion[16]; + struct { + uint32_t vid; +#define LPFC_VV_EMLX_ID 0x454d4c58 /* EMLX */ + uint32_t flags; +#define LPFC_VV_SUPPRESS_RSP 1 + } vv; + } un; }; /* diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 470f9586192f..0716818f269f 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -288,6 +288,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, uint32_t ed_tov; LPFC_MBOXQ_t *mbox; struct ls_rjt stat; + uint32_t vid, flag; int rc; memset(&stat, 0, sizeof (struct ls_rjt)); @@ -423,6 +424,15 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_can_disctmo(vport); } + ndlp->nlp_flag &= ~NLP_SUPPRESS_RSP; + if ((phba->sli.sli_flag & LPFC_SLI_SUPPRESS_RSP) && + sp->cmn.valid_vendor_ver_level) { + vid = be32_to_cpu(sp->un.vv.vid); + flag = be32_to_cpu(sp->un.vv.flags); + if ((vid == LPFC_VV_EMLX_ID) && (flag & LPFC_VV_SUPPRESS_RSP)) + ndlp->nlp_flag |= NLP_SUPPRESS_RSP; + } + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!mbox) goto out; @@ -744,6 +754,14 @@ lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, } if (npr->Retry) ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE; + + /* If this driver is in nvme target mode, set the ndlp's fc4 + * type to NVME provided the PRLI response claims NVME FC4 + * type. Target mode does not issue gft_id so doesn't get + * the fc4 type set until now. 
+ */ + if ((phba->nvmet_support) && (npr->prliType == PRLI_NVME_TYPE)) + ndlp->nlp_fc4_type |= NLP_FC4_NVME; } if (rport) { /* We need to update the rport role values */ @@ -1041,6 +1059,7 @@ lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, *rspiocb; struct lpfc_dmabuf *pcmd, *prsp, *mp; uint32_t *lp; + uint32_t vid, flag; IOCB_t *irsp; struct serv_parm *sp; uint32_t ed_tov; @@ -1109,6 +1128,16 @@ lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport, ed_tov = (phba->fc_edtov + 999999) / 1000000; } + ndlp->nlp_flag &= ~NLP_SUPPRESS_RSP; + if ((phba->sli.sli_flag & LPFC_SLI_SUPPRESS_RSP) && + sp->cmn.valid_vendor_ver_level) { + vid = be32_to_cpu(sp->un.vv.vid); + flag = be32_to_cpu(sp->un.vv.flags); + if ((vid == LPFC_VV_EMLX_ID) && + (flag & LPFC_VV_SUPPRESS_RSP)) + ndlp->nlp_flag |= NLP_SUPPRESS_RSP; + } + /* * Use the larger EDTOV * RATOV = 2 * EDTOV for pt-to-pt @@ -1504,9 +1533,37 @@ lpfc_rcv_prli_reglogin_issue(struct lpfc_vport *vport, uint32_t evt) { struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + struct ls_rjt stat; - /* Initiator mode. */ - lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + if (vport->phba->nvmet_support) { + /* NVME Target mode. Handle and respond to the PRLI and + * transition to UNMAPPED provided the RPI has completed + * registration. + */ + if (ndlp->nlp_flag & NLP_RPI_REGISTERED) { + lpfc_rcv_prli(vport, ndlp, cmdiocb); + lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + } else { + /* RPI registration has not completed. Reject the PRLI + * to prevent an illegal state transition when the + * rpi registration does complete. + */ + lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME_DISC, + "6115 NVMET ndlp rpi %d state " + "unknown, state x%x flags x%08x\n", + ndlp->nlp_rpi, ndlp->nlp_state, + ndlp->nlp_flag); + memset(&stat, 0, sizeof(struct ls_rjt)); + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS; + lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, + ndlp, NULL); + } + } else { + /* Initiator mode. */ + lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + } return ndlp->nlp_state; } @@ -1668,7 +1725,12 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); lpfc_issue_els_prli(vport, ndlp, 0); } else { - /* Only Fabric ports should transition */ + if ((vport->fc_flag & FC_PT2PT) && phba->nvmet_support) + phba->targetport->port_id = vport->fc_myDID; + + /* Only Fabric ports should transition. NVME target + * must complete PRLI. + */ if (ndlp->nlp_type & NLP_FABRIC) { ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); @@ -1714,6 +1776,13 @@ lpfc_device_recov_reglogin_issue(struct lpfc_vport *vport, lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); spin_lock_irq(shost->host_lock); + /* If we are a target we won't immediately transition into PRLI, + * so if REG_LOGIN already completed we don't need to ignore it. 
+ */ + if (!(ndlp->nlp_flag & NLP_RPI_REGISTERED) || + !vport->phba->nvmet_support) + ndlp->nlp_flag |= NLP_IGNR_REG_CMPL; + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); spin_unlock_irq(shost->host_lock); lpfc_disc_set_adisc(vport, ndlp); -- GitLab From d613b6a7aa922690e341c9ff0faba66ae299b5ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:37 -0800 Subject: [PATCH 0448/1084] scsi: lpfc: NVME Target: bind to nvmet_fc api NVME Target: Tie in to NVME Fabrics nvmet_fc LLDD target api Adds the routines to: - register and deregister the FC port as a nvmet-fc targetport - binding of nvme queues to adapter WQs - receipt and passing of NVME LS's to transport, sending transport response - receipt of NVME FCP CMD IUs, processing FCP target io data transmission commands; transmission of FCP io response - Abort operations for tgt io exchanges [mkp: fixed space at end of file warning] Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/Makefile | 2 +- drivers/scsi/lpfc/lpfc_crtn.h | 10 + drivers/scsi/lpfc/lpfc_ct.c | 2 +- drivers/scsi/lpfc/lpfc_els.c | 5 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 10 +- drivers/scsi/lpfc/lpfc_init.c | 14 +- drivers/scsi/lpfc/lpfc_mem.c | 45 + drivers/scsi/lpfc/lpfc_nvmet.c | 1671 ++++++++++++++++++++++++++++++ drivers/scsi/lpfc/lpfc_sli.c | 8 +- 9 files changed, 1752 insertions(+), 15 deletions(-) create mode 100644 drivers/scsi/lpfc/lpfc_nvmet.c diff --git a/drivers/scsi/lpfc/Makefile b/drivers/scsi/lpfc/Makefile index cd7e1fcf52c6..30a6a35abafd 100644 --- a/drivers/scsi/lpfc/Makefile +++ b/drivers/scsi/lpfc/Makefile @@ -31,4 +31,4 @@ obj-$(CONFIG_SCSI_LPFC) := lpfc.o lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o \ lpfc_hbadisc.o lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o \ lpfc_scsi.o lpfc_attr.o lpfc_vport.o lpfc_debugfs.o lpfc_bsg.o \ - lpfc_nvme.o + lpfc_nvme.o lpfc_nvmet.o diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 2e7a7f7f43b2..f56015b78525 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -363,6 +363,8 @@ void *lpfc_nvmet_buf_alloc(struct lpfc_hba *phba, int flags, void lpfc_nvmet_buf_free(struct lpfc_hba *phba, void *virtp, dma_addr_t dma); void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *); +void lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp); + /* Function prototypes. 
*/ const char* lpfc_info(struct Scsi_Host *); int lpfc_scan_finished(struct Scsi_Host *, unsigned long); @@ -536,6 +538,14 @@ int lpfc_nvme_register_port(struct lpfc_vport *vport, int lpfc_nvme_create_localport(struct lpfc_vport *vport); void lpfc_nvme_destroy_localport(struct lpfc_vport *vport); void lpfc_nvme_update_localport(struct lpfc_vport *vport); +int lpfc_nvmet_create_targetport(struct lpfc_hba *phba); +int lpfc_nvmet_update_targetport(struct lpfc_hba *phba); +void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba); +void lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb); +void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct rqb_dmabuf *nvmebuf, uint64_t isr_ts); void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index d576da4d3afb..53bdfe421251 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1436,7 +1436,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, if ((vport == phba->pport) && phba->nvmet_support) { CtReq->un.rff.fbits = (FC4_FEATURE_TARGET | FC4_FEATURE_NVME_DISC); - /* todo: update targetport attributes */ + lpfc_nvmet_update_targetport(phba); } else { lpfc_nvme_update_localport(vport); } diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 5ee3ae88a080..1c0ac509e6db 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2636,9 +2636,10 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { - if (!phba->nvmet_support) + if (phba->nvmet_support) + lpfc_nvmet_update_targetport(phba); + else lpfc_nvme_update_localport(phba->pport); - /* todo: tgt: update targetport attributes */ } mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 6bbb988dd8da..542797bd3134 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -911,9 +911,10 @@ lpfc_linkdown(struct lpfc_hba *phba) if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { - if (!phba->nvmet_support) + if (phba->nvmet_support) + lpfc_nvmet_update_targetport(phba); + else lpfc_nvme_update_localport(vports[i]); - /* todo: tgt: update targetport attributes */ } } } @@ -3587,9 +3588,10 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { - if (!phba->nvmet_support) + if (phba->nvmet_support) + lpfc_nvmet_update_targetport(phba); + else lpfc_nvme_update_localport(vport); - /* todo: update targetport attributes */ } goto out; } diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e6cbc0bac029..5eafd0db758d 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -10397,7 +10397,13 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev) /* Remove FC host and then SCSI host with the physical port */ fc_remove_host(shost); scsi_remove_host(shost); + + /* Perform ndlp cleanup on the physical port. The nvme and nvmet + * localports are destroyed after to cleanup all transport memory. 
+ */ lpfc_cleanup(vport); + lpfc_nvmet_destroy_targetport(phba); + lpfc_nvme_destroy_localport(vport); /* * Bring down the SLI Layer. This step disable all interrupts, @@ -11203,13 +11209,13 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev) /* Remove FC host and then SCSI host with the physical port */ fc_remove_host(shost); scsi_remove_host(shost); - /* todo: tgt: remove targetport */ - /* Perform ndlp cleanup on the physical port. The nvme localport - * is destroyed after to ensure all rports are io-disabled. + /* Perform ndlp cleanup on the physical port. The nvme and nvmet + * localports are destroyed after to cleanup all transport memory. */ - lpfc_nvme_destroy_localport(vport); lpfc_cleanup(vport); + lpfc_nvmet_destroy_targetport(phba); + lpfc_nvme_destroy_localport(vport); /* * Bring down the SLI Layer. This step disables all interrupts, diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 32db255f5216..2437ec5f4863 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -763,3 +763,48 @@ lpfc_in_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) } return; } + +/** + * lpfc_rq_buf_free - Free a RQ DMA buffer + * @phba: HBA buffer is associated with + * @mp: Buffer to free + * + * Description: Frees the given DMA buffer in the appropriate way given by + * reposting it to its associated RQ so it can be reused. + * + * Notes: Takes phba->hbalock. Can be called with or without other locks held. + * + * Returns: None + **/ +void +lpfc_rq_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) +{ + struct lpfc_rqb *rqbp; + struct lpfc_rqe hrqe; + struct lpfc_rqe drqe; + struct rqb_dmabuf *rqb_entry; + unsigned long flags; + int rc; + + if (!mp) + return; + + rqb_entry = container_of(mp, struct rqb_dmabuf, hbuf); + rqbp = rqb_entry->hrq->rqbp; + + spin_lock_irqsave(&phba->hbalock, flags); + list_del(&rqb_entry->hbuf.list); + hrqe.address_lo = putPaddrLow(rqb_entry->hbuf.phys); + hrqe.address_hi = putPaddrHigh(rqb_entry->hbuf.phys); + drqe.address_lo = putPaddrLow(rqb_entry->dbuf.phys); + drqe.address_hi = putPaddrHigh(rqb_entry->dbuf.phys); + rc = lpfc_sli4_rq_put(rqb_entry->hrq, rqb_entry->drq, &hrqe, &drqe); + if (rc < 0) { + (rqbp->rqb_free_buffer)(phba, rqb_entry); + } else { + list_add_tail(&rqb_entry->hbuf.list, &rqbp->rqb_buffer_list); + rqbp->buffer_count++; + } + + spin_unlock_irqrestore(&phba->hbalock, flags); +} diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c new file mode 100644 index 000000000000..d4efd86bb156 --- /dev/null +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -0,0 +1,1671 @@ +/******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channsel Host Bus Adapters. * + * Copyright (C) 2004-2016 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. 
See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + ********************************************************************/ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include <../drivers/nvme/host/nvme.h> +#include + +#include "lpfc_version.h" +#include "lpfc_hw4.h" +#include "lpfc_hw.h" +#include "lpfc_sli.h" +#include "lpfc_sli4.h" +#include "lpfc_nl.h" +#include "lpfc_disc.h" +#include "lpfc.h" +#include "lpfc_scsi.h" +#include "lpfc_nvme.h" +#include "lpfc_nvmet.h" +#include "lpfc_logmsg.h" +#include "lpfc_crtn.h" +#include "lpfc_vport.h" + +static struct lpfc_iocbq *lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *, + struct lpfc_nvmet_rcv_ctx *, + dma_addr_t rspbuf, + uint16_t rspsize); +static struct lpfc_iocbq *lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *, + struct lpfc_nvmet_rcv_ctx *); +static int lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *, + struct lpfc_nvmet_rcv_ctx *, + uint32_t, uint16_t); +static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *, + struct lpfc_nvmet_rcv_ctx *, + uint32_t, uint16_t); +static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *, + struct lpfc_nvmet_rcv_ctx *, + uint32_t, uint16_t); + +/** + * lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. This function is the completion handler for NVME LS commands + * The function frees memory resources used for the NVME commands. + **/ +static void +lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct nvmefc_tgt_ls_req *rsp; + struct lpfc_nvmet_rcv_ctx *ctxp; + uint32_t status, result; + + status = bf_get(lpfc_wcqe_c_status, wcqe); + result = wcqe->parameter; + if (!phba->targetport) + goto out; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + + if (status) + atomic_inc(&tgtp->xmt_ls_rsp_error); + else + atomic_inc(&tgtp->xmt_ls_rsp_cmpl); + +out: + ctxp = cmdwqe->context2; + rsp = &ctxp->ctx.ls_req; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6038 %s: Entrypoint: ctx %p status %x/%x\n", __func__, + ctxp, status, result); + + lpfc_nlp_put(cmdwqe->context1); + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + lpfc_sli_release_iocbq(phba, cmdwqe); + rsp->done(rsp); + kfree(ctxp); +} + +/** + * lpfc_nvmet_rq_post - Repost a NVMET RQ DMA buffer and clean up context + * @phba: HBA buffer is associated with + * @ctxp: context to clean up + * @mp: Buffer to free + * + * Description: Frees the given DMA buffer in the appropriate way given by + * reposting it to its associated RQ so it can be reused. + * + * Notes: Takes phba->hbalock. Can be called with or without other locks held. 
+ * + * Returns: None + **/ +void +lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_dmabuf *mp) +{ + if (ctxp) { + if (ctxp->txrdy) { + pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy, + ctxp->txrdy_phys); + ctxp->txrdy = NULL; + ctxp->txrdy_phys = 0; + } + ctxp->state = LPFC_NVMET_STE_FREE; + } + lpfc_rq_buf_free(phba, mp); +} + +/** + * lpfc_nvmet_xmt_fcp_op_cmp - Completion handler for FCP Response + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. This function is the completion handler for NVME FCP commands + * The function frees memory resources used for the NVME commands. + **/ +static void +lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct nvmefc_tgt_fcp_req *rsp; + struct lpfc_nvmet_rcv_ctx *ctxp; + uint32_t status, result, op, start_clean; + + ctxp = cmdwqe->context2; + rsp = &ctxp->ctx.fcp_req; + op = rsp->op; + ctxp->flag &= ~LPFC_NVMET_IO_INP; + + status = bf_get(lpfc_wcqe_c_status, wcqe); + result = wcqe->parameter; + + if (!phba->targetport) + goto out; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (status) { + rsp->fcp_error = NVME_SC_DATA_XFER_ERROR; + rsp->transferred_length = 0; + atomic_inc(&tgtp->xmt_fcp_rsp_error); + } else { + rsp->fcp_error = NVME_SC_SUCCESS; + if (op == NVMET_FCOP_RSP) + rsp->transferred_length = rsp->rsplen; + else + rsp->transferred_length = rsp->transfer_length; + atomic_inc(&tgtp->xmt_fcp_rsp_cmpl); + } + +out: + if ((op == NVMET_FCOP_READDATA_RSP) || + (op == NVMET_FCOP_RSP)) { + /* Sanity check */ + ctxp->state = LPFC_NVMET_STE_DONE; + ctxp->entry_cnt++; + rsp->done(rsp); + /* Let Abort cmpl repost the context */ + if (!(ctxp->flag & LPFC_NVMET_ABORT_OP)) + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + } else { + ctxp->entry_cnt++; + start_clean = offsetof(struct lpfc_iocbq, wqe); + memset(((char *)cmdwqe) + start_clean, 0, + (sizeof(struct lpfc_iocbq) - start_clean)); + rsp->done(rsp); + } +} + +static int +lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_ls_req *rsp) +{ + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.ls_req); + struct lpfc_hba *phba = ctxp->phba; + struct hbq_dmabuf *nvmebuf = + (struct hbq_dmabuf *)ctxp->rqb_buffer; + struct lpfc_iocbq *nvmewqeq; + struct lpfc_nvmet_tgtport *nvmep = tgtport->private; + struct lpfc_dmabuf dmabuf; + struct ulp_bde64 bpl; + int rc; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6023 %s: Entrypoint ctx %p %p\n", __func__, + ctxp, tgtport); + + nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, ctxp, rsp->rspdma, + rsp->rsplen); + if (nvmewqeq == NULL) { + atomic_inc(&nvmep->xmt_ls_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6150 LS Drop IO x%x: Prep\n", + ctxp->oxid); + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, + ctxp->sid, ctxp->oxid); + return -ENOMEM; + } + + /* Save numBdes for bpl2sgl */ + nvmewqeq->rsvd2 = 1; + nvmewqeq->hba_wqidx = 0; + nvmewqeq->context3 = &dmabuf; + dmabuf.virt = &bpl; + bpl.addrLow = nvmewqeq->wqe.xmit_sequence.bde.addrLow; + bpl.addrHigh = nvmewqeq->wqe.xmit_sequence.bde.addrHigh; + bpl.tus.f.bdeSize = rsp->rsplen; + bpl.tus.f.bdeFlags = 0; + bpl.tus.w = le32_to_cpu(bpl.tus.w); + + 
nvmewqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_rsp_cmp; + nvmewqeq->iocb_cmpl = NULL; + nvmewqeq->context2 = ctxp; + + rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, nvmewqeq); + if (rc == WQE_SUCCESS) { + /* + * Okay to repost buffer here, but wait till cmpl + * before freeing ctxp and iocbq. + */ + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + ctxp->rqb_buffer = 0; + atomic_inc(&nvmep->xmt_ls_rsp); + return 0; + } + /* Give back resources */ + atomic_inc(&nvmep->xmt_ls_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6151 LS Drop IO x%x: Issue %d\n", + ctxp->oxid, rc); + + lpfc_nlp_put(nvmewqeq->context1); + + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); + return -ENXIO; +} + +static int +lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *rsp) +{ + struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_hba *phba = ctxp->phba; + struct lpfc_iocbq *nvmewqeq; + unsigned long iflags; + int rc; + + if (rsp->op == NVMET_FCOP_ABORT) { + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6103 Abort op: oxri x%x %d cnt %d\n", + ctxp->oxid, ctxp->state, ctxp->entry_cnt); + atomic_inc(&lpfc_nvmep->xmt_fcp_abort); + ctxp->entry_cnt++; + ctxp->flag |= LPFC_NVMET_ABORT_OP; + if (ctxp->flag & LPFC_NVMET_IO_INP) + lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + else + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + return 0; + } + + /* Sanity check */ + if (ctxp->state == LPFC_NVMET_STE_ABORT) { + atomic_inc(&lpfc_nvmep->xmt_fcp_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6102 Bad state IO x%x aborted\n", + ctxp->oxid); + goto aerr; + } + + nvmewqeq = lpfc_nvmet_prep_fcp_wqe(phba, ctxp); + if (nvmewqeq == NULL) { + atomic_inc(&lpfc_nvmep->xmt_fcp_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6152 FCP Drop IO x%x: Prep\n", + ctxp->oxid); + goto aerr; + } + + nvmewqeq->wqe_cmpl = lpfc_nvmet_xmt_fcp_op_cmp; + nvmewqeq->iocb_cmpl = NULL; + nvmewqeq->context2 = ctxp; + nvmewqeq->iocb_flag |= LPFC_IO_NVMET; + ctxp->wqeq->hba_wqidx = rsp->hwqid; + + /* For now we take hbalock */ + spin_lock_irqsave(&phba->hbalock, iflags); + rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq); + spin_unlock_irqrestore(&phba->hbalock, iflags); + if (rc == WQE_SUCCESS) { + ctxp->flag |= LPFC_NVMET_IO_INP; + return 0; + } + + /* Give back resources */ + atomic_inc(&lpfc_nvmep->xmt_fcp_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6153 FCP Drop IO x%x: Issue: %d\n", + ctxp->oxid, rc); + + ctxp->wqeq->hba_wqidx = 0; + nvmewqeq->context2 = NULL; + nvmewqeq->context3 = NULL; +aerr: + return -ENXIO; +} + +static void +lpfc_nvmet_targetport_delete(struct nvmet_fc_target_port *targetport) +{ + struct lpfc_nvmet_tgtport *tport = targetport->private; + + /* release any threads waiting for the unreg to complete */ + complete(&tport->tport_unreg_done); +} + +static struct nvmet_fc_target_template lpfc_tgttemplate = { + .targetport_delete = lpfc_nvmet_targetport_delete, + .xmt_ls_rsp = lpfc_nvmet_xmt_ls_rsp, + .fcp_op = lpfc_nvmet_xmt_fcp_op, + + .max_hw_queues = 1, + .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, + .max_dif_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, + .dma_boundary = 0xFFFFFFFF, + + /* optional features */ + .target_features = 0, + /* sizes of additional private data for data structures */ + .target_priv_sz = sizeof(struct 
lpfc_nvmet_tgtport), +}; + +int +lpfc_nvmet_create_targetport(struct lpfc_hba *phba) +{ + struct lpfc_vport *vport = phba->pport; + struct lpfc_nvmet_tgtport *tgtp; + struct nvmet_fc_port_info pinfo; + int error = 0; + + if (phba->targetport) + return 0; + + memset(&pinfo, 0, sizeof(struct nvmet_fc_port_info)); + pinfo.node_name = wwn_to_u64(vport->fc_nodename.u.wwn); + pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn); + pinfo.port_id = vport->fc_myDID; + + lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; + lpfc_tgttemplate.max_sgl_segments = phba->cfg_sg_seg_cnt; + lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; + + error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, + &phba->pcidev->dev, + &phba->targetport); + if (error) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6025 Cannot register NVME targetport " + "x%x\n", error); + phba->targetport = NULL; + } else { + tgtp = (struct lpfc_nvmet_tgtport *) + phba->targetport->private; + tgtp->phba = phba; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6026 Registered NVME " + "targetport: %p, private %p " + "portnm %llx nodenm %llx\n", + phba->targetport, tgtp, + pinfo.port_name, pinfo.node_name); + + atomic_set(&tgtp->rcv_ls_req_in, 0); + atomic_set(&tgtp->rcv_ls_req_out, 0); + atomic_set(&tgtp->rcv_ls_req_drop, 0); + atomic_set(&tgtp->xmt_ls_abort, 0); + atomic_set(&tgtp->xmt_ls_rsp, 0); + atomic_set(&tgtp->xmt_ls_drop, 0); + atomic_set(&tgtp->xmt_ls_rsp_error, 0); + atomic_set(&tgtp->xmt_ls_rsp_cmpl, 0); + atomic_set(&tgtp->rcv_fcp_cmd_in, 0); + atomic_set(&tgtp->rcv_fcp_cmd_out, 0); + atomic_set(&tgtp->rcv_fcp_cmd_drop, 0); + atomic_set(&tgtp->xmt_fcp_abort, 0); + atomic_set(&tgtp->xmt_fcp_drop, 0); + atomic_set(&tgtp->xmt_fcp_read_rsp, 0); + atomic_set(&tgtp->xmt_fcp_read, 0); + atomic_set(&tgtp->xmt_fcp_write, 0); + atomic_set(&tgtp->xmt_fcp_rsp, 0); + atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0); + atomic_set(&tgtp->xmt_fcp_rsp_error, 0); + atomic_set(&tgtp->xmt_fcp_rsp_drop, 0); + atomic_set(&tgtp->xmt_abort_rsp, 0); + atomic_set(&tgtp->xmt_abort_rsp_error, 0); + atomic_set(&tgtp->xmt_abort_cmpl, 0); + } + return error; +} + +int +lpfc_nvmet_update_targetport(struct lpfc_hba *phba) +{ + struct lpfc_vport *vport = phba->pport; + + if (!phba->targetport) + return 0; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, + "6007 Update NVMET port %p did x%x\n", + phba->targetport, vport->fc_myDID); + + phba->targetport->port_id = vport->fc_myDID; + return 0; +} + +void +lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) +{ + struct lpfc_nvmet_tgtport *tgtp; + + if (phba->nvmet_support == 0) + return; + if (phba->targetport) { + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + init_completion(&tgtp->tport_unreg_done); + nvmet_fc_unregister_targetport(phba->targetport); + wait_for_completion_timeout(&tgtp->tport_unreg_done, 5); + } + phba->targetport = NULL; +} + +/** + * lpfc_nvmet_unsol_ls_buffer - Process an unsolicited event data buffer + * @phba: pointer to lpfc hba data structure. + * @pring: pointer to a SLI ring. + * @nvmebuf: pointer to lpfc nvme command HBQ data structure. + * + * This routine is used for processing the WQE associated with a unsolicited + * event. It first determines whether there is an existing ndlp that matches + * the DID from the unsolicited WQE. If not, it will create a new one with + * the DID from the unsolicited WQE. 
The ELS command from the unsolicited + * WQE is then used to invoke the proper routine and to set up proper state + * of the discovery state machine. + **/ +static void +lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct hbq_dmabuf *nvmebuf) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct fc_frame_header *fc_hdr; + struct lpfc_nvmet_rcv_ctx *ctxp; + uint32_t *payload; + uint32_t size, oxid, sid, rc; + + if (!nvmebuf || !phba->targetport) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6154 LS Drop IO\n"); + goto dropit; + } + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + payload = (uint32_t *)(nvmebuf->dbuf.virt); + fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); + size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + sid = sli4_sid_from_fc_hdr(fc_hdr); + + ctxp = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), GFP_ATOMIC); + if (ctxp == NULL) { + atomic_inc(&tgtp->rcv_ls_req_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6155 LS Drop IO x%x: Alloc\n", + oxid); +dropit: + if (nvmebuf) + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + return; + } + ctxp->phba = phba; + ctxp->size = size; + ctxp->oxid = oxid; + ctxp->sid = sid; + ctxp->wqeq = NULL; + ctxp->state = LPFC_NVMET_STE_RCV; + ctxp->rqb_buffer = (void *)nvmebuf; + /* + * The calling sequence should be: + * nvmet_fc_rcv_ls_req -> lpfc_nvmet_xmt_ls_rsp/cmp ->_req->done + * lpfc_nvmet_xmt_ls_rsp_cmp should free the allocated ctxp. + */ + atomic_inc(&tgtp->rcv_ls_req_in); + rc = nvmet_fc_rcv_ls_req(phba->targetport, &ctxp->ctx.ls_req, + payload, size); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6037 %s: ctx %p sz %d rc %d: %08x %08x %08x " + "%08x %08x %08x\n", __func__, ctxp, size, rc, + *payload, *(payload+1), *(payload+2), + *(payload+3), *(payload+4), *(payload+5)); + if (rc == 0) { + atomic_inc(&tgtp->rcv_ls_req_out); + return; + } + atomic_inc(&tgtp->rcv_ls_req_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6156 LS Drop IO x%x: nvmet_fc_rcv_ls_req %d\n", + ctxp->oxid, rc); + + /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ + if (nvmebuf) + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + + atomic_inc(&tgtp->xmt_ls_abort); + lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid); +} + +/** + * lpfc_nvmet_unsol_fcp_buffer - Process an unsolicited event data buffer + * @phba: pointer to lpfc hba data structure. + * @pring: pointer to a SLI ring. + * @nvmebuf: pointer to lpfc nvme command HBQ data structure. + * + * This routine is used for processing the WQE associated with a unsolicited + * event. It first determines whether there is an existing ndlp that matches + * the DID from the unsolicited WQE. If not, it will create a new one with + * the DID from the unsolicited WQE. The ELS command from the unsolicited + * WQE is then used to invoke the proper routine and to set up proper state + * of the discovery state machine. 
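+ * The received NVME FCP command IU is handed to the transport with
+ * nvmet_fc_rcv_fcp_req(); if the transport cannot accept it, the exchange
+ * is aborted.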
+ **/ +static void +lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct rqb_dmabuf *nvmebuf, + uint64_t isr_timestamp) +{ + struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_nvmet_tgtport *tgtp; + struct fc_frame_header *fc_hdr; + uint32_t *payload; + uint32_t size, oxid, sid, rc; + + oxid = 0; + if (!nvmebuf || !phba->targetport) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6157 FCP Drop IO\n"); + goto dropit; + } + + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + payload = (uint32_t *)(nvmebuf->dbuf.virt); + fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); + size = nvmebuf->bytes_recv; + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + sid = sli4_sid_from_fc_hdr(fc_hdr); + + ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmebuf->context; + if (ctxp == NULL) { + atomic_inc(&tgtp->rcv_fcp_cmd_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6158 FCP Drop IO x%x: Alloc\n", + oxid); + lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); + /* Cannot send ABTS without context */ + return; + } + memset(ctxp, 0, sizeof(ctxp->ctx)); + ctxp->wqeq = NULL; + ctxp->txrdy = NULL; + ctxp->offset = 0; + ctxp->phba = phba; + ctxp->size = size; + ctxp->oxid = oxid; + ctxp->sid = sid; + ctxp->state = LPFC_NVMET_STE_RCV; + ctxp->rqb_buffer = nvmebuf; + ctxp->entry_cnt = 1; + ctxp->flag = 0; + + atomic_inc(&tgtp->rcv_fcp_cmd_in); + /* + * The calling sequence should be: + * nvmet_fc_rcv_fcp_req -> lpfc_nvmet_xmt_fcp_op/cmp -> req->done + * lpfc_nvmet_xmt_fcp_op_cmp should free the allocated ctxp. + */ + rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req, + payload, size); + + /* Process FCP command */ + if (rc == 0) { + atomic_inc(&tgtp->rcv_fcp_cmd_out); + return; + } + + atomic_inc(&tgtp->rcv_fcp_cmd_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n", + ctxp->oxid, rc); +dropit: + if (oxid) { + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid); + return; + } + + if (nvmebuf) { + nvmebuf->iocbq->hba_wqidx = 0; + /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ + lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); + } +} + +/** + * lpfc_nvmet_unsol_ls_event - Process an unsolicited event from an nvme nport + * @phba: pointer to lpfc hba data structure. + * @pring: pointer to a SLI ring. + * @nvmebuf: pointer to received nvme data structure. + * + * This routine is used to process an unsolicited event received from a SLI + * (Service Level Interface) ring. The actual processing of the data buffer + * associated with the unsolicited event is done by invoking the routine + * lpfc_nvmet_unsol_ls_buffer() after properly set up the buffer from the + * SLI RQ on which the unsolicited event was received. + **/ +void +lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *piocb) +{ + struct lpfc_dmabuf *d_buf; + struct hbq_dmabuf *nvmebuf; + + d_buf = piocb->context2; + nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf); + + if (phba->nvmet_support == 0) { + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + return; + } + lpfc_nvmet_unsol_ls_buffer(phba, pring, nvmebuf); +} + +/** + * lpfc_nvmet_unsol_fcp_event - Process an unsolicited event from an nvme nport + * @phba: pointer to lpfc hba data structure. + * @pring: pointer to a SLI ring. + * @nvmebuf: pointer to received nvme data structure. + * + * This routine is used to process an unsolicited event received from a SLI + * (Service Level Interface) ring. 
The actual processing of the data buffer + * associated with the unsolicited event is done by invoking the routine + * lpfc_nvmet_unsol_fcp_buffer() after properly set up the buffer from the + * SLI RQ on which the unsolicited event was received. + **/ +void +lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, + struct lpfc_sli_ring *pring, + struct rqb_dmabuf *nvmebuf, + uint64_t isr_timestamp) +{ + if (phba->nvmet_support == 0) { + lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); + return; + } + lpfc_nvmet_unsol_fcp_buffer(phba, pring, nvmebuf, + isr_timestamp); +} + +/** + * lpfc_nvmet_prep_ls_wqe - Allocate and prepare a lpfc wqe data structure + * @phba: pointer to a host N_Port data structure. + * @ctxp: Context info for NVME LS Request + * @rspbuf: DMA buffer of NVME command. + * @rspsize: size of the NVME command. + * + * This routine is used for allocating a lpfc-WQE data structure from + * the driver lpfc-WQE free-list and prepare the WQE with the parameters + * passed into the routine for discovery state machine to issue an Extended + * Link Service (NVME) commands. It is a generic lpfc-WQE allocation + * and preparation routine that is used by all the discovery state machine + * routines and the NVME command-specific fields will be later set up by + * the individual discovery machine routines after calling this routine + * allocating and preparing a generic WQE data structure. It fills in the + * Buffer Descriptor Entries (BDEs), allocates buffers for both command + * payload and response payload (if expected). The reference count on the + * ndlp is incremented by 1 and the reference to the ndlp is put into + * context1 of the WQE data structure for this WQE to hold the ndlp + * reference for the command's callback function to access later. 
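+ * The LS response payload itself is supplied by the caller through @rspbuf
+ * and @rspsize and is transmitted with an XMIT_SEQUENCE64 WQE.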
+ * + * Return code + * Pointer to the newly allocated/prepared nvme wqe data structure + * NULL - when nvme wqe data structure allocation/preparation failed + **/ +static struct lpfc_iocbq * +lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp, + dma_addr_t rspbuf, uint16_t rspsize) +{ + struct lpfc_nodelist *ndlp; + struct lpfc_iocbq *nvmewqe; + union lpfc_wqe *wqe; + + if (!lpfc_is_link_up(phba)) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6104 lpfc_nvmet_prep_ls_wqe: link err: " + "NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + return NULL; + } + + /* Allocate buffer for command wqe */ + nvmewqe = lpfc_sli_get_iocbq(phba); + if (nvmewqe == NULL) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6105 lpfc_nvmet_prep_ls_wqe: No WQE: " + "NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + return NULL; + } + + ndlp = lpfc_findnode_did(phba->pport, ctxp->sid); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6106 lpfc_nvmet_prep_ls_wqe: No ndlp: " + "NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + goto nvme_wqe_free_wqeq_exit; + } + ctxp->wqeq = nvmewqe; + + /* prevent preparing wqe with NULL ndlp reference */ + nvmewqe->context1 = lpfc_nlp_get(ndlp); + if (nvmewqe->context1 == NULL) + goto nvme_wqe_free_wqeq_exit; + nvmewqe->context2 = ctxp; + + wqe = &nvmewqe->wqe; + memset(wqe, 0, sizeof(union lpfc_wqe)); + + /* Words 0 - 2 */ + wqe->xmit_sequence.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; + wqe->xmit_sequence.bde.tus.f.bdeSize = rspsize; + wqe->xmit_sequence.bde.addrLow = le32_to_cpu(putPaddrLow(rspbuf)); + wqe->xmit_sequence.bde.addrHigh = le32_to_cpu(putPaddrHigh(rspbuf)); + + /* Word 3 */ + + /* Word 4 */ + + /* Word 5 */ + bf_set(wqe_dfctl, &wqe->xmit_sequence.wge_ctl, 0); + bf_set(wqe_ls, &wqe->xmit_sequence.wge_ctl, 1); + bf_set(wqe_la, &wqe->xmit_sequence.wge_ctl, 0); + bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_DD_SOL_CTL); + bf_set(wqe_type, &wqe->xmit_sequence.wge_ctl, FC_TYPE_NVME); + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->xmit_sequence.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->xmit_sequence.wqe_com, nvmewqe->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_cmnd, &wqe->xmit_sequence.wqe_com, + CMD_XMIT_SEQUENCE64_WQE); + bf_set(wqe_ct, &wqe->xmit_sequence.wqe_com, SLI4_CT_RPI); + bf_set(wqe_class, &wqe->xmit_sequence.wqe_com, CLASS3); + bf_set(wqe_pu, &wqe->xmit_sequence.wqe_com, 0); + + /* Word 8 */ + wqe->xmit_sequence.wqe_com.abort_tag = nvmewqe->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->xmit_sequence.wqe_com, nvmewqe->iotag); + /* Needs to be set by caller */ + bf_set(wqe_rcvoxid, &wqe->xmit_sequence.wqe_com, ctxp->oxid); + + /* Word 10 */ + bf_set(wqe_dbde, &wqe->xmit_sequence.wqe_com, 1); + bf_set(wqe_iod, &wqe->xmit_sequence.wqe_com, LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe->xmit_sequence.wqe_com, + LPFC_WQE_LENLOC_WORD12); + bf_set(wqe_ebde_cnt, &wqe->xmit_sequence.wqe_com, 0); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->xmit_sequence.wqe_com, + LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe->xmit_sequence.wqe_com, + OTHER_COMMAND); + + /* Word 12 */ + wqe->xmit_sequence.xmit_len = rspsize; + + nvmewqe->retry = 1; + nvmewqe->vport = phba->pport; + nvmewqe->drvrTimeout = (phba->fc_ratov * 3) + LPFC_DRVR_TIMEOUT; + nvmewqe->iocb_flag |= LPFC_IO_NVME_LS; + + /* Xmit NVME response to remote NPORT */ + lpfc_printf_log(phba, 
KERN_INFO, LOG_NVME_DISC, + "6039 Xmit NVME LS response to remote " + "NPORT x%x iotag:x%x oxid:x%x size:x%x\n", + ndlp->nlp_DID, nvmewqe->iotag, ctxp->oxid, + rspsize); + return nvmewqe; + +nvme_wqe_free_wqeq_exit: + nvmewqe->context2 = NULL; + nvmewqe->context3 = NULL; + lpfc_sli_release_iocbq(phba, nvmewqe); + return NULL; +} + + +static struct lpfc_iocbq * +lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp) +{ + struct nvmefc_tgt_fcp_req *rsp = &ctxp->ctx.fcp_req; + struct lpfc_nvmet_tgtport *tgtp; + struct sli4_sge *sgl; + struct lpfc_nodelist *ndlp; + struct lpfc_iocbq *nvmewqe; + struct scatterlist *sgel; + union lpfc_wqe128 *wqe; + uint32_t *txrdy; + dma_addr_t physaddr; + int i, cnt; + int xc = 1; + + if (!lpfc_is_link_up(phba)) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6107 lpfc_nvmet_prep_fcp_wqe: link err:" + "NPORT x%x oxid:x%x\n", ctxp->sid, + ctxp->oxid); + return NULL; + } + + ndlp = lpfc_findnode_did(phba->pport, ctxp->sid); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6108 lpfc_nvmet_prep_fcp_wqe: no ndlp: " + "NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + return NULL; + } + + if (rsp->sg_cnt > phba->cfg_sg_seg_cnt) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6109 lpfc_nvmet_prep_fcp_wqe: seg cnt err: " + "NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + return NULL; + } + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + nvmewqe = ctxp->wqeq; + if (nvmewqe == NULL) { + /* Allocate buffer for command wqe */ + nvmewqe = ctxp->rqb_buffer->iocbq; + if (nvmewqe == NULL) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6110 lpfc_nvmet_prep_fcp_wqe: No " + "WQE: NPORT x%x oxid:x%x\n", + ctxp->sid, ctxp->oxid); + return NULL; + } + ctxp->wqeq = nvmewqe; + xc = 0; /* create new XRI */ + nvmewqe->sli4_lxritag = NO_XRI; + nvmewqe->sli4_xritag = NO_XRI; + } + + /* Sanity check */ + if (((ctxp->state == LPFC_NVMET_STE_RCV) && + (ctxp->entry_cnt == 1)) || + ((ctxp->state == LPFC_NVMET_STE_DATA) && + (ctxp->entry_cnt > 1))) { + wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; + } else { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6111 Wrong state %s: %d cnt %d\n", + __func__, ctxp->state, ctxp->entry_cnt); + return NULL; + } + + sgl = (struct sli4_sge *)ctxp->rqb_buffer->sglq->sgl; + switch (rsp->op) { + case NVMET_FCOP_READDATA: + case NVMET_FCOP_READDATA_RSP: + /* Words 0 - 2 : The first sg segment */ + sgel = &rsp->sg[0]; + physaddr = sg_dma_address(sgel); + wqe->fcp_tsend.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; + wqe->fcp_tsend.bde.tus.f.bdeSize = sg_dma_len(sgel); + wqe->fcp_tsend.bde.addrLow = cpu_to_le32(putPaddrLow(physaddr)); + wqe->fcp_tsend.bde.addrHigh = + cpu_to_le32(putPaddrHigh(physaddr)); + + /* Word 3 */ + wqe->fcp_tsend.payload_offset_len = 0; + + /* Word 4 */ + wqe->fcp_tsend.relative_offset = ctxp->offset; + + /* Word 5 */ + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->fcp_tsend.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->fcp_tsend.wqe_com, + nvmewqe->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_cmnd, &wqe->fcp_tsend.wqe_com, CMD_FCP_TSEND64_WQE); + + /* Word 8 */ + wqe->fcp_tsend.wqe_com.abort_tag = nvmewqe->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->fcp_tsend.wqe_com, nvmewqe->iotag); + bf_set(wqe_rcvoxid, &wqe->fcp_tsend.wqe_com, ctxp->oxid); + + /* Word 10 */ + bf_set(wqe_nvme, 
&wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_dbde, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_iod, &wqe->fcp_tsend.wqe_com, LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe->fcp_tsend.wqe_com, + LPFC_WQE_LENLOC_WORD12); + bf_set(wqe_ebde_cnt, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_xc, &wqe->fcp_tsend.wqe_com, xc); + bf_set(wqe_nvme, &wqe->fcp_tsend.wqe_com, 1); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_tsend.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->fcp_tsend.wqe_com, + LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe->fcp_tsend.wqe_com, + FCP_COMMAND_TSEND); + + /* Word 12 */ + wqe->fcp_tsend.fcp_data_len = rsp->transfer_length; + + /* Setup 2 SKIP SGEs */ + sgl->addr_hi = 0; + sgl->addr_lo = 0; + sgl->word2 = 0; + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = 0; + sgl++; + sgl->addr_hi = 0; + sgl->addr_lo = 0; + sgl->word2 = 0; + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = 0; + sgl++; + if (rsp->op == NVMET_FCOP_READDATA_RSP) { + atomic_inc(&tgtp->xmt_fcp_read_rsp); + bf_set(wqe_ar, &wqe->fcp_tsend.wqe_com, 1); + if ((ndlp->nlp_flag & NLP_SUPPRESS_RSP) && + (rsp->rsplen == 12)) { + bf_set(wqe_sup, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_wqes, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_irsp, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_irsplen, &wqe->fcp_tsend.wqe_com, 0); + } else { + bf_set(wqe_sup, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_wqes, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_irsp, &wqe->fcp_tsend.wqe_com, 1); + bf_set(wqe_irsplen, &wqe->fcp_tsend.wqe_com, + ((rsp->rsplen >> 2) - 1)); + memcpy(&wqe->words[16], rsp->rspaddr, + rsp->rsplen); + } + } else { + atomic_inc(&tgtp->xmt_fcp_read); + + bf_set(wqe_sup, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_wqes, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_irsp, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_ar, &wqe->fcp_tsend.wqe_com, 0); + bf_set(wqe_irsplen, &wqe->fcp_tsend.wqe_com, 0); + } + ctxp->state = LPFC_NVMET_STE_DATA; + break; + + case NVMET_FCOP_WRITEDATA: + /* Words 0 - 2 : The first sg segment */ + txrdy = pci_pool_alloc(phba->txrdy_payload_pool, + GFP_KERNEL, &physaddr); + if (!txrdy) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6041 Bad txrdy buffer: oxid x%x\n", + ctxp->oxid); + return NULL; + } + ctxp->txrdy = txrdy; + ctxp->txrdy_phys = physaddr; + wqe->fcp_treceive.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; + wqe->fcp_treceive.bde.tus.f.bdeSize = TXRDY_PAYLOAD_LEN; + wqe->fcp_treceive.bde.addrLow = + cpu_to_le32(putPaddrLow(physaddr)); + wqe->fcp_treceive.bde.addrHigh = + cpu_to_le32(putPaddrHigh(physaddr)); + + /* Word 3 */ + wqe->fcp_treceive.payload_offset_len = TXRDY_PAYLOAD_LEN; + + /* Word 4 */ + wqe->fcp_treceive.relative_offset = ctxp->offset; + + /* Word 5 */ + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->fcp_treceive.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->fcp_treceive.wqe_com, + nvmewqe->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_ar, &wqe->fcp_treceive.wqe_com, 0); + bf_set(wqe_cmnd, &wqe->fcp_treceive.wqe_com, + CMD_FCP_TRECEIVE64_WQE); + + /* Word 8 */ + wqe->fcp_treceive.wqe_com.abort_tag = nvmewqe->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->fcp_treceive.wqe_com, nvmewqe->iotag); + bf_set(wqe_rcvoxid, &wqe->fcp_treceive.wqe_com, ctxp->oxid); + + /* Word 10 */ + bf_set(wqe_nvme, &wqe->fcp_treceive.wqe_com, 1); + bf_set(wqe_dbde, &wqe->fcp_treceive.wqe_com, 1); + bf_set(wqe_iod, 
&wqe->fcp_treceive.wqe_com, LPFC_WQE_IOD_READ); + bf_set(wqe_lenloc, &wqe->fcp_treceive.wqe_com, + LPFC_WQE_LENLOC_WORD12); + bf_set(wqe_xc, &wqe->fcp_treceive.wqe_com, xc); + bf_set(wqe_wqes, &wqe->fcp_treceive.wqe_com, 0); + bf_set(wqe_irsp, &wqe->fcp_treceive.wqe_com, 0); + bf_set(wqe_irsplen, &wqe->fcp_treceive.wqe_com, 0); + bf_set(wqe_nvme, &wqe->fcp_treceive.wqe_com, 1); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_treceive.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->fcp_treceive.wqe_com, + LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe->fcp_treceive.wqe_com, + FCP_COMMAND_TRECEIVE); + bf_set(wqe_sup, &wqe->fcp_tsend.wqe_com, 0); + + /* Word 12 */ + wqe->fcp_tsend.fcp_data_len = rsp->transfer_length; + + /* Setup 1 TXRDY and 1 SKIP SGE */ + txrdy[0] = 0; + txrdy[1] = cpu_to_be32(rsp->transfer_length); + txrdy[2] = 0; + + sgl->addr_hi = putPaddrHigh(physaddr); + sgl->addr_lo = putPaddrLow(physaddr); + sgl->word2 = 0; + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = cpu_to_le32(TXRDY_PAYLOAD_LEN); + sgl++; + sgl->addr_hi = 0; + sgl->addr_lo = 0; + sgl->word2 = 0; + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = 0; + sgl++; + ctxp->state = LPFC_NVMET_STE_DATA; + atomic_inc(&tgtp->xmt_fcp_write); + break; + + case NVMET_FCOP_RSP: + /* Words 0 - 2 */ + sgel = &rsp->sg[0]; + physaddr = rsp->rspdma; + wqe->fcp_trsp.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; + wqe->fcp_trsp.bde.tus.f.bdeSize = rsp->rsplen; + wqe->fcp_trsp.bde.addrLow = + cpu_to_le32(putPaddrLow(physaddr)); + wqe->fcp_trsp.bde.addrHigh = + cpu_to_le32(putPaddrHigh(physaddr)); + + /* Word 3 */ + wqe->fcp_trsp.response_len = rsp->rsplen; + + /* Word 4 */ + wqe->fcp_trsp.rsvd_4_5[0] = 0; + + + /* Word 5 */ + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe->fcp_trsp.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe->fcp_trsp.wqe_com, + nvmewqe->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_ag, &wqe->fcp_trsp.wqe_com, 1); + bf_set(wqe_cmnd, &wqe->fcp_trsp.wqe_com, CMD_FCP_TRSP64_WQE); + + /* Word 8 */ + wqe->fcp_trsp.wqe_com.abort_tag = nvmewqe->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe->fcp_trsp.wqe_com, nvmewqe->iotag); + bf_set(wqe_rcvoxid, &wqe->fcp_trsp.wqe_com, ctxp->oxid); + + /* Word 10 */ + bf_set(wqe_nvme, &wqe->fcp_trsp.wqe_com, 1); + bf_set(wqe_dbde, &wqe->fcp_trsp.wqe_com, 0); + bf_set(wqe_iod, &wqe->fcp_trsp.wqe_com, LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe->fcp_trsp.wqe_com, + LPFC_WQE_LENLOC_WORD3); + bf_set(wqe_xc, &wqe->fcp_trsp.wqe_com, xc); + bf_set(wqe_nvme, &wqe->fcp_trsp.wqe_com, 1); + if (phba->cfg_nvme_oas) + bf_set(wqe_oas, &wqe->fcp_trsp.wqe_com, 1); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe->fcp_trsp.wqe_com, + LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe->fcp_trsp.wqe_com, + FCP_COMMAND_TRSP); + bf_set(wqe_sup, &wqe->fcp_tsend.wqe_com, 0); + ctxp->state = LPFC_NVMET_STE_RSP; + + if (rsp->rsplen == LPFC_NVMET_SUCCESS_LEN) { + /* Good response - all zero's on wire */ + bf_set(wqe_wqes, &wqe->fcp_trsp.wqe_com, 0); + bf_set(wqe_irsp, &wqe->fcp_trsp.wqe_com, 0); + bf_set(wqe_irsplen, &wqe->fcp_trsp.wqe_com, 0); + } else { + bf_set(wqe_wqes, &wqe->fcp_trsp.wqe_com, 1); + bf_set(wqe_irsp, &wqe->fcp_trsp.wqe_com, 1); + bf_set(wqe_irsplen, &wqe->fcp_trsp.wqe_com, + ((rsp->rsplen >> 2) - 1)); + memcpy(&wqe->words[16], rsp->rspaddr, rsp->rsplen); + } + + /* Use rspbuf, NOT sg list */ + rsp->sg_cnt = 0; + sgl->word2 = 0; + 
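+		/* A zero sg_cnt keeps the data SGE setup loop below from running */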
atomic_inc(&tgtp->xmt_fcp_rsp); + break; + + default: + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, + "6064 Unknown Rsp Op %d\n", + rsp->op); + return NULL; + } + + nvmewqe->retry = 1; + nvmewqe->vport = phba->pport; + nvmewqe->drvrTimeout = (phba->fc_ratov * 3) + LPFC_DRVR_TIMEOUT; + nvmewqe->context1 = ndlp; + + for (i = 0; i < rsp->sg_cnt; i++) { + sgel = &rsp->sg[i]; + physaddr = sg_dma_address(sgel); + cnt = sg_dma_len(sgel); + sgl->addr_hi = putPaddrHigh(physaddr); + sgl->addr_lo = putPaddrLow(physaddr); + sgl->word2 = 0; + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); + bf_set(lpfc_sli4_sge_offset, sgl, ctxp->offset); + if ((i+1) == rsp->sg_cnt) + bf_set(lpfc_sli4_sge_last, sgl, 1); + sgl->word2 = cpu_to_le32(sgl->word2); + sgl->sge_len = cpu_to_le32(cnt); + sgl++; + ctxp->offset += cnt; + } + return nvmewqe; +} + +/** + * lpfc_nvmet_sol_fcp_abort_cmp - Completion handler for ABTS + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. This function is the completion handler for NVME ABTS for FCP cmds + * The function frees memory resources used for the NVME commands. + **/ +static void +lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_nvmet_tgtport *tgtp; + uint32_t status, result; + + ctxp = cmdwqe->context2; + status = bf_get(lpfc_wcqe_c_status, wcqe); + result = wcqe->parameter; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_inc(&tgtp->xmt_abort_cmpl); + + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6165 Abort cmpl: xri x%x WCQE: %08x %08x %08x %08x\n", + ctxp->oxid, wcqe->word0, wcqe->total_data_placed, + result, wcqe->word3); + + ctxp->state = LPFC_NVMET_STE_DONE; + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + lpfc_sli_release_iocbq(phba, cmdwqe); +} + +/** + * lpfc_nvmet_xmt_fcp_abort_cmp - Completion handler for ABTS + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. This function is the completion handler for NVME ABTS for FCP cmds + * The function frees memory resources used for the NVME commands. 
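+ * Once the abort completes, the exchange context is marked done and its
+ * RQ buffer is reposted for reuse.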
+ **/ +static void +lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_nvmet_tgtport *tgtp; + uint32_t status, result; + + ctxp = cmdwqe->context2; + status = bf_get(lpfc_wcqe_c_status, wcqe); + result = wcqe->parameter; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_inc(&tgtp->xmt_abort_cmpl); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6070 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n", + ctxp, wcqe->word0, wcqe->total_data_placed, + result, wcqe->word3); + + if (ctxp) { + /* Sanity check */ + if (ctxp->state != LPFC_NVMET_STE_ABORT) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6112 ABORT Wrong state:%d oxid x%x\n", + ctxp->state, ctxp->oxid); + } + ctxp->state = LPFC_NVMET_STE_DONE; + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + } +} + +/** + * lpfc_nvmet_xmt_ls_abort_cmp - Completion handler for ABTS + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. This function is the completion handler for NVME ABTS for LS cmds + * The function frees memory resources used for the NVME commands. + **/ +static void +lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_nvmet_tgtport *tgtp; + uint32_t status, result; + + ctxp = cmdwqe->context2; + status = bf_get(lpfc_wcqe_c_status, wcqe); + result = wcqe->parameter; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_inc(&tgtp->xmt_abort_cmpl); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6083 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n", + ctxp, wcqe->word0, wcqe->total_data_placed, + result, wcqe->word3); + + if (ctxp) { + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + lpfc_sli_release_iocbq(phba, cmdwqe); + kfree(ctxp); + } else + lpfc_sli_release_iocbq(phba, cmdwqe); +} + +static int +lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp, + uint32_t sid, uint16_t xri) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_iocbq *abts_wqeq; + union lpfc_wqe *wqe_abts; + struct lpfc_nodelist *ndlp; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6067 %s: Entrypoint: sid %x xri %x\n", __func__, + sid, xri); + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + + ndlp = lpfc_findnode_did(phba->pport, sid); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { + atomic_inc(&tgtp->xmt_abort_rsp_error); + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6134 Drop ABTS - wrong NDLP state x%x.\n", + ndlp->nlp_state); + + /* No failure to an ABTS request. */ + return 0; + } + + abts_wqeq = ctxp->wqeq; + wqe_abts = &abts_wqeq->wqe; + ctxp->state = LPFC_NVMET_STE_ABORT; + + /* + * Since we zero the whole WQE, we need to ensure we set the WQE fields + * that were initialized in lpfc_sli4_nvmet_alloc. 
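+	 * The XRI tag, context tag and request tag are re-populated below.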
+ */ + memset(wqe_abts, 0, sizeof(union lpfc_wqe)); + + /* Word 5 */ + bf_set(wqe_dfctl, &wqe_abts->xmit_sequence.wge_ctl, 0); + bf_set(wqe_ls, &wqe_abts->xmit_sequence.wge_ctl, 1); + bf_set(wqe_la, &wqe_abts->xmit_sequence.wge_ctl, 0); + bf_set(wqe_rctl, &wqe_abts->xmit_sequence.wge_ctl, FC_RCTL_BA_ABTS); + bf_set(wqe_type, &wqe_abts->xmit_sequence.wge_ctl, FC_TYPE_BLS); + + /* Word 6 */ + bf_set(wqe_ctxt_tag, &wqe_abts->xmit_sequence.wqe_com, + phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]); + bf_set(wqe_xri_tag, &wqe_abts->xmit_sequence.wqe_com, + abts_wqeq->sli4_xritag); + + /* Word 7 */ + bf_set(wqe_cmnd, &wqe_abts->xmit_sequence.wqe_com, + CMD_XMIT_SEQUENCE64_WQE); + bf_set(wqe_ct, &wqe_abts->xmit_sequence.wqe_com, SLI4_CT_RPI); + bf_set(wqe_class, &wqe_abts->xmit_sequence.wqe_com, CLASS3); + bf_set(wqe_pu, &wqe_abts->xmit_sequence.wqe_com, 0); + + /* Word 8 */ + wqe_abts->xmit_sequence.wqe_com.abort_tag = abts_wqeq->iotag; + + /* Word 9 */ + bf_set(wqe_reqtag, &wqe_abts->xmit_sequence.wqe_com, abts_wqeq->iotag); + /* Needs to be set by caller */ + bf_set(wqe_rcvoxid, &wqe_abts->xmit_sequence.wqe_com, xri); + + /* Word 10 */ + bf_set(wqe_dbde, &wqe_abts->xmit_sequence.wqe_com, 1); + bf_set(wqe_iod, &wqe_abts->xmit_sequence.wqe_com, LPFC_WQE_IOD_WRITE); + bf_set(wqe_lenloc, &wqe_abts->xmit_sequence.wqe_com, + LPFC_WQE_LENLOC_WORD12); + bf_set(wqe_ebde_cnt, &wqe_abts->xmit_sequence.wqe_com, 0); + bf_set(wqe_qosd, &wqe_abts->xmit_sequence.wqe_com, 0); + + /* Word 11 */ + bf_set(wqe_cqid, &wqe_abts->xmit_sequence.wqe_com, + LPFC_WQE_CQ_ID_DEFAULT); + bf_set(wqe_cmd_type, &wqe_abts->xmit_sequence.wqe_com, + OTHER_COMMAND); + + abts_wqeq->vport = phba->pport; + abts_wqeq->context1 = ndlp; + abts_wqeq->context2 = ctxp; + abts_wqeq->context3 = NULL; + abts_wqeq->rsvd2 = 0; + /* hba_wqidx should already be setup from command we are aborting */ + abts_wqeq->iocb.ulpCommand = CMD_XMIT_SEQUENCE64_CR; + abts_wqeq->iocb.ulpLe = 1; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6069 Issue ABTS to xri x%x reqtag x%x\n", + xri, abts_wqeq->iotag); + return 1; +} + +static int +lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp, + uint32_t sid, uint16_t xri) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_iocbq *abts_wqeq; + union lpfc_wqe *abts_wqe; + struct lpfc_nodelist *ndlp; + unsigned long flags; + int rc; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { + ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq->hba_wqidx = 0; + } + + ndlp = lpfc_findnode_did(phba->pport, sid); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { + atomic_inc(&tgtp->xmt_abort_rsp_error); + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6160 Drop ABTS - wrong NDLP state x%x.\n", + ndlp->nlp_state); + + /* No failure to an ABTS request. */ + return 0; + } + + /* Issue ABTS for this WQE based on iotag */ + ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba); + if (!ctxp->abort_wqeq) { + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6161 Abort failed: No wqeqs: " + "xri: x%x\n", ctxp->oxid); + /* No failure to an ABTS request. */ + return 0; + } + abts_wqeq = ctxp->abort_wqeq; + abts_wqe = &abts_wqeq->wqe; + ctxp->state = LPFC_NVMET_STE_ABORT; + + /* Announce entry to new IO submit field. 
*/ + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6162 Abort Request to rport DID x%06x " + "for xri x%x x%x\n", + ctxp->sid, ctxp->oxid, ctxp->wqeq->sli4_xritag); + + /* If the hba is getting reset, this flag is set. It is + * cleared when the reset is complete and rings reestablished. + */ + spin_lock_irqsave(&phba->hbalock, flags); + /* driver queued commands are in process of being flushed */ + if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6163 Driver in reset cleanup - flushing " + "NVME Req now. hba_flag x%x oxid x%x\n", + phba->hba_flag, ctxp->oxid); + lpfc_sli_release_iocbq(phba, abts_wqeq); + return 0; + } + + /* Outstanding abort is in progress */ + if (abts_wqeq->iocb_flag & LPFC_DRIVER_ABORTED) { + spin_unlock_irqrestore(&phba->hbalock, flags); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6164 Outstanding NVME I/O Abort Request " + "still pending on oxid x%x\n", + ctxp->oxid); + lpfc_sli_release_iocbq(phba, abts_wqeq); + return 0; + } + + /* Ready - mark outstanding as aborted by driver. */ + abts_wqeq->iocb_flag |= LPFC_DRIVER_ABORTED; + + /* WQEs are reused. Clear stale data and set key fields to + * zero like ia, iaab, iaar, xri_tag, and ctxt_tag. + */ + memset(abts_wqe, 0, sizeof(union lpfc_wqe)); + + /* word 3 */ + bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); + + /* word 7 */ + bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); + bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); + + /* word 8 - tell the FW to abort the IO associated with this + * outstanding exchange ID. + */ + abts_wqe->abort_cmd.wqe_com.abort_tag = ctxp->wqeq->sli4_xritag; + + /* word 9 - this is the iotag for the abts_wqe completion. */ + bf_set(wqe_reqtag, &abts_wqe->abort_cmd.wqe_com, + abts_wqeq->iotag); + + /* word 10 */ + bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); + + /* word 11 */ + bf_set(wqe_cmd_type, &abts_wqe->abort_cmd.wqe_com, OTHER_COMMAND); + bf_set(wqe_wqec, &abts_wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_cqid, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + + /* ABTS WQE must go to the same WQ as the WQE to be aborted */ + abts_wqeq->hba_wqidx = ctxp->wqeq->hba_wqidx; + abts_wqeq->wqe_cmpl = lpfc_nvmet_sol_fcp_abort_cmp; + abts_wqeq->iocb_cmpl = 0; + abts_wqeq->iocb_flag |= LPFC_IO_NVME; + abts_wqeq->context2 = ctxp; + rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq); + spin_unlock_irqrestore(&phba->hbalock, flags); + if (rc == WQE_SUCCESS) + return 0; + + lpfc_sli_release_iocbq(phba, abts_wqeq); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6166 Failed abts issue_wqe with status x%x " + "for oxid x%x.\n", + rc, ctxp->oxid); + return 1; +} + + +static int +lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp, + uint32_t sid, uint16_t xri) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_iocbq *abts_wqeq; + unsigned long flags; + int rc; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { + ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq->hba_wqidx = 0; + } + + rc = lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri); + if (rc == 0) + goto aerr; + + spin_lock_irqsave(&phba->hbalock, flags); + abts_wqeq = ctxp->wqeq; + abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_fcp_abort_cmp; + abts_wqeq->iocb_cmpl = 0; + abts_wqeq->iocb_flag |= LPFC_IO_NVMET; + rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, 
abts_wqeq); + spin_unlock_irqrestore(&phba->hbalock, flags); + if (rc == WQE_SUCCESS) { + atomic_inc(&tgtp->xmt_abort_rsp); + return 0; + } + +aerr: + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + atomic_inc(&tgtp->xmt_abort_rsp_error); + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6135 Failed to Issue ABTS for oxid x%x. Status x%x\n", + ctxp->oxid, rc); + return 1; +} + +static int +lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp, + uint32_t sid, uint16_t xri) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_iocbq *abts_wqeq; + union lpfc_wqe *wqe_abts; + unsigned long flags; + int rc; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { + /* Issue ABTS for this WQE based on iotag */ + ctxp->wqeq = lpfc_sli_get_iocbq(phba); + if (!ctxp->wqeq) { + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6068 Abort failed: No wqeqs: " + "xri: x%x\n", xri); + /* No failure to an ABTS request. */ + kfree(ctxp); + return 0; + } + } + abts_wqeq = ctxp->wqeq; + wqe_abts = &abts_wqeq->wqe; + lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri); + + spin_lock_irqsave(&phba->hbalock, flags); + abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_abort_cmp; + abts_wqeq->iocb_cmpl = 0; + abts_wqeq->iocb_flag |= LPFC_IO_NVME_LS; + rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, abts_wqeq); + spin_unlock_irqrestore(&phba->hbalock, flags); + if (rc == WQE_SUCCESS) { + atomic_inc(&tgtp->xmt_abort_rsp); + return 0; + } + + atomic_inc(&tgtp->xmt_abort_rsp_error); + abts_wqeq->context2 = NULL; + abts_wqeq->context3 = NULL; + lpfc_sli_release_iocbq(phba, abts_wqeq); + kfree(ctxp); + lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, + "6056 Failed to Issue ABTS. Status x%x\n", rc); + return 0; +} diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 72164acb6656..84565f810392 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2518,7 +2518,7 @@ lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, switch (fch_type) { case FC_TYPE_NVME: - /* todo: tgt: forward NVME LS to transport */ + lpfc_nvmet_unsol_ls_event(phba, pring, saveq); return 1; default: break; @@ -6867,7 +6867,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) goto out_destroy_queue; } phba->sli4_hba.nvmet_xri_cnt = rc; - /* todo: tgt: create targetport */ + lpfc_nvmet_create_targetport(phba); } else { /* update host scsi xri-sgl sizes and mappings */ rc = lpfc_sli4_scsi_sgl_update(phba); @@ -13137,7 +13137,9 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, if (fc_hdr->fh_type == FC_TYPE_FCP) { dma_buf->bytes_recv = bf_get(lpfc_rcqe_length, rcqe); - /* todo: tgt: forward cmd iu to transport */ + lpfc_nvmet_unsol_fcp_event( + phba, phba->sli4_hba.els_wq->pring, dma_buf, + cq->assoc_qp->isr_timestamp); return false; } drop: -- GitLab From 2b65e18202fd0f109b739dd6717286edca7a2b0d Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:38 -0800 Subject: [PATCH 0449/1084] scsi: lpfc: NVME Target: Add debugfs support NVME Target: Add debugfs support Adds debugfs snippets to cover the new NVME target functionality Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 303 ++++++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_nvmet.c | 317 ++++++++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_nvmet.h | 13 ++ 3 files changed, 628 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index abc39f605960..f92796d8dc42 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -47,6 +47,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -543,11 +544,13 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) int len = 0; int cnt; struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; unsigned char *statep; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport; + struct lpfc_nvmet_tgtport *tgtp; struct nvme_fc_remote_port *nrport; cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); @@ -626,6 +629,27 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) } spin_unlock_irq(shost->host_lock); + if (phba->nvmet_support && phba->targetport && (vport == phba->pport)) { + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + len += snprintf(buf + len, size - len, + "\nNVME Targetport Entry ...\n"); + + /* Port state is only one of two values for now. */ + if (phba->targetport->port_id) + statep = "REGISTERED"; + else + statep = "INIT"; + len += snprintf(buf + len, size - len, + "TGT WWNN x%llx WWPN x%llx State %s\n", + wwn_to_u64(vport->fc_nodename.u.wwn), + wwn_to_u64(vport->fc_portname.u.wwn), + statep); + len += snprintf(buf + len, size - len, + " Targetport DID x%06x\n", + phba->targetport->port_id); + goto out_exit; + } + len += snprintf(buf + len, size - len, "\nNVME Lport/Rport Entries ...\n"); @@ -718,9 +742,75 @@ static int lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) { struct lpfc_hba *phba = vport->phba; + struct lpfc_nvmet_tgtport *tgtp; int len = 0; - if (phba->nvmet_support == 0) { + if (phba->nvmet_support) { + if (!phba->targetport) + return len; + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + len += snprintf(buf+len, size-len, + "\nNVME Targetport Statistics\n"); + + len += snprintf(buf+len, size-len, + "LS: Rcv %08x Drop %08x Abort %08x\n", + atomic_read(&tgtp->rcv_ls_req_in), + atomic_read(&tgtp->rcv_ls_req_drop), + atomic_read(&tgtp->xmt_ls_abort)); + if (atomic_read(&tgtp->rcv_ls_req_in) != + atomic_read(&tgtp->rcv_ls_req_out)) { + len += snprintf(buf+len, size-len, + "Rcv LS: in %08x != out %08x\n", + atomic_read(&tgtp->rcv_ls_req_in), + atomic_read(&tgtp->rcv_ls_req_out)); + } + + len += snprintf(buf+len, size-len, + "LS: Xmt %08x Drop %08x Cmpl %08x Err %08x\n", + atomic_read(&tgtp->xmt_ls_rsp), + atomic_read(&tgtp->xmt_ls_drop), + atomic_read(&tgtp->xmt_ls_rsp_cmpl), + atomic_read(&tgtp->xmt_ls_rsp_error)); + + len += snprintf(buf+len, size-len, + "FCP: Rcv %08x Drop %08x\n", + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_drop)); + + if (atomic_read(&tgtp->rcv_fcp_cmd_in) != + atomic_read(&tgtp->rcv_fcp_cmd_out)) { + len += snprintf(buf+len, size-len, + "Rcv FCP: in %08x != out %08x\n", + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out)); + } + + len += snprintf(buf+len, size-len, + "FCP Rsp: read %08x readrsp %08x write %08x rsp 
%08x\n", + atomic_read(&tgtp->xmt_fcp_read), + atomic_read(&tgtp->xmt_fcp_read_rsp), + atomic_read(&tgtp->xmt_fcp_write), + atomic_read(&tgtp->xmt_fcp_rsp)); + + len += snprintf(buf+len, size-len, + "FCP Rsp: abort %08x drop %08x\n", + atomic_read(&tgtp->xmt_fcp_abort), + atomic_read(&tgtp->xmt_fcp_drop)); + + len += snprintf(buf+len, size-len, + "FCP Rsp Cmpl: %08x err %08x drop %08x\n", + atomic_read(&tgtp->xmt_fcp_rsp_cmpl), + atomic_read(&tgtp->xmt_fcp_rsp_error), + atomic_read(&tgtp->xmt_fcp_rsp_drop)); + + len += snprintf(buf+len, size-len, + "ABORT: Xmt %08x Err %08x Cmpl %08x", + atomic_read(&tgtp->xmt_abort_rsp), + atomic_read(&tgtp->xmt_abort_rsp_error), + atomic_read(&tgtp->xmt_abort_cmpl)); + + len += snprintf(buf+len, size-len, "\n"); + } else { if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) return len; @@ -828,6 +918,121 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) phba->ktime_data_samples)); return len; } + + /* NVME Target */ + len += snprintf(buf + len, PAGE_SIZE-len, + "ktime %s: Total Samples: %lld %lld\n", + (phba->ktime_on ? "Enabled" : "Disabled"), + phba->ktime_data_samples, + phba->ktime_status_samples); + if (phba->ktime_data_samples == 0) + return len; + + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 1: MSI-X ISR Rcv cmd -to- " + "cmd pass to NVME Layer\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg1_total / + phba->ktime_data_samples, + phba->ktime_seg1_min, + phba->ktime_seg1_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 2: cmd pass to NVME Layer- " + "-to- Driver rcv cmd OP (action)\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg2_total / + phba->ktime_data_samples, + phba->ktime_seg2_min, + phba->ktime_seg2_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 3: Driver rcv cmd OP -to- " + "Firmware WQ doorbell: cmd\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg3_total / + phba->ktime_data_samples, + phba->ktime_seg3_min, + phba->ktime_seg3_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 4: Firmware WQ doorbell: cmd " + "-to- MSI-X ISR for cmd cmpl\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg4_total / + phba->ktime_data_samples, + phba->ktime_seg4_min, + phba->ktime_seg4_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 5: MSI-X ISR for cmd cmpl " + "-to- NVME layer passed cmd done\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg5_total / + phba->ktime_data_samples, + phba->ktime_seg5_min, + phba->ktime_seg5_max); + + if (phba->ktime_status_samples == 0) { + len += snprintf(buf + len, PAGE_SIZE-len, + "Total: cmd received by MSI-X ISR " + "-to- cmd completed on wire\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld " + "max %08lld\n", + phba->ktime_seg10_total / + phba->ktime_data_samples, + phba->ktime_seg10_min, + phba->ktime_seg10_max); + return len; + } + + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 6: NVME layer passed cmd done " + "-to- Driver rcv rsp status OP\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg6_total / + phba->ktime_status_samples, + phba->ktime_seg6_min, + phba->ktime_seg6_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 7: Driver rcv rsp status OP " 
+ "-to- Firmware WQ doorbell: status\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg7_total / + phba->ktime_status_samples, + phba->ktime_seg7_min, + phba->ktime_seg7_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 8: Firmware WQ doorbell: status" + " -to- MSI-X ISR for status cmpl\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg8_total / + phba->ktime_status_samples, + phba->ktime_seg8_min, + phba->ktime_seg8_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Segment 9: MSI-X ISR for status cmpl " + "-to- NVME layer passed status done\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg9_total / + phba->ktime_status_samples, + phba->ktime_seg9_min, + phba->ktime_seg9_max); + len += snprintf(buf + len, PAGE_SIZE-len, + "Total: cmd received by MSI-X ISR -to- " + "cmd completed on wire\n"); + len += snprintf(buf + len, PAGE_SIZE-len, + "avg:%08lld min:%08lld max %08lld\n", + phba->ktime_seg10_total / + phba->ktime_status_samples, + phba->ktime_seg10_min, + phba->ktime_seg10_max); return len; } @@ -953,7 +1158,9 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) int i; int len = 0; uint32_t tot_xmt = 0; + uint32_t tot_rcv = 0; uint32_t tot_cmpl = 0; + uint32_t tot_ccmpl = 0; if (phba->nvmet_support == 0) { /* NVME Initiator */ @@ -977,6 +1184,33 @@ lpfc_debugfs_cpucheck_data(struct lpfc_vport *vport, char *buf, int size) return len; } + /* NVME Target */ + len += snprintf(buf + len, PAGE_SIZE - len, + "CPUcheck %s ", + (phba->cpucheck_on & LPFC_CHECK_NVMET_IO ? + "IO Enabled - " : "IO Disabled - ")); + len += snprintf(buf + len, PAGE_SIZE - len, + "%s\n", + (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV ? 
+ "Rcv Enabled\n" : "Rcv Disabled\n")); + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + if (i >= LPFC_CHECK_CPU_CNT) + break; + len += snprintf(buf + len, PAGE_SIZE - len, + "%02d: xmit x%08x ccmpl x%08x " + "cmpl x%08x rcv x%08x\n", + i, phba->cpucheck_xmt_io[i], + phba->cpucheck_ccmpl_io[i], + phba->cpucheck_cmpl_io[i], + phba->cpucheck_rcv_io[i]); + tot_xmt += phba->cpucheck_xmt_io[i]; + tot_rcv += phba->cpucheck_rcv_io[i]; + tot_cmpl += phba->cpucheck_cmpl_io[i]; + tot_ccmpl += phba->cpucheck_ccmpl_io[i]; + } + len += snprintf(buf + len, PAGE_SIZE - len, + "tot:xmit x%08x ccmpl x%08x cmpl x%08x rcv x%08x\n", + tot_xmt, tot_ccmpl, tot_cmpl, tot_rcv); return len; } @@ -1660,6 +1894,65 @@ lpfc_debugfs_nvmestat_open(struct inode *inode, struct file *file) return rc; } +static ssize_t +lpfc_debugfs_nvmestat_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + struct lpfc_debug *debug = file->private_data; + struct lpfc_vport *vport = (struct lpfc_vport *)debug->i_private; + struct lpfc_hba *phba = vport->phba; + struct lpfc_nvmet_tgtport *tgtp; + char mybuf[64]; + char *pbuf; + + if (!phba->targetport) + return -ENXIO; + + if (nbytes > 64) + nbytes = 64; + + /* Protect copy from user */ + if (!access_ok(VERIFY_READ, buf, nbytes)) + return -EFAULT; + + memset(mybuf, 0, sizeof(mybuf)); + + if (copy_from_user(mybuf, buf, nbytes)) + return -EFAULT; + pbuf = &mybuf[0]; + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if ((strncmp(pbuf, "reset", strlen("reset")) == 0) || + (strncmp(pbuf, "zero", strlen("zero")) == 0)) { + atomic_set(&tgtp->rcv_ls_req_in, 0); + atomic_set(&tgtp->rcv_ls_req_out, 0); + atomic_set(&tgtp->rcv_ls_req_drop, 0); + atomic_set(&tgtp->xmt_ls_abort, 0); + atomic_set(&tgtp->xmt_ls_rsp, 0); + atomic_set(&tgtp->xmt_ls_drop, 0); + atomic_set(&tgtp->xmt_ls_rsp_error, 0); + atomic_set(&tgtp->xmt_ls_rsp_cmpl, 0); + + atomic_set(&tgtp->rcv_fcp_cmd_in, 0); + atomic_set(&tgtp->rcv_fcp_cmd_out, 0); + atomic_set(&tgtp->rcv_fcp_cmd_drop, 0); + atomic_set(&tgtp->xmt_fcp_abort, 0); + atomic_set(&tgtp->xmt_fcp_drop, 0); + atomic_set(&tgtp->xmt_fcp_read_rsp, 0); + atomic_set(&tgtp->xmt_fcp_read, 0); + atomic_set(&tgtp->xmt_fcp_write, 0); + atomic_set(&tgtp->xmt_fcp_rsp, 0); + atomic_set(&tgtp->xmt_fcp_rsp_cmpl, 0); + atomic_set(&tgtp->xmt_fcp_rsp_error, 0); + atomic_set(&tgtp->xmt_fcp_rsp_drop, 0); + + atomic_set(&tgtp->xmt_abort_rsp, 0); + atomic_set(&tgtp->xmt_abort_rsp_error, 0); + atomic_set(&tgtp->xmt_abort_cmpl, 0); + } + return nbytes; +} + static int lpfc_debugfs_nvmektime_open(struct inode *inode, struct file *file) { @@ -1956,7 +2249,10 @@ lpfc_debugfs_cpucheck_write(struct file *file, const char __user *buf, pbuf = &mybuf[0]; if ((strncmp(pbuf, "on", sizeof("on") - 1) == 0)) { - phba->cpucheck_on |= LPFC_CHECK_NVME_IO; + if (phba->nvmet_support) + phba->cpucheck_on |= LPFC_CHECK_NVMET_IO; + else + phba->cpucheck_on |= LPFC_CHECK_NVME_IO; return strlen(pbuf); } else if ((strncmp(pbuf, "rcv", sizeof("rcv") - 1) == 0)) { @@ -2880,7 +3176,7 @@ lpfc_idiag_cqs_for_eq(struct lpfc_hba *phba, char *pbuffer, return 1; } - if (phba->cfg_nvmet_mrq > eqidx) { + if (eqidx < phba->cfg_nvmet_mrq) { /* NVMET CQset */ qp = phba->sli4_hba.nvmet_cqset[eqidx]; *len = __lpfc_idiag_print_cq(qp, "NVMET CQset", pbuffer, *len); @@ -4493,6 +4789,7 @@ static const struct file_operations lpfc_debugfs_op_nvmestat = { .open = lpfc_debugfs_nvmestat_open, .llseek = lpfc_debugfs_lseek, .read = lpfc_debugfs_read, + .write = lpfc_debugfs_nvmestat_write, 
.release = lpfc_debugfs_release, }; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index d4efd86bb156..a476d538360f 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -51,6 +51,7 @@ #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" +#include "lpfc_debugfs.h" static struct lpfc_iocbq *lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *, struct lpfc_nvmet_rcv_ctx *, @@ -103,6 +104,9 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, ctxp = cmdwqe->context2; rsp = &ctxp->ctx.ls_req; + lpfc_nvmeio_data(phba, "NVMET LS CMPL: xri x%x stat x%x result x%x\n", + ctxp->oxid, status, result); + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, "6038 %s: Entrypoint: ctx %p status %x/%x\n", __func__, ctxp, status, result); @@ -144,6 +148,170 @@ lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, lpfc_rq_buf_free(phba, mp); } +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS +static void +lpfc_nvmet_ktime(struct lpfc_hba *phba, + struct lpfc_nvmet_rcv_ctx *ctxp) +{ + uint64_t seg1, seg2, seg3, seg4, seg5; + uint64_t seg6, seg7, seg8, seg9, seg10; + + if (!phba->ktime_on) + return; + + if (!ctxp->ts_isr_cmd || !ctxp->ts_cmd_nvme || + !ctxp->ts_nvme_data || !ctxp->ts_data_wqput || + !ctxp->ts_isr_data || !ctxp->ts_data_nvme || + !ctxp->ts_nvme_status || !ctxp->ts_status_wqput || + !ctxp->ts_isr_status || !ctxp->ts_status_nvme) + return; + + if (ctxp->ts_isr_cmd > ctxp->ts_cmd_nvme) + return; + if (ctxp->ts_cmd_nvme > ctxp->ts_nvme_data) + return; + if (ctxp->ts_nvme_data > ctxp->ts_data_wqput) + return; + if (ctxp->ts_data_wqput > ctxp->ts_isr_data) + return; + if (ctxp->ts_isr_data > ctxp->ts_data_nvme) + return; + if (ctxp->ts_data_nvme > ctxp->ts_nvme_status) + return; + if (ctxp->ts_nvme_status > ctxp->ts_status_wqput) + return; + if (ctxp->ts_status_wqput > ctxp->ts_isr_status) + return; + if (ctxp->ts_isr_status > ctxp->ts_status_nvme) + return; + /* + * Segment 1 - Time from FCP command received by MSI-X ISR + * to FCP command is passed to NVME Layer. + * Segment 2 - Time from FCP command payload handed + * off to NVME Layer to Driver receives a Command op + * from NVME Layer. + * Segment 3 - Time from Driver receives a Command op + * from NVME Layer to Command is put on WQ. + * Segment 4 - Time from Driver WQ put is done + * to MSI-X ISR for Command cmpl. + * Segment 5 - Time from MSI-X ISR for Command cmpl to + * Command cmpl is passed to NVME Layer. + * Segment 6 - Time from Command cmpl is passed to NVME + * Layer to Driver receives a RSP op from NVME Layer. + * Segment 7 - Time from Driver receives a RSP op from + * NVME Layer to WQ put is done on TRSP FCP Status. + * Segment 8 - Time from Driver WQ put is done on TRSP + * FCP Status to MSI-X ISR for TRSP cmpl. + * Segment 9 - Time from MSI-X ISR for TRSP cmpl to + * TRSP cmpl is passed to NVME Layer. + * Segment 10 - Time from FCP command received by + * MSI-X ISR to command is completed on wire. 
+ * (Segments 1 thru 8) for READDATA / WRITEDATA + * (Segments 1 thru 4) for READDATA_RSP + */ + seg1 = ctxp->ts_cmd_nvme - ctxp->ts_isr_cmd; + seg2 = (ctxp->ts_nvme_data - ctxp->ts_isr_cmd) - seg1; + seg3 = (ctxp->ts_data_wqput - ctxp->ts_isr_cmd) - + seg1 - seg2; + seg4 = (ctxp->ts_isr_data - ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3; + seg5 = (ctxp->ts_data_nvme - ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3 - seg4; + + /* For auto rsp commands seg6 thru seg10 will be 0 */ + if (ctxp->ts_nvme_status > ctxp->ts_data_nvme) { + seg6 = (ctxp->ts_nvme_status - + ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3 - seg4 - seg5; + seg7 = (ctxp->ts_status_wqput - + ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3 - + seg4 - seg5 - seg6; + seg8 = (ctxp->ts_isr_status - + ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3 - seg4 - + seg5 - seg6 - seg7; + seg9 = (ctxp->ts_status_nvme - + ctxp->ts_isr_cmd) - + seg1 - seg2 - seg3 - seg4 - + seg5 - seg6 - seg7 - seg8; + seg10 = (ctxp->ts_isr_status - + ctxp->ts_isr_cmd); + } else { + seg6 = 0; + seg7 = 0; + seg8 = 0; + seg9 = 0; + seg10 = (ctxp->ts_isr_data - ctxp->ts_isr_cmd); + } + + phba->ktime_seg1_total += seg1; + if (seg1 < phba->ktime_seg1_min) + phba->ktime_seg1_min = seg1; + else if (seg1 > phba->ktime_seg1_max) + phba->ktime_seg1_max = seg1; + + phba->ktime_seg2_total += seg2; + if (seg2 < phba->ktime_seg2_min) + phba->ktime_seg2_min = seg2; + else if (seg2 > phba->ktime_seg2_max) + phba->ktime_seg2_max = seg2; + + phba->ktime_seg3_total += seg3; + if (seg3 < phba->ktime_seg3_min) + phba->ktime_seg3_min = seg3; + else if (seg3 > phba->ktime_seg3_max) + phba->ktime_seg3_max = seg3; + + phba->ktime_seg4_total += seg4; + if (seg4 < phba->ktime_seg4_min) + phba->ktime_seg4_min = seg4; + else if (seg4 > phba->ktime_seg4_max) + phba->ktime_seg4_max = seg4; + + phba->ktime_seg5_total += seg5; + if (seg5 < phba->ktime_seg5_min) + phba->ktime_seg5_min = seg5; + else if (seg5 > phba->ktime_seg5_max) + phba->ktime_seg5_max = seg5; + + phba->ktime_data_samples++; + if (!seg6) + goto out; + + phba->ktime_seg6_total += seg6; + if (seg6 < phba->ktime_seg6_min) + phba->ktime_seg6_min = seg6; + else if (seg6 > phba->ktime_seg6_max) + phba->ktime_seg6_max = seg6; + + phba->ktime_seg7_total += seg7; + if (seg7 < phba->ktime_seg7_min) + phba->ktime_seg7_min = seg7; + else if (seg7 > phba->ktime_seg7_max) + phba->ktime_seg7_max = seg7; + + phba->ktime_seg8_total += seg8; + if (seg8 < phba->ktime_seg8_min) + phba->ktime_seg8_min = seg8; + else if (seg8 > phba->ktime_seg8_max) + phba->ktime_seg8_max = seg8; + + phba->ktime_seg9_total += seg9; + if (seg9 < phba->ktime_seg9_min) + phba->ktime_seg9_min = seg9; + else if (seg9 > phba->ktime_seg9_max) + phba->ktime_seg9_max = seg9; +out: + phba->ktime_seg10_total += seg10; + if (seg10 < phba->ktime_seg10_min) + phba->ktime_seg10_min = seg10; + else if (seg10 > phba->ktime_seg10_max) + phba->ktime_seg10_max = seg10; + phba->ktime_status_samples++; +} +#endif + /** * lpfc_nvmet_xmt_fcp_op_cmp - Completion handler for FCP Response * @phba: Pointer to HBA context object. 
@@ -162,6 +330,9 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct nvmefc_tgt_fcp_req *rsp; struct lpfc_nvmet_rcv_ctx *ctxp; uint32_t status, result, op, start_clean; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint32_t id; +#endif ctxp = cmdwqe->context2; rsp = &ctxp->ctx.fcp_req; @@ -174,6 +345,9 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, if (!phba->targetport) goto out; + lpfc_nvmeio_data(phba, "NVMET FCP CMPL: xri x%x op x%x status x%x\n", + ctxp->oxid, op, status); + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (status) { rsp->fcp_error = NVME_SC_DATA_XFER_ERROR; @@ -194,7 +368,44 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, /* Sanity check */ ctxp->state = LPFC_NVMET_STE_DONE; ctxp->entry_cnt++; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + if (rsp->op == NVMET_FCOP_READDATA_RSP) { + ctxp->ts_isr_data = + cmdwqe->isr_timestamp; + ctxp->ts_data_nvme = + ktime_get_ns(); + ctxp->ts_nvme_status = + ctxp->ts_data_nvme; + ctxp->ts_status_wqput = + ctxp->ts_data_nvme; + ctxp->ts_isr_status = + ctxp->ts_data_nvme; + ctxp->ts_status_nvme = + ctxp->ts_data_nvme; + } else { + ctxp->ts_isr_status = + cmdwqe->isr_timestamp; + ctxp->ts_status_nvme = + ktime_get_ns(); + } + } + if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) { + id = smp_processor_id(); + if (ctxp->cpu != id) + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6703 CPU Check cmpl: " + "cpu %d expect %d\n", + id, ctxp->cpu); + if (ctxp->cpu < LPFC_CHECK_CPU_CNT) + phba->cpucheck_cmpl_io[id]++; + } +#endif rsp->done(rsp); +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) + lpfc_nvmet_ktime(phba, ctxp); +#endif /* Let Abort cmpl repost the context */ if (!(ctxp->flag & LPFC_NVMET_ABORT_OP)) lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); @@ -203,6 +414,22 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, start_clean = offsetof(struct lpfc_iocbq, wqe); memset(((char *)cmdwqe) + start_clean, 0, (sizeof(struct lpfc_iocbq) - start_clean)); +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + ctxp->ts_isr_data = cmdwqe->isr_timestamp; + ctxp->ts_data_nvme = ktime_get_ns(); + } + if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) { + id = smp_processor_id(); + if (ctxp->cpu != id) + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6704 CPU Check cmdcmpl: " + "cpu %d expect %d\n", + id, ctxp->cpu); + if (ctxp->cpu < LPFC_CHECK_CPU_CNT) + phba->cpucheck_ccmpl_io[id]++; + } +#endif rsp->done(rsp); } } @@ -254,6 +481,9 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, nvmewqeq->iocb_cmpl = NULL; nvmewqeq->context2 = ctxp; + lpfc_nvmeio_data(phba, "NVMET LS RESP: xri x%x wqidx x%x len x%x\n", + ctxp->oxid, nvmewqeq->hba_wqidx, rsp->rsplen); + rc = lpfc_sli4_issue_wqe(phba, LPFC_ELS_RING, nvmewqeq); if (rc == WQE_SUCCESS) { /* @@ -288,12 +518,39 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, struct lpfc_hba *phba = ctxp->phba; struct lpfc_iocbq *nvmewqeq; unsigned long iflags; - int rc; + int rc, id; + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + if (rsp->op == NVMET_FCOP_RSP) + ctxp->ts_nvme_status = ktime_get_ns(); + else + ctxp->ts_nvme_data = ktime_get_ns(); + } + if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) { + id = smp_processor_id(); + ctxp->cpu = id; + if (id < LPFC_CHECK_CPU_CNT) + phba->cpucheck_xmt_io[id]++; + if (rsp->hwqid != id) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6705 CPU Check OP: " + "cpu %d 
expect %d\n", + id, rsp->hwqid); + ctxp->cpu = rsp->hwqid; + } + } +#endif if (rsp->op == NVMET_FCOP_ABORT) { lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6103 Abort op: oxri x%x %d cnt %d\n", ctxp->oxid, ctxp->state, ctxp->entry_cnt); + + lpfc_nvmeio_data(phba, "NVMET FCP ABRT: " + "xri x%x state x%x cnt x%x\n", + ctxp->oxid, ctxp->state, ctxp->entry_cnt); + atomic_inc(&lpfc_nvmep->xmt_fcp_abort); ctxp->entry_cnt++; ctxp->flag |= LPFC_NVMET_ABORT_OP; @@ -330,12 +587,23 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, nvmewqeq->iocb_flag |= LPFC_IO_NVMET; ctxp->wqeq->hba_wqidx = rsp->hwqid; + lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", + ctxp->oxid, rsp->op, rsp->rsplen); + /* For now we take hbalock */ spin_lock_irqsave(&phba->hbalock, iflags); rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq); spin_unlock_irqrestore(&phba->hbalock, iflags); if (rc == WQE_SUCCESS) { ctxp->flag |= LPFC_NVMET_IO_INP; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (!phba->ktime_on) + return 0; + if (rsp->op == NVMET_FCOP_RSP) + ctxp->ts_status_wqput = ktime_get_ns(); + else + ctxp->ts_data_wqput = ktime_get_ns(); +#endif return 0; } @@ -503,6 +771,9 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (!nvmebuf || !phba->targetport) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6154 LS Drop IO\n"); + oxid = 0; + size = 0; + sid = 0; goto dropit; } @@ -520,6 +791,9 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, "6155 LS Drop IO x%x: Alloc\n", oxid); dropit: + lpfc_nvmeio_data(phba, "NVMET LS DROP: " + "xri x%x sz %d from %06x\n", + oxid, size, sid); if (nvmebuf) lpfc_in_buf_free(phba, &nvmebuf->dbuf); return; @@ -531,6 +805,9 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ctxp->wqeq = NULL; ctxp->state = LPFC_NVMET_STE_RCV; ctxp->rqb_buffer = (void *)nvmebuf; + + lpfc_nvmeio_data(phba, "NVMET LS RCV: xri x%x sz %d from %06x\n", + oxid, size, sid); /* * The calling sequence should be: * nvmet_fc_rcv_ls_req -> lpfc_nvmet_xmt_ls_rsp/cmp ->_req->done @@ -545,10 +822,15 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, "%08x %08x %08x\n", __func__, ctxp, size, rc, *payload, *(payload+1), *(payload+2), *(payload+3), *(payload+4), *(payload+5)); + if (rc == 0) { atomic_inc(&tgtp->rcv_ls_req_out); return; } + + lpfc_nvmeio_data(phba, "NVMET LS DROP: xri x%x sz %d from %06x\n", + oxid, size, sid); + atomic_inc(&tgtp->rcv_ls_req_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6156 LS Drop IO x%x: nvmet_fc_rcv_ls_req %d\n", @@ -586,11 +868,16 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr; uint32_t *payload; uint32_t size, oxid, sid, rc; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint32_t id; +#endif - oxid = 0; if (!nvmebuf || !phba->targetport) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6157 FCP Drop IO\n"); + oxid = 0; + size = 0; + sid = 0; goto dropit; } @@ -625,6 +912,30 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->entry_cnt = 1; ctxp->flag = 0; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (phba->ktime_on) { + ctxp->ts_isr_cmd = isr_timestamp; + ctxp->ts_cmd_nvme = ktime_get_ns(); + ctxp->ts_nvme_data = 0; + ctxp->ts_data_wqput = 0; + ctxp->ts_isr_data = 0; + ctxp->ts_data_nvme = 0; + ctxp->ts_nvme_status = 0; + ctxp->ts_status_wqput = 0; + ctxp->ts_isr_status = 0; + ctxp->ts_status_nvme = 0; + } + + if (phba->cpucheck_on & LPFC_CHECK_NVMET_RCV) { + id = smp_processor_id(); + if (id < 
LPFC_CHECK_CPU_CNT) + phba->cpucheck_rcv_io[id]++; + } +#endif + + lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d from %06x\n", + oxid, size, sid); + atomic_inc(&tgtp->rcv_fcp_cmd_in); /* * The calling sequence should be: @@ -645,6 +956,8 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n", ctxp->oxid, rc); dropit: + lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n", + oxid, size, sid); if (oxid) { lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid); return; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index f7b6a3f374a4..4aa18d31a5f7 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -98,4 +98,17 @@ struct lpfc_nvmet_rcv_ctx { #define LPFC_NVMET_IO_INP 1 #define LPFC_NVMET_ABORT_OP 2 struct rqb_dmabuf *rqb_buffer; + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint64_t ts_isr_cmd; + uint64_t ts_cmd_nvme; + uint64_t ts_nvme_data; + uint64_t ts_data_wqput; + uint64_t ts_isr_data; + uint64_t ts_data_nvme; + uint64_t ts_nvme_status; + uint64_t ts_status_wqput; + uint64_t ts_isr_status; + uint64_t ts_status_nvme; +#endif }; -- GitLab From d080abe0a8693c58a4b35b74bab3526d8b554ca3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:39 -0800 Subject: [PATCH 0450/1084] scsi: lpfc: Update copyrights Update copyrights to 2017 for all files touched in this patch set Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/Makefile | 4 +++- drivers/scsi/lpfc/lpfc.h | 4 +++- drivers/scsi/lpfc/lpfc_attr.c | 4 +++- drivers/scsi/lpfc/lpfc_attr.h | 4 +++- drivers/scsi/lpfc/lpfc_bsg.c | 4 +++- drivers/scsi/lpfc/lpfc_bsg.h | 6 ++++-- drivers/scsi/lpfc/lpfc_compat.h | 4 +++- drivers/scsi/lpfc/lpfc_crtn.h | 4 +++- drivers/scsi/lpfc/lpfc_ct.c | 4 +++- drivers/scsi/lpfc/lpfc_debugfs.c | 4 +++- drivers/scsi/lpfc/lpfc_debugfs.h | 4 +++- drivers/scsi/lpfc/lpfc_disc.h | 4 +++- drivers/scsi/lpfc/lpfc_els.c | 4 +++- drivers/scsi/lpfc/lpfc_hbadisc.c | 4 +++- drivers/scsi/lpfc/lpfc_hw.h | 4 +++- drivers/scsi/lpfc/lpfc_hw4.h | 6 ++++-- drivers/scsi/lpfc/lpfc_ids.h | 4 +++- drivers/scsi/lpfc/lpfc_init.c | 6 ++++-- drivers/scsi/lpfc/lpfc_logmsg.h | 4 +++- drivers/scsi/lpfc/lpfc_mbox.c | 4 +++- drivers/scsi/lpfc/lpfc_mem.c | 4 +++- drivers/scsi/lpfc/lpfc_nl.h | 4 +++- drivers/scsi/lpfc/lpfc_nportdisc.c | 4 +++- drivers/scsi/lpfc/lpfc_nvme.c | 4 +++- drivers/scsi/lpfc/lpfc_nvme.h | 4 +++- drivers/scsi/lpfc/lpfc_nvmet.c | 6 ++++-- drivers/scsi/lpfc/lpfc_nvmet.h | 4 +++- drivers/scsi/lpfc/lpfc_scsi.c | 4 +++- drivers/scsi/lpfc/lpfc_scsi.h | 4 +++- drivers/scsi/lpfc/lpfc_sli.c | 4 +++- drivers/scsi/lpfc/lpfc_sli.h | 4 +++- drivers/scsi/lpfc/lpfc_sli4.h | 4 +++- drivers/scsi/lpfc/lpfc_version.h | 8 ++++++-- drivers/scsi/lpfc/lpfc_vport.c | 4 +++- drivers/scsi/lpfc/lpfc_vport.h | 4 +++- 35 files changed, 112 insertions(+), 40 deletions(-) diff --git a/drivers/scsi/lpfc/Makefile b/drivers/scsi/lpfc/Makefile index 30a6a35abafd..cb6aa802c48e 100644 --- a/drivers/scsi/lpfc/Makefile +++ b/drivers/scsi/lpfc/Makefile @@ -1,9 +1,11 @@ #/******************************************************************* # * This file is part of the Emulex Linux Device Driver for * # * Fibre Channel Host Bus Adapters. * +# * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * +# * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * # * Copyright (C) 2004-2012 Emulex. 
All rights reserved. * # * EMULEX and SLI are trademarks of Emulex. * -# * www.emulex.com * +# * www.broadcom.com * # * * # * This program is free software; you can redistribute it and/or * # * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 8a4090c6771c..0bba2e30b4f0 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 700a68f303f3..4114cf4308d0 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_attr.h b/drivers/scsi/lpfc/lpfc_attr.h index b2bd28e965fa..d56dafcdd563 100644 --- a/drivers/scsi/lpfc/lpfc_attr.h +++ b/drivers/scsi/lpfc/lpfc_attr.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 55a2270cadfc..18157d2840a3 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2009-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_bsg.h b/drivers/scsi/lpfc/lpfc_bsg.h index f2247aa4fa17..e7d95a4e8042 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.h +++ b/drivers/scsi/lpfc/lpfc_bsg.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2010-2015 Emulex. All rights reserved. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * + * Copyright (C) 2010-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_compat.h b/drivers/scsi/lpfc/lpfc_compat.h index c88e556ea62e..6b32b0ae7506 100644 --- a/drivers/scsi/lpfc/lpfc_compat.h +++ b/drivers/scsi/lpfc/lpfc_compat.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2011 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index f56015b78525..843dd73004da 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 53bdfe421251..c22bb3f887e1 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index f92796d8dc42..599fde4ea8b1 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2007-2015 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index 5312e0f9deb6..c05f56c3023f 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2007-2011 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index e305e97e05a9..f4ff99d95db3 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2013 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 1c0ac509e6db..2d26440e6f2f 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 542797bd3134..194a14d5f8a9 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index c2221377e246..15ca21484150 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 0fddb2317875..cfdb068a3bfc 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2009-2016 Emulex. All rights reserved. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * + * Copyright (C) 2009-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_ids.h b/drivers/scsi/lpfc/lpfc_ids.h index 5733feafe25f..0ba3733eb36d 100644 --- a/drivers/scsi/lpfc/lpfc_ids.h +++ b/drivers/scsi/lpfc/lpfc_ids.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 5eafd0db758d..0ee429d773f3 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * @@ -12117,5 +12119,5 @@ module_init(lpfc_init); module_exit(lpfc_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION(LPFC_MODULE_DESC); -MODULE_AUTHOR("Emulex Corporation - tech.support@emulex.com"); +MODULE_AUTHOR("Broadcom"); MODULE_VERSION("0:" LPFC_DRIVER_VERSION); diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h index 3faf7a07bfd4..3b654ad08d1f 100644 --- a/drivers/scsi/lpfc/lpfc_logmsg.h +++ b/drivers/scsi/lpfc/lpfc_logmsg.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2009 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index 8f4bfdfd9910..a928f5187fa4 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 2437ec5f4863..c61d8d692ede 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2014 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_nl.h b/drivers/scsi/lpfc/lpfc_nl.h index f2b1bbcb196f..b93e78f671fb 100644 --- a/drivers/scsi/lpfc/lpfc_nl.h +++ b/drivers/scsi/lpfc/lpfc_nl.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2010 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 0716818f269f..061626bdf701 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 56b4b94a372e..625b6589a34d 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 85961c1ddecf..b2fae5e813f8 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index a476d538360f..c421e1738ee9 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * - * Fibre Channsel Host Bus Adapters. * + * Fibre Channsel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index 4aa18d31a5f7..ca96f05c1604 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 6ba2b3412337..9d6384af9fce 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h index def0c0a5b17e..5da7e15400cb 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.h +++ b/drivers/scsi/lpfc/lpfc_scsi.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. 
* - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 84565f810392..d8d8693040ac 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index f6cea02adc7c..9085306ddd78 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 3bfdff1a4c53..91153c9f6d18 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2009-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 0ee0623a354c..0d9353514071 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of version 2 of the GNU General * @@ -30,4 +32,6 @@ #define LPFC_MODULE_DESC "Emulex LightPulse Fibre Channel SCSI driver " \ LPFC_DRIVER_VERSION -#define LPFC_COPYRIGHT "Copyright(c) 2004-2016 Emulex. All rights reserved." +#define LPFC_COPYRIGHT "Copyright (C) 2017 Broadcom. 
All Rights Reserved. " \ + "The term \"Broadcom\" refers to Broadcom Limited " \ + "and/or its subsidiaries." diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index b0d94f2e5b44..c0eeea0694cb 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * diff --git a/drivers/scsi/lpfc/lpfc_vport.h b/drivers/scsi/lpfc/lpfc_vport.h index 6b2c94eb8134..62295971f66c 100644 --- a/drivers/scsi/lpfc/lpfc_vport.h +++ b/drivers/scsi/lpfc/lpfc_vport.h @@ -1,9 +1,11 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2017 Broadcom. All Rights Reserved. The term * + * “Broadcom” refers to Broadcom Limited and/or its subsidiaries. * * Copyright (C) 2004-2006 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * - * www.emulex.com * + * www.broadcom.com * * Portions Copyright (C) 2004-2005 Christoph Hellwig * * * * This program is free software; you can redistribute it and/or * -- GitLab From 693dcc316525d15fcf5a0a8a8e1663985ebdb02c Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 12 Feb 2017 13:52:40 -0800 Subject: [PATCH 0451/1084] scsi: lpfc: Update lpfc version to 11.2.0.7 Update lpfc version to 11.2.0.7 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 0d9353514071..86c6c9b26b82 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "11.2.0.4" +#define LPFC_DRIVER_VERSION "11.2.0.7" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ -- GitLab From 75d943f3e4222fdd4f907b71bd9d1730412e7ce9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 22 Feb 2017 08:20:47 -0800 Subject: [PATCH 0452/1084] scsi: lpfc: add missing Kconfig NVME dependencies Add missing Kconfig NVME dependencies. Signed-off-by: James Smart Signed-off-by: Martin K. 
Petersen --- drivers/scsi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index d4023bf1e739..255843446e99 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1240,6 +1240,7 @@ config SCSI_LPFC tristate "Emulex LightPulse Fibre Channel Support" depends on PCI && SCSI depends on SCSI_FC_ATTRS + depends on NVME_FC && NVME_TARGET_FC select CRC_T10DIF help This lpfc driver supports the Emulex LightPulse -- GitLab From f54f2cb540b53d55a81d620e816810d59be5cb1b Mon Sep 17 00:00:00 2001 From: Michael Hernandez Date: Wed, 15 Feb 2017 15:37:19 -0800 Subject: [PATCH 0453/1084] scsi: qla2xxx: Cleaned up queue configuration code. This patch cleaned up queue configuration code, such that once initialized, we should not touch msix_count value. This will prevent incorrect numbers of MSI-X vectors requested while performing target mode configuration. [mkp: fixed Fixes: hash] Cc: Fixes: d74595278f4a ("scsi: qla2xxx: Add multiple queue pair functionality.") Signed-off-by: Michael Hernandez Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index d01c90c7dd04..8174cee8eb53 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1815,6 +1815,7 @@ qla2x00_iospace_config(struct qla_hw_data *ha) /* Determine queue resources */ ha->max_req_queues = ha->max_rsp_queues = 1; + ha->msix_count = QLA_BASE_VECTORS; if (!ql2xmqsupport || (!IS_QLA25XX(ha) && !IS_QLA81XX(ha))) goto mqiobase_exit; @@ -1842,9 +1843,8 @@ qla2x00_iospace_config(struct qla_hw_data *ha) "BAR 3 not enabled.\n"); mqiobase_exit: - ha->msix_count = ha->max_rsp_queues + 1; ql_dbg_pci(ql_dbg_init, ha->pdev, 0x001c, - "MSIX Count:%d.\n", ha->msix_count); + "MSIX Count: %d.\n", ha->msix_count); return (0); iospace_error_exit: @@ -1892,6 +1892,7 @@ qla83xx_iospace_config(struct qla_hw_data *ha) /* 83XX 26XX always use MQ type access for queues * - mbar 2, a.k.a region 4 */ ha->max_req_queues = ha->max_rsp_queues = 1; + ha->msix_count = QLA_BASE_VECTORS; ha->mqiobase = ioremap(pci_resource_start(ha->pdev, 4), pci_resource_len(ha->pdev, 4)); @@ -1934,14 +1935,8 @@ qla83xx_iospace_config(struct qla_hw_data *ha) "BAR 1 not enabled.\n"); mqiobase_exit: - ha->msix_count = ha->max_rsp_queues + 1; - if (QLA_TGT_MODE_ENABLED()) - ha->msix_count++; - - qlt_83xx_iospace_config(ha); - ql_dbg_pci(ql_dbg_init, ha->pdev, 0x011f, - "MSIX Count:%d.\n", ha->msix_count); + "MSIX Count: %d.\n", ha->msix_count); return 0; iospace_error_exit: -- GitLab From d0d2c68b759bbf678e078fd0c71b5fde65a9392c Mon Sep 17 00:00:00 2001 From: Michael Hernandez Date: Wed, 15 Feb 2017 15:37:20 -0800 Subject: [PATCH 0454/1084] scsi: qla2xxx: Fix response queue count for Target mode. Target mode initialization was not calculating response queue values correctly resulting into one less MSI-X vector. [mkp: fixed Fixes: hash] Cc: Fixes: 093df73771ba ("scsi: qla2xxx: Fix Target mode handling with Multiqueue changes.") Signed-off-by: Michael Hernandez Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qla2xxx/qla_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 8174cee8eb53..71b6b20ae82b 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1916,12 +1916,13 @@ qla83xx_iospace_config(struct qla_hw_data *ha) if (ql2xmqsupport) { /* MB interrupt uses 1 vector */ ha->max_req_queues = ha->msix_count - 1; - ha->max_rsp_queues = ha->max_req_queues; /* ATIOQ needs 1 vector. That's 1 less QPair */ if (QLA_TGT_MODE_ENABLED()) ha->max_req_queues--; + ha->max_rsp_queues = ha->max_req_queues; + /* Queue pairs is the max value minus * the base queue pair */ ha->max_qpairs = ha->max_req_queues - 1; -- GitLab From 67f2db8792f96d8f7521461635d25f9c80245d80 Mon Sep 17 00:00:00 2001 From: Michael Hernandez Date: Wed, 15 Feb 2017 15:37:21 -0800 Subject: [PATCH 0455/1084] scsi: qla2xxx: Fix Regression introduced by pci_alloc_irq_vectors_affinity call. For target mode, we need to increase minimum vectors value by one to account for ATIO queue. Following stack trace will be seen Call Trace: qla24xx_config_rings+0x15a/0x230 [qla2xxx] qla2x00_init_rings+0x1a1/0x3a0 [qla2xxx] qla2x00_restart_isp+0x5c/0x120 [qla2xxx] qla2x00_abort_isp+0x138/0x430 [qla2xxx] ? __schedule+0x260/0x580 qla2x00_do_dpc+0x3bc/0x920 [qla2xxx] ? qla2x00_relogin+0x290/0x290 [qla2xxx] ? schedule+0x3a/0xa0 ? qla2x00_relogin+0x290/0x290 [qla2xxx] kthread+0x103/0x140 ? __kthread_init_worker+0x40/0x40 ret_from_fork+0x29/0x40 RIP: qlt_24xx_config_rings+0x6c/0x90 [mkp: fixed Fixes: hash] Cc: Fixes: 17e5fc58588b ("scsi: qla2xxx: fix MSI-X vector affinity") Signed-off-by: Michael Hernandez Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_isr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index edc2264db45b..87fc921ce391 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3015,14 +3015,17 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) int i, ret; struct qla_msix_entry *qentry; scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev); + int min_vecs = QLA_BASE_VECTORS; struct irq_affinity desc = { .pre_vectors = QLA_BASE_VECTORS, }; - if (QLA_TGT_MODE_ENABLED() && IS_ATIO_MSIX_CAPABLE(ha)) + if (QLA_TGT_MODE_ENABLED() && IS_ATIO_MSIX_CAPABLE(ha)) { desc.pre_vectors++; + min_vecs++; + } - ret = pci_alloc_irq_vectors_affinity(ha->pdev, QLA_BASE_VECTORS, + ret = pci_alloc_irq_vectors_affinity(ha->pdev, min_vecs, ha->msix_count, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &desc); -- GitLab From 61d8658b4a435eac729966cc94cdda077a8df5cd Mon Sep 17 00:00:00 2001 From: "Dupuis, Chad" Date: Wed, 15 Feb 2017 06:28:23 -0800 Subject: [PATCH 0456/1084] scsi: qedf: Add QLogic FastLinQ offload FCoE driver framework. The QLogic FastLinQ Driver for FCoE (qedf) is the FCoE specific module for 41000 Series Converged Network Adapters by QLogic. 
This patch consists of following changes: - MAINTAINERS Makefile and Kconfig changes for qedf - PCI driver registration - libfc/fcoe host level initialization - SCSI host template initialization and callbacks - Debugfs and log level infrastructure - Link handling - Firmware interface structures - QED core module initialization - Light L2 interface callbacks - I/O request initialization - Firmware I/O completion handling - Firmware ELS request/response handling - FIP request/response handled by the driver itself Signed-off-by: Nilesh Javali Signed-off-by: Manish Rangankar Signed-off-by: Saurav Kashyap Signed-off-by: Arun Easi Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- MAINTAINERS | 6 + drivers/scsi/Kconfig | 1 + drivers/scsi/Makefile | 1 + drivers/scsi/qedf/Kconfig | 11 + drivers/scsi/qedf/Makefile | 5 + drivers/scsi/qedf/qedf.h | 545 +++++ drivers/scsi/qedf/qedf_attr.c | 165 ++ drivers/scsi/qedf/qedf_dbg.c | 195 ++ drivers/scsi/qedf/qedf_dbg.h | 154 ++ drivers/scsi/qedf/qedf_debugfs.c | 460 ++++ drivers/scsi/qedf/qedf_els.c | 949 +++++++++ drivers/scsi/qedf/qedf_fip.c | 269 +++ drivers/scsi/qedf/qedf_hsi.h | 422 ++++ drivers/scsi/qedf/qedf_io.c | 2282 ++++++++++++++++++++ drivers/scsi/qedf/qedf_main.c | 3336 ++++++++++++++++++++++++++++++ drivers/scsi/qedf/qedf_version.h | 15 + 16 files changed, 8816 insertions(+) create mode 100644 drivers/scsi/qedf/Kconfig create mode 100644 drivers/scsi/qedf/Makefile create mode 100644 drivers/scsi/qedf/qedf.h create mode 100644 drivers/scsi/qedf/qedf_attr.c create mode 100644 drivers/scsi/qedf/qedf_dbg.c create mode 100644 drivers/scsi/qedf/qedf_dbg.h create mode 100644 drivers/scsi/qedf/qedf_debugfs.c create mode 100644 drivers/scsi/qedf/qedf_els.c create mode 100644 drivers/scsi/qedf/qedf_fip.c create mode 100644 drivers/scsi/qedf/qedf_hsi.h create mode 100644 drivers/scsi/qedf/qedf_io.c create mode 100644 drivers/scsi/qedf/qedf_main.c create mode 100644 drivers/scsi/qedf/qedf_version.h diff --git a/MAINTAINERS b/MAINTAINERS index 545633d6663d..8a0e01477043 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10242,6 +10242,12 @@ L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/qedi/ +QLOGIC QL41xxx FCOE DRIVER +M: QLogic-Storage-Upstream@cavium.com +L: linux-scsi@vger.kernel.org +S: Supported +F: drivers/scsi/qedf/ + QNX4 FILESYSTEM M: Anders Larsen W: http://www.alarsen.net/linux/qnx4fs/ diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 255843446e99..8aa9bd34123e 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1235,6 +1235,7 @@ config SCSI_QLOGICPTI source "drivers/scsi/qla2xxx/Kconfig" source "drivers/scsi/qla4xxx/Kconfig" source "drivers/scsi/qedi/Kconfig" +source "drivers/scsi/qedf/Kconfig" config SCSI_LPFC tristate "Emulex LightPulse Fibre Channel Support" diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 736b77414a4b..fc2855565a51 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_FCOE) += fcoe/ obj-$(CONFIG_FCOE_FNIC) += fnic/ obj-$(CONFIG_SCSI_SNIC) += snic/ obj-$(CONFIG_SCSI_BNX2X_FCOE) += libfc/ fcoe/ bnx2fc/ +obj-$(CONFIG_QEDF) += qedf/ obj-$(CONFIG_ISCSI_TCP) += libiscsi.o libiscsi_tcp.o iscsi_tcp.o obj-$(CONFIG_INFINIBAND_ISER) += libiscsi.o obj-$(CONFIG_ISCSI_BOOT_SYSFS) += iscsi_boot_sysfs.o diff --git a/drivers/scsi/qedf/Kconfig b/drivers/scsi/qedf/Kconfig new file mode 100644 index 000000000000..943f5ee45807 --- /dev/null +++ b/drivers/scsi/qedf/Kconfig @@ -0,0 +1,11 @@ +config QEDF + tristate "QLogic QEDF 25/40/100Gb 
FCoE Initiator Driver Support" + depends on PCI && SCSI + depends on QED + depends on LIBFC + depends on LIBFCOE + select QED_LL2 + select QED_FCOE + ---help--- + This driver supports FCoE offload for the QLogic FastLinQ + 41000 Series Converged Network Adapters. diff --git a/drivers/scsi/qedf/Makefile b/drivers/scsi/qedf/Makefile new file mode 100644 index 000000000000..64e9f507ce32 --- /dev/null +++ b/drivers/scsi/qedf/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_QEDF) := qedf.o +qedf-y = qedf_dbg.o qedf_main.o qedf_io.o qedf_fip.o \ + qedf_attr.o qedf_els.o + +qedf-$(CONFIG_DEBUG_FS) += qedf_debugfs.o diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h new file mode 100644 index 000000000000..96346a1b1515 --- /dev/null +++ b/drivers/scsi/qedf/qedf.h @@ -0,0 +1,545 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#ifndef _QEDFC_H_ +#define _QEDFC_H_ + +#include +#include +#include +#include +#include +#include +#include + + +/* qedf_hsi.h needs to before included any qed includes */ +#include "qedf_hsi.h" + +#include +#include +#include +#include "qedf_version.h" +#include "qedf_dbg.h" + +/* Helpers to extract upper and lower 32-bits of pointer */ +#define U64_HI(val) ((u32)(((u64)(val)) >> 32)) +#define U64_LO(val) ((u32)(((u64)(val)) & 0xffffffff)) + +#define QEDF_DESCR "QLogic FCoE Offload Driver" +#define QEDF_MODULE_NAME "qedf" + +#define QEDF_MIN_XID 0 +#define QEDF_MAX_SCSI_XID (NUM_TASKS_PER_CONNECTION - 1) +#define QEDF_MAX_ELS_XID 4095 +#define QEDF_FLOGI_RETRY_CNT 3 +#define QEDF_RPORT_RETRY_CNT 255 +#define QEDF_MAX_SESSIONS 1024 +#define QEDF_MAX_PAYLOAD 2048 +#define QEDF_MAX_BDS_PER_CMD 256 +#define QEDF_MAX_BD_LEN 0xffff +#define QEDF_BD_SPLIT_SZ 0x1000 +#define QEDF_PAGE_SIZE 4096 +#define QED_HW_DMA_BOUNDARY 0xfff +#define QEDF_MAX_SGLEN_FOR_CACHESGL ((1U << 16) - 1) +#define QEDF_MFS (QEDF_MAX_PAYLOAD + \ + sizeof(struct fc_frame_header)) +#define QEDF_MAX_NPIV 64 +#define QEDF_TM_TIMEOUT 10 +#define QEDF_ABORT_TIMEOUT 10 +#define QEDF_CLEANUP_TIMEOUT 10 +#define QEDF_MAX_CDB_LEN 16 + +#define UPSTREAM_REMOVE 1 +#define UPSTREAM_KEEP 1 + +struct qedf_mp_req { + uint8_t tm_flags; + + uint32_t req_len; + void *req_buf; + dma_addr_t req_buf_dma; + struct fcoe_sge *mp_req_bd; + dma_addr_t mp_req_bd_dma; + struct fc_frame_header req_fc_hdr; + + uint32_t resp_len; + void *resp_buf; + dma_addr_t resp_buf_dma; + struct fcoe_sge *mp_resp_bd; + dma_addr_t mp_resp_bd_dma; + struct fc_frame_header resp_fc_hdr; +}; + +struct qedf_els_cb_arg { + struct qedf_ioreq *aborted_io_req; + struct qedf_ioreq *io_req; + u8 op; /* Used to keep track of ELS op */ + uint16_t l2_oxid; + u32 offset; /* Used for sequence cleanup */ + u8 r_ctl; /* Used for sequence cleanup */ +}; + +enum qedf_ioreq_event { + QEDF_IOREQ_EV_ABORT_SUCCESS, + QEDF_IOREQ_EV_ABORT_FAILED, + QEDF_IOREQ_EV_SEND_RRQ, + QEDF_IOREQ_EV_ELS_TMO, + QEDF_IOREQ_EV_ELS_ERR_DETECT, + QEDF_IOREQ_EV_ELS_FLUSH, + QEDF_IOREQ_EV_CLEANUP_SUCCESS, + QEDF_IOREQ_EV_CLEANUP_FAILED, +}; + +#define FC_GOOD 0 +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_OVER (0x1<<2) +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_UNDER (0x1<<3) +#define CMD_SCSI_STATUS(Cmnd) ((Cmnd)->SCp.Status) +#define FCOE_FCP_RSP_FLAGS_FCP_RSP_LEN_VALID (0x1<<0) +#define FCOE_FCP_RSP_FLAGS_FCP_SNS_LEN_VALID (0x1<<1) +struct qedf_ioreq { + struct list_head link; + uint16_t 
xid; + struct scsi_cmnd *sc_cmd; + bool use_slowpath; /* Use slow SGL for this I/O */ +#define QEDF_SCSI_CMD 1 +#define QEDF_TASK_MGMT_CMD 2 +#define QEDF_ABTS 3 +#define QEDF_ELS 4 +#define QEDF_CLEANUP 5 +#define QEDF_SEQ_CLEANUP 6 + u8 cmd_type; +#define QEDF_CMD_OUTSTANDING 0x0 +#define QEDF_CMD_IN_ABORT 0x1 +#define QEDF_CMD_IN_CLEANUP 0x2 +#define QEDF_CMD_SRR_SENT 0x3 + u8 io_req_flags; + struct qedf_rport *fcport; + unsigned long flags; + enum qedf_ioreq_event event; + size_t data_xfer_len; + struct kref refcount; + struct qedf_cmd_mgr *cmd_mgr; + struct io_bdt *bd_tbl; + struct delayed_work timeout_work; + struct completion tm_done; + struct completion abts_done; + struct fcoe_task_context *task; + int idx; +/* + * Need to allocate enough room for both sense data and FCP response data + * which has a max length of 8 bytes according to spec. + */ +#define QEDF_SCSI_SENSE_BUFFERSIZE (SCSI_SENSE_BUFFERSIZE + 8) + uint8_t *sense_buffer; + dma_addr_t sense_buffer_dma; + u32 fcp_resid; + u32 fcp_rsp_len; + u32 fcp_sns_len; + u8 cdb_status; + u8 fcp_status; + u8 fcp_rsp_code; + u8 scsi_comp_flags; +#define QEDF_MAX_REUSE 0xfff + u16 reuse_count; + struct qedf_mp_req mp_req; + void (*cb_func)(struct qedf_els_cb_arg *cb_arg); + struct qedf_els_cb_arg *cb_arg; + int fp_idx; + unsigned int cpu; + unsigned int int_cpu; +#define QEDF_IOREQ_SLOW_SGE 0 +#define QEDF_IOREQ_SINGLE_SGE 1 +#define QEDF_IOREQ_FAST_SGE 2 + u8 sge_type; + struct delayed_work rrq_work; + + /* Used for sequence level recovery; i.e. REC/SRR */ + uint32_t rx_buf_off; + uint32_t tx_buf_off; + uint32_t rx_id; + uint32_t task_retry_identifier; + + /* + * Used to tell if we need to return a SCSI command + * during some form of error processing. + */ + bool return_scsi_cmd_on_abts; +}; + +extern struct workqueue_struct *qedf_io_wq; + +struct qedf_rport { + spinlock_t rport_lock; +#define QEDF_RPORT_SESSION_READY 1 +#define QEDF_RPORT_UPLOADING_CONNECTION 2 + unsigned long flags; + unsigned long retry_delay_timestamp; + struct fc_rport *rport; + struct fc_rport_priv *rdata; + struct qedf_ctx *qedf; + u32 handle; /* Handle from qed */ + u32 fw_cid; /* fw_cid from qed */ + void __iomem *p_doorbell; + /* Send queue management */ + atomic_t free_sqes; + atomic_t num_active_ios; + struct fcoe_wqe *sq; + dma_addr_t sq_dma; + u16 sq_prod_idx; + u16 fw_sq_prod_idx; + u16 sq_con_idx; + u32 sq_mem_size; + void *sq_pbl; + dma_addr_t sq_pbl_dma; + u32 sq_pbl_size; + u32 sid; +#define QEDF_RPORT_TYPE_DISK 1 +#define QEDF_RPORT_TYPE_TAPE 2 + uint dev_type; /* Disk or tape */ + struct list_head peers; +}; + +/* Used to contain LL2 skb's in ll2_skb_list */ +struct qedf_skb_work { + struct work_struct work; + struct sk_buff *skb; + struct qedf_ctx *qedf; +}; + +struct qedf_fastpath { +#define QEDF_SB_ID_NULL 0xffff + u16 sb_id; + struct qed_sb_info *sb_info; + struct qedf_ctx *qedf; + /* Keep track of number of completions on this fastpath */ + unsigned long completions; + uint32_t cq_num_entries; +}; + +/* Used to pass fastpath information needed to process CQEs */ +struct qedf_io_work { + struct work_struct work; + struct fcoe_cqe cqe; + struct qedf_ctx *qedf; + struct fc_frame *fp; +}; + +struct qedf_glbl_q_params { + u64 hw_p_cq; /* Completion queue PBL */ + u64 hw_p_rq; /* Request queue PBL */ + u64 hw_p_cmdq; /* Command queue PBL */ +}; + +struct global_queue { + struct fcoe_cqe *cq; + dma_addr_t cq_dma; + u32 cq_mem_size; + u32 cq_cons_idx; /* Completion queue consumer index */ + u32 cq_prod_idx; + + void *cq_pbl; + dma_addr_t cq_pbl_dma; 
+ u32 cq_pbl_size; +}; + +/* I/O tracing entry */ +#define QEDF_IO_TRACE_SIZE 2048 +struct qedf_io_log { +#define QEDF_IO_TRACE_REQ 0 +#define QEDF_IO_TRACE_RSP 1 + uint8_t direction; + uint16_t task_id; + uint32_t port_id; /* Remote port fabric ID */ + int lun; + char op; /* SCSI CDB */ + uint8_t lba[4]; + unsigned int bufflen; /* SCSI buffer length */ + unsigned int sg_count; /* Number of SG elements */ + int result; /* Result passed back to mid-layer */ + unsigned long jiffies; /* Time stamp when I/O logged */ + int refcount; /* Reference count for task id */ + unsigned int req_cpu; /* CPU that the task is queued on */ + unsigned int int_cpu; /* Interrupt CPU that the task is received on */ + unsigned int rsp_cpu; /* CPU that task is returned on */ + u8 sge_type; /* Did we take the slow, single or fast SGE path */ +}; + +/* Number of entries in BDQ */ +#define QEDF_BDQ_SIZE 256 +#define QEDF_BDQ_BUF_SIZE 2072 + +/* DMA coherent buffers for BDQ */ +struct qedf_bdq_buf { + void *buf_addr; + dma_addr_t buf_dma; +}; + +/* Main adapter struct */ +struct qedf_ctx { + struct qedf_dbg_ctx dbg_ctx; + struct fcoe_ctlr ctlr; + struct fc_lport *lport; + u8 data_src_addr[ETH_ALEN]; +#define QEDF_LINK_DOWN 0 +#define QEDF_LINK_UP 1 + atomic_t link_state; +#define QEDF_DCBX_PENDING 0 +#define QEDF_DCBX_DONE 1 + atomic_t dcbx; + uint16_t max_scsi_xid; + uint16_t max_els_xid; +#define QEDF_NULL_VLAN_ID -1 +#define QEDF_FALLBACK_VLAN 1002 +#define QEDF_DEFAULT_PRIO 3 + int vlan_id; + uint vlan_hw_insert:1; + struct qed_dev *cdev; + struct qed_dev_fcoe_info dev_info; + struct qed_int_info int_info; + uint16_t last_command; + spinlock_t hba_lock; + struct pci_dev *pdev; + u64 wwnn; + u64 wwpn; + u8 __aligned(16) mac[ETH_ALEN]; + struct list_head fcports; + atomic_t num_offloads; + unsigned int curr_conn_id; + struct workqueue_struct *ll2_recv_wq; + struct workqueue_struct *link_update_wq; + struct delayed_work link_update; + struct delayed_work link_recovery; + struct completion flogi_compl; + struct completion fipvlan_compl; + + /* + * Used to tell if we're in the window where we are waiting for + * the link to come back up before informting fcoe that the link is + * done. 
+ */ + atomic_t link_down_tmo_valid; +#define QEDF_TIMER_INTERVAL (1 * HZ) + struct timer_list timer; /* One second book keeping timer */ +#define QEDF_DRAIN_ACTIVE 1 +#define QEDF_LL2_STARTED 2 +#define QEDF_UNLOADING 3 +#define QEDF_GRCDUMP_CAPTURE 4 +#define QEDF_IN_RECOVERY 5 +#define QEDF_DBG_STOP_IO 6 + unsigned long flags; /* Miscellaneous state flags */ + int fipvlan_retries; + u8 num_queues; + struct global_queue **global_queues; + /* Pointer to array of queue structures */ + struct qedf_glbl_q_params *p_cpuq; + /* Physical address of array of queue structures */ + dma_addr_t hw_p_cpuq; + + struct qedf_bdq_buf bdq[QEDF_BDQ_SIZE]; + void *bdq_pbl; + dma_addr_t bdq_pbl_dma; + size_t bdq_pbl_mem_size; + void *bdq_pbl_list; + dma_addr_t bdq_pbl_list_dma; + u8 bdq_pbl_list_num_entries; + void __iomem *bdq_primary_prod; + void __iomem *bdq_secondary_prod; + uint16_t bdq_prod_idx; + + /* Structure for holding all the fastpath for this qedf_ctx */ + struct qedf_fastpath *fp_array; + struct qed_fcoe_tid tasks; + struct qedf_cmd_mgr *cmd_mgr; + /* Holds the PF parameters we pass to qed to start he FCoE function */ + struct qed_pf_params pf_params; + /* Used to time middle path ELS and TM commands */ + struct workqueue_struct *timer_work_queue; + +#define QEDF_IO_WORK_MIN 64 + mempool_t *io_mempool; + struct workqueue_struct *dpc_wq; + + u32 slow_sge_ios; + u32 fast_sge_ios; + u32 single_sge_ios; + + uint8_t *grcdump; + uint32_t grcdump_size; + + struct qedf_io_log io_trace_buf[QEDF_IO_TRACE_SIZE]; + spinlock_t io_trace_lock; + uint16_t io_trace_idx; + + bool stop_io_on_error; + + u32 flogi_cnt; + u32 flogi_failed; + + /* Used for fc statistics */ + u64 input_requests; + u64 output_requests; + u64 control_requests; + u64 packet_aborts; + u64 alloc_failures; +}; + +struct io_bdt { + struct qedf_ioreq *io_req; + struct fcoe_sge *bd_tbl; + dma_addr_t bd_tbl_dma; + u16 bd_valid; +}; + +struct qedf_cmd_mgr { + struct qedf_ctx *qedf; + u16 idx; + struct io_bdt **io_bdt_pool; +#define FCOE_PARAMS_NUM_TASKS 4096 + struct qedf_ioreq cmds[FCOE_PARAMS_NUM_TASKS]; + spinlock_t lock; + atomic_t free_list_cnt; +}; + +/* Stolen from qed_cxt_api.h and adapted for qed_fcoe_info + * Usage: + * + * void *ptr; + * ptr = qedf_get_task_mem(&qedf->tasks, 128); + */ +static inline void *qedf_get_task_mem(struct qed_fcoe_tid *info, u32 tid) +{ + return (void *)(info->blocks[tid / info->num_tids_per_block] + + (tid % info->num_tids_per_block) * info->size); +} + +static inline void qedf_stop_all_io(struct qedf_ctx *qedf) +{ + set_bit(QEDF_DBG_STOP_IO, &qedf->flags); +} + +/* + * Externs + */ +#define QEDF_DEFAULT_LOG_MASK 0x3CFB6 +extern const struct qed_fcoe_ops *qed_ops; +extern uint qedf_dump_frames; +extern uint qedf_io_tracing; +extern uint qedf_stop_io_on_error; +extern uint qedf_link_down_tmo; +#define QEDF_RETRY_DELAY_MAX 20 /* 2 seconds */ +extern bool qedf_retry_delay; +extern uint qedf_debug; + +extern struct qedf_cmd_mgr *qedf_cmd_mgr_alloc(struct qedf_ctx *qedf); +extern void qedf_cmd_mgr_free(struct qedf_cmd_mgr *cmgr); +extern int qedf_queuecommand(struct Scsi_Host *host, + struct scsi_cmnd *sc_cmd); +extern void qedf_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb); +extern void qedf_update_src_mac(struct fc_lport *lport, u8 *addr); +extern u8 *qedf_get_src_mac(struct fc_lport *lport); +extern void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb); +extern void qedf_fcoe_send_vlan_req(struct qedf_ctx *qedf); +extern void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + 
struct qedf_ioreq *io_req); +extern void qedf_process_warning_compl(struct qedf_ctx *qedf, + struct fcoe_cqe *cqe, struct qedf_ioreq *io_req); +extern void qedf_process_error_detect(struct qedf_ctx *qedf, + struct fcoe_cqe *cqe, struct qedf_ioreq *io_req); +extern void qedf_flush_active_ios(struct qedf_rport *fcport, int lun); +extern void qedf_release_cmd(struct kref *ref); +extern int qedf_initiate_abts(struct qedf_ioreq *io_req, + bool return_scsi_cmd_on_abts); +extern void qedf_process_abts_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req); +extern struct qedf_ioreq *qedf_alloc_cmd(struct qedf_rport *fcport, + u8 cmd_type); + +extern struct device_attribute *qedf_host_attrs[]; +extern void qedf_cmd_timer_set(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, + unsigned int timer_msec); +extern int qedf_init_mp_req(struct qedf_ioreq *io_req); +extern void qedf_init_mp_task(struct qedf_ioreq *io_req, + struct fcoe_task_context *task_ctx); +extern void qedf_add_to_sq(struct qedf_rport *fcport, u16 xid, + u32 ptu_invalidate, enum fcoe_task_type req_type, u32 offset); +extern void qedf_ring_doorbell(struct qedf_rport *fcport); +extern void qedf_process_els_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *els_req); +extern int qedf_send_rrq(struct qedf_ioreq *aborted_io_req); +extern int qedf_send_adisc(struct qedf_rport *fcport, struct fc_frame *fp); +extern int qedf_initiate_cleanup(struct qedf_ioreq *io_req, + bool return_scsi_cmd_on_abts); +extern void qedf_process_cleanup_compl(struct qedf_ctx *qedf, + struct fcoe_cqe *cqe, struct qedf_ioreq *io_req); +extern int qedf_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags); +extern void qedf_process_tmf_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req); +extern void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe); +extern void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, + int result); +extern void qedf_set_vlan_id(struct qedf_ctx *qedf, int vlan_id); +extern void qedf_create_sysfs_ctx_attr(struct qedf_ctx *qedf); +extern void qedf_remove_sysfs_ctx_attr(struct qedf_ctx *qedf); +extern void qedf_capture_grc_dump(struct qedf_ctx *qedf); +extern void qedf_wait_for_upload(struct qedf_ctx *qedf); +extern void qedf_process_unsol_compl(struct qedf_ctx *qedf, uint16_t que_idx, + struct fcoe_cqe *cqe); +extern void qedf_restart_rport(struct qedf_rport *fcport); +extern int qedf_send_rec(struct qedf_ioreq *orig_io_req); +extern int qedf_post_io_req(struct qedf_rport *fcport, + struct qedf_ioreq *io_req); +extern void qedf_process_seq_cleanup_compl(struct qedf_ctx *qedf, + struct fcoe_cqe *cqe, struct qedf_ioreq *io_req); +extern int qedf_send_flogi(struct qedf_ctx *qedf); +extern void qedf_fp_io_handler(struct work_struct *work); + +#define FCOE_WORD_TO_BYTE 4 +#define QEDF_MAX_TASK_NUM 0xFFFF + +struct fip_vlan { + struct ethhdr eth; + struct fip_header fip; + struct { + struct fip_mac_desc mac; + struct fip_wwn_desc wwnn; + } desc; +}; + +/* SQ/CQ Sizes */ +#define GBL_RSVD_TASKS 16 +#define NUM_TASKS_PER_CONNECTION 1024 +#define NUM_RW_TASKS_PER_CONNECTION 512 +#define FCOE_PARAMS_CQ_NUM_ENTRIES FCOE_PARAMS_NUM_TASKS + +#define FCOE_PARAMS_CMDQ_NUM_ENTRIES FCOE_PARAMS_NUM_TASKS +#define SQ_NUM_ENTRIES NUM_TASKS_PER_CONNECTION + +#define QEDF_FCOE_PARAMS_GL_RQ_PI 0 +#define QEDF_FCOE_PARAMS_GL_CMD_PI 1 + +#define QEDF_READ (1 << 1) +#define QEDF_WRITE (1 << 0) +#define MAX_FIBRE_LUNS 0xffffffff + +#define QEDF_MAX_NUM_CQS 8 + +/* + * 
PCI function probe defines + */ +/* Probe/remove called during normal PCI probe */ +#define QEDF_MODE_NORMAL 0 +/* Probe/remove called from qed error recovery */ +#define QEDF_MODE_RECOVERY 1 + +#define SUPPORTED_25000baseKR_Full (1<<27) +#define SUPPORTED_50000baseKR2_Full (1<<28) +#define SUPPORTED_100000baseKR4_Full (1<<29) +#define SUPPORTED_100000baseCR4_Full (1<<30) + +#endif diff --git a/drivers/scsi/qedf/qedf_attr.c b/drivers/scsi/qedf/qedf_attr.c new file mode 100644 index 000000000000..47720611ad2c --- /dev/null +++ b/drivers/scsi/qedf/qedf_attr.c @@ -0,0 +1,165 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include "qedf.h" + +static ssize_t +qedf_fcoe_mac_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fc_lport *lport = shost_priv(class_to_shost(dev)); + u32 port_id; + u8 lport_src_id[3]; + u8 fcoe_mac[6]; + + port_id = fc_host_port_id(lport->host); + lport_src_id[2] = (port_id & 0x000000FF); + lport_src_id[1] = (port_id & 0x0000FF00) >> 8; + lport_src_id[0] = (port_id & 0x00FF0000) >> 16; + fc_fcoe_set_mac(fcoe_mac, lport_src_id); + + return scnprintf(buf, PAGE_SIZE, "%pM\n", fcoe_mac); +} + +static DEVICE_ATTR(fcoe_mac, S_IRUGO, qedf_fcoe_mac_show, NULL); + +struct device_attribute *qedf_host_attrs[] = { + &dev_attr_fcoe_mac, + NULL, +}; + +extern const struct qed_fcoe_ops *qed_ops; + +inline bool qedf_is_vport(struct qedf_ctx *qedf) +{ + return (!(qedf->lport->vport == NULL)); +} + +/* Get base qedf for physical port from vport */ +static struct qedf_ctx *qedf_get_base_qedf(struct qedf_ctx *qedf) +{ + struct fc_lport *lport; + struct fc_lport *base_lport; + + if (!(qedf_is_vport(qedf))) + return NULL; + + lport = qedf->lport; + base_lport = shost_priv(vport_to_shost(lport->vport)); + return (struct qedf_ctx *)(lport_priv(base_lport)); +} + +void qedf_capture_grc_dump(struct qedf_ctx *qedf) +{ + struct qedf_ctx *base_qedf; + + /* Make sure we use the base qedf to take the GRC dump */ + if (qedf_is_vport(qedf)) + base_qedf = qedf_get_base_qedf(qedf); + else + base_qedf = qedf; + + if (test_bit(QEDF_GRCDUMP_CAPTURE, &base_qedf->flags)) { + QEDF_INFO(&(base_qedf->dbg_ctx), QEDF_LOG_INFO, + "GRC Dump already captured.\n"); + return; + } + + + qedf_get_grc_dump(base_qedf->cdev, qed_ops->common, + &base_qedf->grcdump, &base_qedf->grcdump_size); + QEDF_ERR(&(base_qedf->dbg_ctx), "GRC Dump captured.\n"); + set_bit(QEDF_GRCDUMP_CAPTURE, &base_qedf->flags); + qedf_uevent_emit(base_qedf->lport->host, QEDF_UEVENT_CODE_GRCDUMP, + NULL); +} + +static ssize_t +qedf_sysfs_read_grcdump(struct file *filep, struct kobject *kobj, + struct bin_attribute *ba, char *buf, loff_t off, + size_t count) +{ + ssize_t ret = 0; + struct fc_lport *lport = shost_priv(dev_to_shost(container_of(kobj, + struct device, kobj))); + struct qedf_ctx *qedf = lport_priv(lport); + + if (test_bit(QEDF_GRCDUMP_CAPTURE, &qedf->flags)) { + ret = memory_read_from_buffer(buf, count, &off, + qedf->grcdump, qedf->grcdump_size); + } else { + QEDF_ERR(&(qedf->dbg_ctx), "GRC Dump not captured!\n"); + } + + return ret; +} + +static ssize_t +qedf_sysfs_write_grcdump(struct file *filep, struct kobject *kobj, + struct bin_attribute *ba, char *buf, loff_t off, + size_t count) +{ + struct fc_lport *lport = NULL; + struct qedf_ctx *qedf = NULL; + long reading; + int ret = 0; + char msg[40]; 
+ + if (off != 0) + return ret; + + + lport = shost_priv(dev_to_shost(container_of(kobj, + struct device, kobj))); + qedf = lport_priv(lport); + + buf[1] = 0; + ret = kstrtol(buf, 10, &reading); + if (ret) { + QEDF_ERR(&(qedf->dbg_ctx), "Invalid input, err(%d)\n", ret); + return ret; + } + + memset(msg, 0, sizeof(msg)); + switch (reading) { + case 0: + memset(qedf->grcdump, 0, qedf->grcdump_size); + clear_bit(QEDF_GRCDUMP_CAPTURE, &qedf->flags); + break; + case 1: + qedf_capture_grc_dump(qedf); + break; + } + + return count; +} + +static struct bin_attribute sysfs_grcdump_attr = { + .attr = { + .name = "grcdump", + .mode = S_IRUSR | S_IWUSR, + }, + .size = 0, + .read = qedf_sysfs_read_grcdump, + .write = qedf_sysfs_write_grcdump, +}; + +static struct sysfs_bin_attrs bin_file_entries[] = { + {"grcdump", &sysfs_grcdump_attr}, + {NULL}, +}; + +void qedf_create_sysfs_ctx_attr(struct qedf_ctx *qedf) +{ + qedf_create_sysfs_attr(qedf->lport->host, bin_file_entries); +} + +void qedf_remove_sysfs_ctx_attr(struct qedf_ctx *qedf) +{ + qedf_remove_sysfs_attr(qedf->lport->host, bin_file_entries); +} diff --git a/drivers/scsi/qedf/qedf_dbg.c b/drivers/scsi/qedf/qedf_dbg.c new file mode 100644 index 000000000000..e023f5d0dc12 --- /dev/null +++ b/drivers/scsi/qedf/qedf_dbg.c @@ -0,0 +1,195 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include "qedf_dbg.h" +#include + +void +qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + const char *fmt, ...) +{ + va_list va; + struct va_format vaf; + char nfunc[32]; + + memset(nfunc, 0, sizeof(nfunc)); + memcpy(nfunc, func, sizeof(nfunc) - 1); + + va_start(va, fmt); + + vaf.fmt = fmt; + vaf.va = &va; + + if (likely(qedf) && likely(qedf->pdev)) + pr_err("[%s]:[%s:%d]:%d: %pV", dev_name(&(qedf->pdev->dev)), + nfunc, line, qedf->host_no, &vaf); + else + pr_err("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + + va_end(va); +} + +void +qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + const char *fmt, ...) +{ + va_list va; + struct va_format vaf; + char nfunc[32]; + + memset(nfunc, 0, sizeof(nfunc)); + memcpy(nfunc, func, sizeof(nfunc) - 1); + + va_start(va, fmt); + + vaf.fmt = fmt; + vaf.va = &va; + + if (!(qedf_debug & QEDF_LOG_WARN)) + goto ret; + + if (likely(qedf) && likely(qedf->pdev)) + pr_warn("[%s]:[%s:%d]:%d: %pV", dev_name(&(qedf->pdev->dev)), + nfunc, line, qedf->host_no, &vaf); + else + pr_warn("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + +ret: + va_end(va); +} + +void +qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + const char *fmt, ...) +{ + va_list va; + struct va_format vaf; + char nfunc[32]; + + memset(nfunc, 0, sizeof(nfunc)); + memcpy(nfunc, func, sizeof(nfunc) - 1); + + va_start(va, fmt); + + vaf.fmt = fmt; + vaf.va = &va; + + if (!(qedf_debug & QEDF_LOG_NOTICE)) + goto ret; + + if (likely(qedf) && likely(qedf->pdev)) + pr_notice("[%s]:[%s:%d]:%d: %pV", + dev_name(&(qedf->pdev->dev)), nfunc, line, + qedf->host_no, &vaf); + else + pr_notice("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + +ret: + va_end(va); +} + +void +qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + u32 level, const char *fmt, ...) 
+{ + va_list va; + struct va_format vaf; + char nfunc[32]; + + memset(nfunc, 0, sizeof(nfunc)); + memcpy(nfunc, func, sizeof(nfunc) - 1); + + va_start(va, fmt); + + vaf.fmt = fmt; + vaf.va = &va; + + if (!(qedf_debug & level)) + goto ret; + + if (likely(qedf) && likely(qedf->pdev)) + pr_info("[%s]:[%s:%d]:%d: %pV", dev_name(&(qedf->pdev->dev)), + nfunc, line, qedf->host_no, &vaf); + else + pr_info("[0000:00:00.0]:[%s:%d]: %pV", nfunc, line, &vaf); + +ret: + va_end(va); +} + +int +qedf_alloc_grc_dump_buf(u8 **buf, uint32_t len) +{ + *buf = vmalloc(len); + if (!(*buf)) + return -ENOMEM; + + memset(*buf, 0, len); + return 0; +} + +void +qedf_free_grc_dump_buf(uint8_t **buf) +{ + vfree(*buf); + *buf = NULL; +} + +int +qedf_get_grc_dump(struct qed_dev *cdev, const struct qed_common_ops *common, + u8 **buf, uint32_t *grcsize) +{ + if (!*buf) + return -EINVAL; + + return common->dbg_grc(cdev, *buf, grcsize); +} + +void +qedf_uevent_emit(struct Scsi_Host *shost, u32 code, char *msg) +{ + char event_string[40]; + char *envp[] = {event_string, NULL}; + + memset(event_string, 0, sizeof(event_string)); + switch (code) { + case QEDF_UEVENT_CODE_GRCDUMP: + if (msg) + strncpy(event_string, msg, strlen(msg)); + else + sprintf(event_string, "GRCDUMP=%u", shost->host_no); + break; + default: + /* do nothing */ + break; + } + + kobject_uevent_env(&shost->shost_gendev.kobj, KOBJ_CHANGE, envp); +} + +int +qedf_create_sysfs_attr(struct Scsi_Host *shost, struct sysfs_bin_attrs *iter) +{ + int ret = 0; + + for (; iter->name; iter++) { + ret = sysfs_create_bin_file(&shost->shost_gendev.kobj, + iter->attr); + if (ret) + pr_err("Unable to create sysfs %s attr, err(%d).\n", + iter->name, ret); + } + return ret; +} + +void +qedf_remove_sysfs_attr(struct Scsi_Host *shost, struct sysfs_bin_attrs *iter) +{ + for (; iter->name; iter++) + sysfs_remove_bin_file(&shost->shost_gendev.kobj, iter->attr); +} diff --git a/drivers/scsi/qedf/qedf_dbg.h b/drivers/scsi/qedf/qedf_dbg.h new file mode 100644 index 000000000000..23bd70628a2f --- /dev/null +++ b/drivers/scsi/qedf/qedf_dbg.h @@ -0,0 +1,154 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ +#ifndef _QEDF_DBG_H_ +#define _QEDF_DBG_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +extern uint qedf_debug; + +/* Debug print level definitions */ +#define QEDF_LOG_DEFAULT 0x1 /* Set default logging mask */ +#define QEDF_LOG_INFO 0x2 /* + * Informational logs, + * MAC address, WWPN, WWNN + */ +#define QEDF_LOG_DISC 0x4 /* Init, discovery, rport */ +#define QEDF_LOG_LL2 0x8 /* LL2, VLAN logs */ +#define QEDF_LOG_CONN 0x10 /* Connection setup, cleanup */ +#define QEDF_LOG_EVT 0x20 /* Events, link, mtu */ +#define QEDF_LOG_TIMER 0x40 /* Timer events */ +#define QEDF_LOG_MP_REQ 0x80 /* Middle Path (MP) logs */ +#define QEDF_LOG_SCSI_TM 0x100 /* SCSI Aborts, Task Mgmt */ +#define QEDF_LOG_UNSOL 0x200 /* unsolicited event logs */ +#define QEDF_LOG_IO 0x400 /* scsi cmd, completion */ +#define QEDF_LOG_MQ 0x800 /* Multi Queue logs */ +#define QEDF_LOG_BSG 0x1000 /* BSG logs */ +#define QEDF_LOG_DEBUGFS 0x2000 /* debugFS logs */ +#define QEDF_LOG_LPORT 0x4000 /* lport logs */ +#define QEDF_LOG_ELS 0x8000 /* ELS logs */ +#define QEDF_LOG_NPIV 0x10000 /* NPIV logs */ +#define QEDF_LOG_SESS 0x20000 /* Conection setup, cleanup */ +#define QEDF_LOG_TID 0x80000 /* + * FW TID context acquire + * free + */ +#define QEDF_TRACK_TID 0x100000 /* + * Track TID state. To be + * enabled only at module load + * and not run-time. + */ +#define QEDF_TRACK_CMD_LIST 0x300000 /* + * Track active cmd list nodes, + * done with reference to TID, + * hence TRACK_TID also enabled. + */ +#define QEDF_LOG_NOTICE 0x40000000 /* Notice logs */ +#define QEDF_LOG_WARN 0x80000000 /* Warning logs */ + +/* Debug context structure */ +struct qedf_dbg_ctx { + unsigned int host_no; + struct pci_dev *pdev; +#ifdef CONFIG_DEBUG_FS + struct dentry *bdf_dentry; +#endif +}; + +#define QEDF_ERR(pdev, fmt, ...) \ + qedf_dbg_err(pdev, __func__, __LINE__, fmt, ## __VA_ARGS__) +#define QEDF_WARN(pdev, fmt, ...) \ + qedf_dbg_warn(pdev, __func__, __LINE__, fmt, ## __VA_ARGS__) +#define QEDF_NOTICE(pdev, fmt, ...) \ + qedf_dbg_notice(pdev, __func__, __LINE__, fmt, ## __VA_ARGS__) +#define QEDF_INFO(pdev, level, fmt, ...) 
\ + qedf_dbg_info(pdev, __func__, __LINE__, level, fmt, \ + ## __VA_ARGS__) + +extern void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + const char *fmt, ...); +extern void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + const char *, ...); +extern void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func, + u32 line, const char *, ...); +extern void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line, + u32 info, const char *fmt, ...); + +/* GRC Dump related defines */ + +struct Scsi_Host; + +#define QEDF_UEVENT_CODE_GRCDUMP 0 + +struct sysfs_bin_attrs { + char *name; + struct bin_attribute *attr; +}; + +extern int qedf_alloc_grc_dump_buf(uint8_t **buf, uint32_t len); +extern void qedf_free_grc_dump_buf(uint8_t **buf); +extern int qedf_get_grc_dump(struct qed_dev *cdev, + const struct qed_common_ops *common, uint8_t **buf, + uint32_t *grcsize); +extern void qedf_uevent_emit(struct Scsi_Host *shost, u32 code, char *msg); +extern int qedf_create_sysfs_attr(struct Scsi_Host *shost, + struct sysfs_bin_attrs *iter); +extern void qedf_remove_sysfs_attr(struct Scsi_Host *shost, + struct sysfs_bin_attrs *iter); + +#ifdef CONFIG_DEBUG_FS +/* DebugFS related code */ +struct qedf_list_of_funcs { + char *oper_str; + ssize_t (*oper_func)(struct qedf_dbg_ctx *qedf); +}; + +struct qedf_debugfs_ops { + char *name; + struct qedf_list_of_funcs *qedf_funcs; +}; + +#define qedf_dbg_fileops(drv, ops) \ +{ \ + .owner = THIS_MODULE, \ + .open = simple_open, \ + .read = drv##_dbg_##ops##_cmd_read, \ + .write = drv##_dbg_##ops##_cmd_write \ +} + +/* Used for debugfs sequential files */ +#define qedf_dbg_fileops_seq(drv, ops) \ +{ \ + .owner = THIS_MODULE, \ + .open = drv##_dbg_##ops##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + +extern void qedf_dbg_host_init(struct qedf_dbg_ctx *qedf, + struct qedf_debugfs_ops *dops, + struct file_operations *fops); +extern void qedf_dbg_host_exit(struct qedf_dbg_ctx *qedf); +extern void qedf_dbg_init(char *drv_name); +extern void qedf_dbg_exit(void); +#endif /* CONFIG_DEBUG_FS */ + +#endif /* _QEDF_DBG_H_ */ diff --git a/drivers/scsi/qedf/qedf_debugfs.c b/drivers/scsi/qedf/qedf_debugfs.c new file mode 100644 index 000000000000..cb08b625c594 --- /dev/null +++ b/drivers/scsi/qedf/qedf_debugfs.c @@ -0,0 +1,460 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. 
+ */ +#ifdef CONFIG_DEBUG_FS + +#include +#include +#include + +#include "qedf.h" +#include "qedf_dbg.h" + +static struct dentry *qedf_dbg_root; + +/** + * qedf_dbg_host_init - setup the debugfs file for the pf + * @pf: the pf that is starting up + **/ +void +qedf_dbg_host_init(struct qedf_dbg_ctx *qedf, + struct qedf_debugfs_ops *dops, + struct file_operations *fops) +{ + char host_dirname[32]; + struct dentry *file_dentry = NULL; + + QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "Creating debugfs host node\n"); + /* create pf dir */ + sprintf(host_dirname, "host%u", qedf->host_no); + qedf->bdf_dentry = debugfs_create_dir(host_dirname, qedf_dbg_root); + if (!qedf->bdf_dentry) + return; + + /* create debugfs files */ + while (dops) { + if (!(dops->name)) + break; + + file_dentry = debugfs_create_file(dops->name, 0600, + qedf->bdf_dentry, qedf, + fops); + if (!file_dentry) { + QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, + "Debugfs entry %s creation failed\n", + dops->name); + debugfs_remove_recursive(qedf->bdf_dentry); + return; + } + dops++; + fops++; + } +} + +/** + * qedf_dbg_host_exit - clear out the pf's debugfs entries + * @pf: the pf that is stopping + **/ +void +qedf_dbg_host_exit(struct qedf_dbg_ctx *qedf) +{ + QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "Destroying debugfs host " + "entry\n"); + /* remove debugfs entries of this PF */ + debugfs_remove_recursive(qedf->bdf_dentry); + qedf->bdf_dentry = NULL; +} + +/** + * qedf_dbg_init - start up debugfs for the driver + **/ +void +qedf_dbg_init(char *drv_name) +{ + QEDF_INFO(NULL, QEDF_LOG_DEBUGFS, "Creating debugfs root node\n"); + + /* create qed dir in root of debugfs. NULL means debugfs root */ + qedf_dbg_root = debugfs_create_dir(drv_name, NULL); + if (!qedf_dbg_root) + QEDF_INFO(NULL, QEDF_LOG_DEBUGFS, "Init of debugfs " + "failed\n"); +} + +/** + * qedf_dbg_exit - clean out the driver's debugfs entries + **/ +void +qedf_dbg_exit(void) +{ + QEDF_INFO(NULL, QEDF_LOG_DEBUGFS, "Destroying debugfs root " + "entry\n"); + + /* remove qed dir in root of debugfs */ + debugfs_remove_recursive(qedf_dbg_root); + qedf_dbg_root = NULL; +} + +struct qedf_debugfs_ops qedf_debugfs_ops[] = { + { "fp_int", NULL }, + { "io_trace", NULL }, + { "debug", NULL }, + { "stop_io_on_error", NULL}, + { "driver_stats", NULL}, + { "clear_stats", NULL}, + { "offload_stats", NULL}, + /* This must be last */ + { NULL, NULL } +}; + +DECLARE_PER_CPU(struct qedf_percpu_iothread_s, qedf_percpu_iothreads); + +static ssize_t +qedf_dbg_fp_int_cmd_read(struct file *filp, char __user *buffer, size_t count, + loff_t *ppos) +{ + size_t cnt = 0; + int id; + struct qedf_fastpath *fp = NULL; + struct qedf_dbg_ctx *qedf_dbg = + (struct qedf_dbg_ctx *)filp->private_data; + struct qedf_ctx *qedf = container_of(qedf_dbg, + struct qedf_ctx, dbg_ctx); + + QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "entered\n"); + + cnt = sprintf(buffer, "\nFastpath I/O completions\n\n"); + + for (id = 0; id < qedf->num_queues; id++) { + fp = &(qedf->fp_array[id]); + if (fp->sb_id == QEDF_SB_ID_NULL) + continue; + cnt += sprintf((buffer + cnt), "#%d: %lu\n", id, + fp->completions); + } + + cnt = min_t(int, count, cnt - *ppos); + *ppos += cnt; + return cnt; +} + +static ssize_t +qedf_dbg_fp_int_cmd_write(struct file *filp, const char __user *buffer, + size_t count, loff_t *ppos) +{ + if (!count || *ppos) + return 0; + + return count; +} + +static ssize_t +qedf_dbg_debug_cmd_read(struct file *filp, char __user *buffer, size_t count, + loff_t *ppos) +{ + int cnt; + struct qedf_dbg_ctx *qedf = + (struct qedf_dbg_ctx 
*)filp->private_data; + + QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "entered\n"); + cnt = sprintf(buffer, "debug mask = 0x%x\n", qedf_debug); + + cnt = min_t(int, count, cnt - *ppos); + *ppos += cnt; + return cnt; +} + +static ssize_t +qedf_dbg_debug_cmd_write(struct file *filp, const char __user *buffer, + size_t count, loff_t *ppos) +{ + uint32_t val; + void *kern_buf; + int rval; + struct qedf_dbg_ctx *qedf = + (struct qedf_dbg_ctx *)filp->private_data; + + if (!count || *ppos) + return 0; + + kern_buf = memdup_user(buffer, count); + if (IS_ERR(kern_buf)) + return PTR_ERR(kern_buf); + + rval = kstrtouint(kern_buf, 10, &val); + kfree(kern_buf); + if (rval) + return rval; + + if (val == 1) + qedf_debug = QEDF_DEFAULT_LOG_MASK; + else + qedf_debug = val; + + QEDF_INFO(qedf, QEDF_LOG_DEBUGFS, "Setting debug=0x%x.\n", val); + return count; +} + +static ssize_t +qedf_dbg_stop_io_on_error_cmd_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + int cnt; + struct qedf_dbg_ctx *qedf_dbg = + (struct qedf_dbg_ctx *)filp->private_data; + struct qedf_ctx *qedf = container_of(qedf_dbg, + struct qedf_ctx, dbg_ctx); + + QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "entered\n"); + cnt = sprintf(buffer, "%s\n", + qedf->stop_io_on_error ? "true" : "false"); + + cnt = min_t(int, count, cnt - *ppos); + *ppos += cnt; + return cnt; +} + +static ssize_t +qedf_dbg_stop_io_on_error_cmd_write(struct file *filp, + const char __user *buffer, size_t count, + loff_t *ppos) +{ + void *kern_buf; + struct qedf_dbg_ctx *qedf_dbg = + (struct qedf_dbg_ctx *)filp->private_data; + struct qedf_ctx *qedf = container_of(qedf_dbg, struct qedf_ctx, + dbg_ctx); + + QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "entered\n"); + + if (!count || *ppos) + return 0; + + kern_buf = memdup_user(buffer, 6); + if (IS_ERR(kern_buf)) + return PTR_ERR(kern_buf); + + if (strncmp(kern_buf, "false", 5) == 0) + qedf->stop_io_on_error = false; + else if (strncmp(kern_buf, "true", 4) == 0) + qedf->stop_io_on_error = true; + else if (strncmp(kern_buf, "now", 3) == 0) + /* Trigger from user to stop all I/O on this host */ + set_bit(QEDF_DBG_STOP_IO, &qedf->flags); + + kfree(kern_buf); + return count; +} + +static int +qedf_io_trace_show(struct seq_file *s, void *unused) +{ + int i, idx = 0; + struct qedf_ctx *qedf = s->private; + struct qedf_dbg_ctx *qedf_dbg = &qedf->dbg_ctx; + struct qedf_io_log *io_log; + unsigned long flags; + + if (!qedf_io_tracing) { + seq_puts(s, "I/O tracing not enabled.\n"); + goto out; + } + + QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "entered\n"); + + spin_lock_irqsave(&qedf->io_trace_lock, flags); + idx = qedf->io_trace_idx; + for (i = 0; i < QEDF_IO_TRACE_SIZE; i++) { + io_log = &qedf->io_trace_buf[idx]; + seq_printf(s, "%d:", io_log->direction); + seq_printf(s, "0x%x:", io_log->task_id); + seq_printf(s, "0x%06x:", io_log->port_id); + seq_printf(s, "%d:", io_log->lun); + seq_printf(s, "0x%02x:", io_log->op); + seq_printf(s, "0x%02x%02x%02x%02x:", io_log->lba[0], + io_log->lba[1], io_log->lba[2], io_log->lba[3]); + seq_printf(s, "%d:", io_log->bufflen); + seq_printf(s, "%d:", io_log->sg_count); + seq_printf(s, "0x%08x:", io_log->result); + seq_printf(s, "%lu:", io_log->jiffies); + seq_printf(s, "%d:", io_log->refcount); + seq_printf(s, "%d:", io_log->req_cpu); + seq_printf(s, "%d:", io_log->int_cpu); + seq_printf(s, "%d:", io_log->rsp_cpu); + seq_printf(s, "%d\n", io_log->sge_type); + + idx++; + if (idx == QEDF_IO_TRACE_SIZE) + idx = 0; + } + spin_unlock_irqrestore(&qedf->io_trace_lock, flags); + +out: + return 0; +} + 
+static int +qedf_dbg_io_trace_open(struct inode *inode, struct file *file) +{ + struct qedf_dbg_ctx *qedf_dbg = inode->i_private; + struct qedf_ctx *qedf = container_of(qedf_dbg, + struct qedf_ctx, dbg_ctx); + + return single_open(file, qedf_io_trace_show, qedf); +} + +static int +qedf_driver_stats_show(struct seq_file *s, void *unused) +{ + struct qedf_ctx *qedf = s->private; + struct qedf_rport *fcport; + struct fc_rport_priv *rdata; + + seq_printf(s, "cmg_mgr free io_reqs: %d\n", + atomic_read(&qedf->cmd_mgr->free_list_cnt)); + seq_printf(s, "slow SGEs: %d\n", qedf->slow_sge_ios); + seq_printf(s, "single SGEs: %d\n", qedf->single_sge_ios); + seq_printf(s, "fast SGEs: %d\n\n", qedf->fast_sge_ios); + + seq_puts(s, "Offloaded ports:\n\n"); + + rcu_read_lock(); + list_for_each_entry_rcu(fcport, &qedf->fcports, peers) { + rdata = fcport->rdata; + if (rdata == NULL) + continue; + seq_printf(s, "%06x: free_sqes: %d, num_active_ios: %d\n", + rdata->ids.port_id, atomic_read(&fcport->free_sqes), + atomic_read(&fcport->num_active_ios)); + } + rcu_read_unlock(); + + return 0; +} + +static int +qedf_dbg_driver_stats_open(struct inode *inode, struct file *file) +{ + struct qedf_dbg_ctx *qedf_dbg = inode->i_private; + struct qedf_ctx *qedf = container_of(qedf_dbg, + struct qedf_ctx, dbg_ctx); + + return single_open(file, qedf_driver_stats_show, qedf); +} + +static ssize_t +qedf_dbg_clear_stats_cmd_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + int cnt = 0; + + /* Essentially a read stub */ + cnt = min_t(int, count, cnt - *ppos); + *ppos += cnt; + return cnt; +} + +static ssize_t +qedf_dbg_clear_stats_cmd_write(struct file *filp, + const char __user *buffer, size_t count, + loff_t *ppos) +{ + struct qedf_dbg_ctx *qedf_dbg = + (struct qedf_dbg_ctx *)filp->private_data; + struct qedf_ctx *qedf = container_of(qedf_dbg, struct qedf_ctx, + dbg_ctx); + + QEDF_INFO(qedf_dbg, QEDF_LOG_DEBUGFS, "Clearing stat counters.\n"); + + if (!count || *ppos) + return 0; + + /* Clear stat counters exposed by 'stats' node */ + qedf->slow_sge_ios = 0; + qedf->single_sge_ios = 0; + qedf->fast_sge_ios = 0; + + return count; +} + +static int +qedf_offload_stats_show(struct seq_file *s, void *unused) +{ + struct qedf_ctx *qedf = s->private; + struct qed_fcoe_stats *fw_fcoe_stats; + + fw_fcoe_stats = kmalloc(sizeof(struct qed_fcoe_stats), GFP_KERNEL); + if (!fw_fcoe_stats) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate memory for " + "fw_fcoe_stats.\n"); + goto out; + } + + /* Query firmware for offload stats */ + qed_ops->get_stats(qedf->cdev, fw_fcoe_stats); + + seq_printf(s, "fcoe_rx_byte_cnt=%llu\n" + "fcoe_rx_data_pkt_cnt=%llu\n" + "fcoe_rx_xfer_pkt_cnt=%llu\n" + "fcoe_rx_other_pkt_cnt=%llu\n" + "fcoe_silent_drop_pkt_cmdq_full_cnt=%u\n" + "fcoe_silent_drop_pkt_crc_error_cnt=%u\n" + "fcoe_silent_drop_pkt_task_invalid_cnt=%u\n" + "fcoe_silent_drop_total_pkt_cnt=%u\n" + "fcoe_silent_drop_pkt_rq_full_cnt=%u\n" + "fcoe_tx_byte_cnt=%llu\n" + "fcoe_tx_data_pkt_cnt=%llu\n" + "fcoe_tx_xfer_pkt_cnt=%llu\n" + "fcoe_tx_other_pkt_cnt=%llu\n", + fw_fcoe_stats->fcoe_rx_byte_cnt, + fw_fcoe_stats->fcoe_rx_data_pkt_cnt, + fw_fcoe_stats->fcoe_rx_xfer_pkt_cnt, + fw_fcoe_stats->fcoe_rx_other_pkt_cnt, + fw_fcoe_stats->fcoe_silent_drop_pkt_cmdq_full_cnt, + fw_fcoe_stats->fcoe_silent_drop_pkt_crc_error_cnt, + fw_fcoe_stats->fcoe_silent_drop_pkt_task_invalid_cnt, + fw_fcoe_stats->fcoe_silent_drop_total_pkt_cnt, + fw_fcoe_stats->fcoe_silent_drop_pkt_rq_full_cnt, + fw_fcoe_stats->fcoe_tx_byte_cnt, + 
fw_fcoe_stats->fcoe_tx_data_pkt_cnt, + fw_fcoe_stats->fcoe_tx_xfer_pkt_cnt, + fw_fcoe_stats->fcoe_tx_other_pkt_cnt); + + kfree(fw_fcoe_stats); +out: + return 0; +} + +static int +qedf_dbg_offload_stats_open(struct inode *inode, struct file *file) +{ + struct qedf_dbg_ctx *qedf_dbg = inode->i_private; + struct qedf_ctx *qedf = container_of(qedf_dbg, + struct qedf_ctx, dbg_ctx); + + return single_open(file, qedf_offload_stats_show, qedf); +} + + +const struct file_operations qedf_dbg_fops[] = { + qedf_dbg_fileops(qedf, fp_int), + qedf_dbg_fileops_seq(qedf, io_trace), + qedf_dbg_fileops(qedf, debug), + qedf_dbg_fileops(qedf, stop_io_on_error), + qedf_dbg_fileops_seq(qedf, driver_stats), + qedf_dbg_fileops(qedf, clear_stats), + qedf_dbg_fileops_seq(qedf, offload_stats), + /* This must be last */ + { NULL, NULL }, +}; + +#else /* CONFIG_DEBUG_FS */ +void qedf_dbg_host_init(struct qedf_dbg_ctx *); +void qedf_dbg_host_exit(struct qedf_dbg_ctx *); +void qedf_dbg_init(char *); +void qedf_dbg_exit(void); +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c new file mode 100644 index 000000000000..78f1c252b649 --- /dev/null +++ b/drivers/scsi/qedf/qedf_els.c @@ -0,0 +1,949 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include "qedf.h" + +/* It's assumed that the lock is held when calling this function. */ +static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op, + void *data, uint32_t data_len, + void (*cb_func)(struct qedf_els_cb_arg *cb_arg), + struct qedf_els_cb_arg *cb_arg, uint32_t timer_msec) +{ + struct qedf_ctx *qedf = fcport->qedf; + struct fc_lport *lport = qedf->lport; + struct qedf_ioreq *els_req; + struct qedf_mp_req *mp_req; + struct fc_frame_header *fc_hdr; + struct fcoe_task_context *task; + int rc = 0; + uint32_t did, sid; + uint16_t xid; + uint32_t start_time = jiffies / HZ; + uint32_t current_time; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending ELS\n"); + + rc = fc_remote_port_chkready(fcport->rport); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "els 0x%x: rport not ready\n", op); + rc = -EAGAIN; + goto els_err; + } + if (lport->state != LPORT_ST_READY || !(lport->link_up)) { + QEDF_ERR(&(qedf->dbg_ctx), "els 0x%x: link is not ready\n", + op); + rc = -EAGAIN; + goto els_err; + } + + if (!(test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags))) { + QEDF_ERR(&(qedf->dbg_ctx), "els 0x%x: fcport not ready\n", op); + rc = -EINVAL; + goto els_err; + } + +retry_els: + els_req = qedf_alloc_cmd(fcport, QEDF_ELS); + if (!els_req) { + current_time = jiffies / HZ; + if ((current_time - start_time) > 10) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "els: Failed els 0x%x\n", op); + rc = -ENOMEM; + goto els_err; + } + mdelay(20 * USEC_PER_MSEC); + goto retry_els; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "initiate_els els_req = " + "0x%p cb_arg = %p xid = %x\n", els_req, cb_arg, + els_req->xid); + els_req->sc_cmd = NULL; + els_req->cmd_type = QEDF_ELS; + els_req->fcport = fcport; + els_req->cb_func = cb_func; + cb_arg->io_req = els_req; + cb_arg->op = op; + els_req->cb_arg = cb_arg; + els_req->data_xfer_len = data_len; + + /* Record which cpu this request is associated with */ + els_req->cpu = smp_processor_id(); + + mp_req = (struct qedf_mp_req *)&(els_req->mp_req); + rc = qedf_init_mp_req(els_req); + if 
(rc) { + QEDF_ERR(&(qedf->dbg_ctx), "ELS MP request init failed\n"); + kref_put(&els_req->refcount, qedf_release_cmd); + goto els_err; + } else { + rc = 0; + } + + /* Fill ELS Payload */ + if ((op >= ELS_LS_RJT) && (op <= ELS_AUTH_ELS)) { + memcpy(mp_req->req_buf, data, data_len); + } else { + QEDF_ERR(&(qedf->dbg_ctx), "Invalid ELS op 0x%x\n", op); + els_req->cb_func = NULL; + els_req->cb_arg = NULL; + kref_put(&els_req->refcount, qedf_release_cmd); + rc = -EINVAL; + } + + if (rc) + goto els_err; + + /* Fill FC header */ + fc_hdr = &(mp_req->req_fc_hdr); + + did = fcport->rdata->ids.port_id; + sid = fcport->sid; + + __fc_fill_fc_hdr(fc_hdr, FC_RCTL_ELS_REQ, sid, did, + FC_TYPE_ELS, FC_FC_FIRST_SEQ | FC_FC_END_SEQ | + FC_FC_SEQ_INIT, 0); + + /* Obtain exchange id */ + xid = els_req->xid; + + /* Initialize task context for this IO request */ + task = qedf_get_task_mem(&qedf->tasks, xid); + qedf_init_mp_task(els_req, task); + + /* Put timer on original I/O request */ + if (timer_msec) + qedf_cmd_timer_set(qedf, els_req, timer_msec); + + qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_MIDPATH, 0); + + /* Ring doorbell */ + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Ringing doorbell for ELS " + "req\n"); + qedf_ring_doorbell(fcport); +els_err: + return rc; +} + +void qedf_process_els_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *els_req) +{ + struct fcoe_task_context *task_ctx; + struct scsi_cmnd *sc_cmd; + uint16_t xid; + struct fcoe_cqe_midpath_info *mp_info; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered with xid = 0x%x" + " cmd_type = %d.\n", els_req->xid, els_req->cmd_type); + + /* Kill the ELS timer */ + cancel_delayed_work(&els_req->timeout_work); + + xid = els_req->xid; + task_ctx = qedf_get_task_mem(&qedf->tasks, xid); + sc_cmd = els_req->sc_cmd; + + /* Get ELS response length from CQE */ + mp_info = &cqe->cqe_info.midpath_info; + els_req->mp_req.resp_len = mp_info->data_placement_size; + + /* Parse ELS response */ + if ((els_req->cb_func) && (els_req->cb_arg)) { + els_req->cb_func(els_req->cb_arg); + els_req->cb_arg = NULL; + } + + kref_put(&els_req->refcount, qedf_release_cmd); +} + +static void qedf_rrq_compl(struct qedf_els_cb_arg *cb_arg) +{ + struct qedf_ioreq *orig_io_req; + struct qedf_ioreq *rrq_req; + struct qedf_ctx *qedf; + int refcount; + + rrq_req = cb_arg->io_req; + qedf = rrq_req->fcport->qedf; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered.\n"); + + orig_io_req = cb_arg->aborted_io_req; + + if (!orig_io_req) + goto out_free; + + if (rrq_req->event != QEDF_IOREQ_EV_ELS_TMO && + rrq_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) + cancel_delayed_work_sync(&orig_io_req->timeout_work); + + refcount = atomic_read(&orig_io_req->refcount.refcount); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "rrq_compl: orig io = %p," + " orig xid = 0x%x, rrq_xid = 0x%x, refcount=%d\n", + orig_io_req, orig_io_req->xid, rrq_req->xid, refcount); + + /* This should return the aborted io_req to the command pool */ + if (orig_io_req) + kref_put(&orig_io_req->refcount, qedf_release_cmd); + +out_free: + kfree(cb_arg); +} + +/* Assumes kref is already held by caller */ +int qedf_send_rrq(struct qedf_ioreq *aborted_io_req) +{ + + struct fc_els_rrq rrq; + struct qedf_rport *fcport; + struct fc_lport *lport; + struct qedf_els_cb_arg *cb_arg = NULL; + struct qedf_ctx *qedf; + uint32_t sid; + uint32_t r_a_tov; + int rc; + + if (!aborted_io_req) { + QEDF_ERR(NULL, "abort_io_req is NULL.\n"); + return -EINVAL; + } + + fcport = aborted_io_req->fcport; + + /* Check that 
fcport is still offloaded */ + if (!(test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags))) { + QEDF_ERR(NULL, "fcport is no longer offloaded.\n"); + return -EINVAL; + } + + if (!fcport->qedf) { + QEDF_ERR(NULL, "fcport->qedf is NULL.\n"); + return -EINVAL; + } + + qedf = fcport->qedf; + lport = qedf->lport; + sid = fcport->sid; + r_a_tov = lport->r_a_tov; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending RRQ orig " + "io = %p, orig_xid = 0x%x\n", aborted_io_req, + aborted_io_req->xid); + memset(&rrq, 0, sizeof(rrq)); + + cb_arg = kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO); + if (!cb_arg) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate cb_arg for " + "RRQ\n"); + rc = -ENOMEM; + goto rrq_err; + } + + cb_arg->aborted_io_req = aborted_io_req; + + rrq.rrq_cmd = ELS_RRQ; + hton24(rrq.rrq_s_id, sid); + rrq.rrq_ox_id = htons(aborted_io_req->xid); + rrq.rrq_rx_id = + htons(aborted_io_req->task->tstorm_st_context.read_write.rx_id); + + rc = qedf_initiate_els(fcport, ELS_RRQ, &rrq, sizeof(rrq), + qedf_rrq_compl, cb_arg, r_a_tov); + +rrq_err: + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "RRQ failed - release orig io " + "req 0x%x\n", aborted_io_req->xid); + kfree(cb_arg); + kref_put(&aborted_io_req->refcount, qedf_release_cmd); + } + return rc; +} + +static void qedf_process_l2_frame_compl(struct qedf_rport *fcport, + struct fc_frame *fp, + u16 l2_oxid) +{ + struct fc_lport *lport = fcport->qedf->lport; + struct fc_frame_header *fh; + u32 crc; + + fh = (struct fc_frame_header *)fc_frame_header_get(fp); + + /* Set the OXID we return to what libfc used */ + if (l2_oxid != FC_XID_UNKNOWN) + fh->fh_ox_id = htons(l2_oxid); + + /* Setup header fields */ + fh->fh_r_ctl = FC_RCTL_ELS_REP; + fh->fh_type = FC_TYPE_ELS; + /* Last sequence, end sequence */ + fh->fh_f_ctl[0] = 0x98; + hton24(fh->fh_d_id, lport->port_id); + hton24(fh->fh_s_id, fcport->rdata->ids.port_id); + fh->fh_rx_id = 0xffff; + + /* Set frame attributes */ + crc = fcoe_fc_crc(fp); + fc_frame_init(fp); + fr_dev(fp) = lport; + fr_sof(fp) = FC_SOF_I3; + fr_eof(fp) = FC_EOF_T; + fr_crc(fp) = cpu_to_le32(~crc); + + /* Send completed request to libfc */ + fc_exch_recv(lport, fp); +} + +/* + * In instances where an ELS command times out we may need to restart the + * rport by logging out and then logging back in. + */ +void qedf_restart_rport(struct qedf_rport *fcport) +{ + struct fc_lport *lport; + struct fc_rport_priv *rdata; + u32 port_id; + + if (!fcport) + return; + + rdata = fcport->rdata; + if (rdata) { + lport = fcport->qedf->lport; + port_id = rdata->ids.port_id; + QEDF_ERR(&(fcport->qedf->dbg_ctx), + "LOGO port_id=%x.\n", port_id); + fc_rport_logoff(rdata); + /* Recreate the rport and log back in */ + rdata = fc_rport_create(lport, port_id); + if (rdata) + fc_rport_login(rdata); + } +} + +static void qedf_l2_els_compl(struct qedf_els_cb_arg *cb_arg) +{ + struct qedf_ioreq *els_req; + struct qedf_rport *fcport; + struct qedf_mp_req *mp_req; + struct fc_frame *fp; + struct fc_frame_header *fh, *mp_fc_hdr; + void *resp_buf, *fc_payload; + u32 resp_len; + u16 l2_oxid; + + l2_oxid = cb_arg->l2_oxid; + els_req = cb_arg->io_req; + + if (!els_req) { + QEDF_ERR(NULL, "els_req is NULL.\n"); + goto free_arg; + } + + /* + * If we are flushing the command just free the cb_arg as none of the + * response data will be valid. 
+ */ + if (els_req->event == QEDF_IOREQ_EV_ELS_FLUSH) + goto free_arg; + + fcport = els_req->fcport; + mp_req = &(els_req->mp_req); + mp_fc_hdr = &(mp_req->resp_fc_hdr); + resp_len = mp_req->resp_len; + resp_buf = mp_req->resp_buf; + + /* + * If a middle path ELS command times out, don't try to return + * the command but rather do any internal cleanup and then libfc + * timeout the command and clean up its internal resources. + */ + if (els_req->event == QEDF_IOREQ_EV_ELS_TMO) { + /* + * If ADISC times out, libfc will timeout the exchange and then + * try to send a PLOGI which will timeout since the session is + * still offloaded. Force libfc to logout the session which + * will offload the connection and allow the PLOGI response to + * flow over the LL2 path. + */ + if (cb_arg->op == ELS_ADISC) + qedf_restart_rport(fcport); + return; + } + + if (sizeof(struct fc_frame_header) + resp_len > QEDF_PAGE_SIZE) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), "resp_len is " + "beyond page size.\n"); + goto free_arg; + } + + fp = fc_frame_alloc(fcport->qedf->lport, resp_len); + if (!fp) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), + "fc_frame_alloc failure.\n"); + return; + } + + /* Copy frame header from firmware into fp */ + fh = (struct fc_frame_header *)fc_frame_header_get(fp); + memcpy(fh, mp_fc_hdr, sizeof(struct fc_frame_header)); + + /* Copy payload from firmware into fp */ + fc_payload = fc_frame_payload_get(fp, resp_len); + memcpy(fc_payload, resp_buf, resp_len); + + QEDF_INFO(&(fcport->qedf->dbg_ctx), QEDF_LOG_ELS, + "Completing OX_ID 0x%x back to libfc.\n", l2_oxid); + qedf_process_l2_frame_compl(fcport, fp, l2_oxid); + +free_arg: + kfree(cb_arg); +} + +int qedf_send_adisc(struct qedf_rport *fcport, struct fc_frame *fp) +{ + struct fc_els_adisc *adisc; + struct fc_frame_header *fh; + struct fc_lport *lport = fcport->qedf->lport; + struct qedf_els_cb_arg *cb_arg = NULL; + struct qedf_ctx *qedf; + uint32_t r_a_tov = lport->r_a_tov; + int rc; + + qedf = fcport->qedf; + fh = fc_frame_header_get(fp); + + cb_arg = kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO); + if (!cb_arg) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate cb_arg for " + "ADISC\n"); + rc = -ENOMEM; + goto adisc_err; + } + cb_arg->l2_oxid = ntohs(fh->fh_ox_id); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Sending ADISC ox_id=0x%x.\n", cb_arg->l2_oxid); + + adisc = fc_frame_payload_get(fp, sizeof(*adisc)); + + rc = qedf_initiate_els(fcport, ELS_ADISC, adisc, sizeof(*adisc), + qedf_l2_els_compl, cb_arg, r_a_tov); + +adisc_err: + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "ADISC failed.\n"); + kfree(cb_arg); + } + return rc; +} + +static void qedf_srr_compl(struct qedf_els_cb_arg *cb_arg) +{ + struct qedf_ioreq *orig_io_req; + struct qedf_ioreq *srr_req; + struct qedf_mp_req *mp_req; + struct fc_frame_header *mp_fc_hdr, *fh; + struct fc_frame *fp; + void *resp_buf, *fc_payload; + u32 resp_len; + struct fc_lport *lport; + struct qedf_ctx *qedf; + int refcount; + u8 opcode; + + srr_req = cb_arg->io_req; + qedf = srr_req->fcport->qedf; + lport = qedf->lport; + + orig_io_req = cb_arg->aborted_io_req; + + if (!orig_io_req) + goto out_free; + + clear_bit(QEDF_CMD_SRR_SENT, &orig_io_req->flags); + + if (srr_req->event != QEDF_IOREQ_EV_ELS_TMO && + srr_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) + cancel_delayed_work_sync(&orig_io_req->timeout_work); + + refcount = atomic_read(&orig_io_req->refcount.refcount); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered: orig_io=%p," + " orig_io_xid=0x%x, rec_xid=0x%x, refcount=%d\n", + orig_io_req, 
orig_io_req->xid, srr_req->xid, refcount); + + /* If a SRR times out, simply free resources */ + if (srr_req->event == QEDF_IOREQ_EV_ELS_TMO) + goto out_free; + + /* Normalize response data into struct fc_frame */ + mp_req = &(srr_req->mp_req); + mp_fc_hdr = &(mp_req->resp_fc_hdr); + resp_len = mp_req->resp_len; + resp_buf = mp_req->resp_buf; + + fp = fc_frame_alloc(lport, resp_len); + if (!fp) { + QEDF_ERR(&(qedf->dbg_ctx), + "fc_frame_alloc failure.\n"); + goto out_free; + } + + /* Copy frame header from firmware into fp */ + fh = (struct fc_frame_header *)fc_frame_header_get(fp); + memcpy(fh, mp_fc_hdr, sizeof(struct fc_frame_header)); + + /* Copy payload from firmware into fp */ + fc_payload = fc_frame_payload_get(fp, resp_len); + memcpy(fc_payload, resp_buf, resp_len); + + opcode = fc_frame_payload_op(fp); + switch (opcode) { + case ELS_LS_ACC: + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "SRR success.\n"); + break; + case ELS_LS_RJT: + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS, + "SRR rejected.\n"); + qedf_initiate_abts(orig_io_req, true); + break; + } + + fc_frame_free(fp); +out_free: + /* Put reference for original command since SRR completed */ + kref_put(&orig_io_req->refcount, qedf_release_cmd); + kfree(cb_arg); +} + +static int qedf_send_srr(struct qedf_ioreq *orig_io_req, u32 offset, u8 r_ctl) +{ + struct fcp_srr srr; + struct qedf_ctx *qedf; + struct qedf_rport *fcport; + struct fc_lport *lport; + struct qedf_els_cb_arg *cb_arg = NULL; + u32 sid, r_a_tov; + int rc; + + if (!orig_io_req) { + QEDF_ERR(NULL, "orig_io_req is NULL.\n"); + return -EINVAL; + } + + fcport = orig_io_req->fcport; + + /* Check that fcport is still offloaded */ + if (!(test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags))) { + QEDF_ERR(NULL, "fcport is no longer offloaded.\n"); + return -EINVAL; + } + + if (!fcport->qedf) { + QEDF_ERR(NULL, "fcport->qedf is NULL.\n"); + return -EINVAL; + } + + /* Take reference until SRR command completion */ + kref_get(&orig_io_req->refcount); + + qedf = fcport->qedf; + lport = qedf->lport; + sid = fcport->sid; + r_a_tov = lport->r_a_tov; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending SRR orig_io=%p, " + "orig_xid=0x%x\n", orig_io_req, orig_io_req->xid); + memset(&srr, 0, sizeof(srr)); + + cb_arg = kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO); + if (!cb_arg) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate cb_arg for " + "SRR\n"); + rc = -ENOMEM; + goto srr_err; + } + + cb_arg->aborted_io_req = orig_io_req; + + srr.srr_op = ELS_SRR; + srr.srr_ox_id = htons(orig_io_req->xid); + srr.srr_rx_id = htons(orig_io_req->rx_id); + srr.srr_rel_off = htonl(offset); + srr.srr_r_ctl = r_ctl; + + rc = qedf_initiate_els(fcport, ELS_SRR, &srr, sizeof(srr), + qedf_srr_compl, cb_arg, r_a_tov); + +srr_err: + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "SRR failed - release orig_io_req" + "=0x%x\n", orig_io_req->xid); + kfree(cb_arg); + /* If we fail to queue SRR, send ABTS to orig_io */ + qedf_initiate_abts(orig_io_req, true); + kref_put(&orig_io_req->refcount, qedf_release_cmd); + } else + /* Tell other threads that SRR is in progress */ + set_bit(QEDF_CMD_SRR_SENT, &orig_io_req->flags); + + return rc; +} + +static void qedf_initiate_seq_cleanup(struct qedf_ioreq *orig_io_req, + u32 offset, u8 r_ctl) +{ + struct qedf_rport *fcport; + unsigned long flags; + struct qedf_els_cb_arg *cb_arg; + + fcport = orig_io_req->fcport; + + QEDF_INFO(&(fcport->qedf->dbg_ctx), QEDF_LOG_ELS, + "Doing sequence cleanup for xid=0x%x offset=%u.\n", + orig_io_req->xid, offset); + + cb_arg = 
kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO); + if (!cb_arg) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), "Unable to allocate cb_arg " + "for sequence cleanup\n"); + return; + } + + /* Get reference for cleanup request */ + kref_get(&orig_io_req->refcount); + + orig_io_req->cmd_type = QEDF_SEQ_CLEANUP; + cb_arg->offset = offset; + cb_arg->r_ctl = r_ctl; + orig_io_req->cb_arg = cb_arg; + + qedf_cmd_timer_set(fcport->qedf, orig_io_req, + QEDF_CLEANUP_TIMEOUT * HZ); + + spin_lock_irqsave(&fcport->rport_lock, flags); + + qedf_add_to_sq(fcport, orig_io_req->xid, 0, + FCOE_TASK_TYPE_SEQUENCE_CLEANUP, offset); + qedf_ring_doorbell(fcport); + + spin_unlock_irqrestore(&fcport->rport_lock, flags); +} + +void qedf_process_seq_cleanup_compl(struct qedf_ctx *qedf, + struct fcoe_cqe *cqe, struct qedf_ioreq *io_req) +{ + int rc; + struct qedf_els_cb_arg *cb_arg; + + cb_arg = io_req->cb_arg; + + /* If we timed out just free resources */ + if (io_req->event == QEDF_IOREQ_EV_ELS_TMO || !cqe) + goto free; + + /* Kill the timer we put on the request */ + cancel_delayed_work_sync(&io_req->timeout_work); + + rc = qedf_send_srr(io_req, cb_arg->offset, cb_arg->r_ctl); + if (rc) + QEDF_ERR(&(qedf->dbg_ctx), "Unable to send SRR, I/O will " + "abort, xid=0x%x.\n", io_req->xid); +free: + kfree(cb_arg); + kref_put(&io_req->refcount, qedf_release_cmd); +} + +static bool qedf_requeue_io_req(struct qedf_ioreq *orig_io_req) +{ + struct qedf_rport *fcport; + struct qedf_ioreq *new_io_req; + unsigned long flags; + bool rc = false; + + fcport = orig_io_req->fcport; + if (!fcport) { + QEDF_ERR(NULL, "fcport is NULL.\n"); + goto out; + } + + if (!orig_io_req->sc_cmd) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), "sc_cmd is NULL for " + "xid=0x%x.\n", orig_io_req->xid); + goto out; + } + + new_io_req = qedf_alloc_cmd(fcport, QEDF_SCSI_CMD); + if (!new_io_req) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), "Could not allocate new " + "io_req.\n"); + goto out; + } + + new_io_req->sc_cmd = orig_io_req->sc_cmd; + + /* + * This keeps the sc_cmd struct from being returned to the tape + * driver and being requeued twice. We do need to put a reference + * for the original I/O request since we will not do a SCSI completion + * for it. + */ + orig_io_req->sc_cmd = NULL; + kref_put(&orig_io_req->refcount, qedf_release_cmd); + + spin_lock_irqsave(&fcport->rport_lock, flags); + + /* kref for new command released in qedf_post_io_req on error */ + if (qedf_post_io_req(fcport, new_io_req)) { + QEDF_ERR(&(fcport->qedf->dbg_ctx), "Unable to post io_req\n"); + /* Return SQE to pool */ + atomic_inc(&fcport->free_sqes); + } else { + QEDF_INFO(&(fcport->qedf->dbg_ctx), QEDF_LOG_ELS, + "Reissued SCSI command from orig_xid=0x%x on " + "new_xid=0x%x.\n", orig_io_req->xid, new_io_req->xid); + /* + * Abort the original I/O but do not return SCSI command as + * it has been reissued on another OX_ID. 
+ */ + spin_unlock_irqrestore(&fcport->rport_lock, flags); + qedf_initiate_abts(orig_io_req, false); + goto out; + } + + spin_unlock_irqrestore(&fcport->rport_lock, flags); +out: + return rc; +} + + +static void qedf_rec_compl(struct qedf_els_cb_arg *cb_arg) +{ + struct qedf_ioreq *orig_io_req; + struct qedf_ioreq *rec_req; + struct qedf_mp_req *mp_req; + struct fc_frame_header *mp_fc_hdr, *fh; + struct fc_frame *fp; + void *resp_buf, *fc_payload; + u32 resp_len; + struct fc_lport *lport; + struct qedf_ctx *qedf; + int refcount; + enum fc_rctl r_ctl; + struct fc_els_ls_rjt *rjt; + struct fc_els_rec_acc *acc; + u8 opcode; + u32 offset, e_stat; + struct scsi_cmnd *sc_cmd; + bool srr_needed = false; + + rec_req = cb_arg->io_req; + qedf = rec_req->fcport->qedf; + lport = qedf->lport; + + orig_io_req = cb_arg->aborted_io_req; + + if (!orig_io_req) + goto out_free; + + if (rec_req->event != QEDF_IOREQ_EV_ELS_TMO && + rec_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) + cancel_delayed_work_sync(&orig_io_req->timeout_work); + + refcount = atomic_read(&orig_io_req->refcount.refcount); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered: orig_io=%p," + " orig_io_xid=0x%x, rec_xid=0x%x, refcount=%d\n", + orig_io_req, orig_io_req->xid, rec_req->xid, refcount); + + /* If a REC times out, free resources */ + if (rec_req->event == QEDF_IOREQ_EV_ELS_TMO) + goto out_free; + + /* Normalize response data into struct fc_frame */ + mp_req = &(rec_req->mp_req); + mp_fc_hdr = &(mp_req->resp_fc_hdr); + resp_len = mp_req->resp_len; + acc = resp_buf = mp_req->resp_buf; + + fp = fc_frame_alloc(lport, resp_len); + if (!fp) { + QEDF_ERR(&(qedf->dbg_ctx), + "fc_frame_alloc failure.\n"); + goto out_free; + } + + /* Copy frame header from firmware into fp */ + fh = (struct fc_frame_header *)fc_frame_header_get(fp); + memcpy(fh, mp_fc_hdr, sizeof(struct fc_frame_header)); + + /* Copy payload from firmware into fp */ + fc_payload = fc_frame_payload_get(fp, resp_len); + memcpy(fc_payload, resp_buf, resp_len); + + opcode = fc_frame_payload_op(fp); + if (opcode == ELS_LS_RJT) { + rjt = fc_frame_payload_get(fp, sizeof(*rjt)); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Received LS_RJT for REC: er_reason=0x%x, " + "er_explan=0x%x.\n", rjt->er_reason, rjt->er_explan); + /* + * The following response(s) mean that we need to reissue the + * request on another exchange. We need to do this without + * informing the upper layers lest it cause an application + * error. 
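+ * A reject with reason "logical error" or "unable" and an OX_ID/RX_ID
+ * explanation means the target has no record of the exchange (the
+ * command was lost), so the original I/O is reissued on a new
+ * exchange below.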
+ */ + if ((rjt->er_reason == ELS_RJT_LOGIC || + rjt->er_reason == ELS_RJT_UNAB) && + rjt->er_explan == ELS_EXPL_OXID_RXID) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Handle CMD LOST case.\n"); + qedf_requeue_io_req(orig_io_req); + } + } else if (opcode == ELS_LS_ACC) { + offset = ntohl(acc->reca_fc4value); + e_stat = ntohl(acc->reca_e_stat); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Received LS_ACC for REC: offset=0x%x, e_stat=0x%x.\n", + offset, e_stat); + if (e_stat & ESB_ST_SEQ_INIT) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Target has the seq init\n"); + goto out_free_frame; + } + sc_cmd = orig_io_req->sc_cmd; + if (!sc_cmd) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "sc_cmd is NULL for xid=0x%x.\n", + orig_io_req->xid); + goto out_free_frame; + } + /* SCSI write case */ + if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { + if (offset == orig_io_req->data_xfer_len) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "WRITE - response lost.\n"); + r_ctl = FC_RCTL_DD_CMD_STATUS; + srr_needed = true; + offset = 0; + } else { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "WRITE - XFER_RDY/DATA lost.\n"); + r_ctl = FC_RCTL_DD_DATA_DESC; + /* Use data from warning CQE instead of REC */ + offset = orig_io_req->tx_buf_off; + } + /* SCSI read case */ + } else { + if (orig_io_req->rx_buf_off == + orig_io_req->data_xfer_len) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "READ - response lost.\n"); + srr_needed = true; + r_ctl = FC_RCTL_DD_CMD_STATUS; + offset = 0; + } else { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "READ - DATA lost.\n"); + /* + * For read case we always set the offset to 0 + * for sequence recovery task. + */ + offset = 0; + r_ctl = FC_RCTL_DD_SOL_DATA; + } + } + + if (srr_needed) + qedf_send_srr(orig_io_req, offset, r_ctl); + else + qedf_initiate_seq_cleanup(orig_io_req, offset, r_ctl); + } + +out_free_frame: + fc_frame_free(fp); +out_free: + /* Put reference for original command since REC completed */ + kref_put(&orig_io_req->refcount, qedf_release_cmd); + kfree(cb_arg); +} + +/* Assumes kref is already held by caller */ +int qedf_send_rec(struct qedf_ioreq *orig_io_req) +{ + + struct fc_els_rec rec; + struct qedf_rport *fcport; + struct fc_lport *lport; + struct qedf_els_cb_arg *cb_arg = NULL; + struct qedf_ctx *qedf; + uint32_t sid; + uint32_t r_a_tov; + int rc; + + if (!orig_io_req) { + QEDF_ERR(NULL, "orig_io_req is NULL.\n"); + return -EINVAL; + } + + fcport = orig_io_req->fcport; + + /* Check that fcport is still offloaded */ + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + QEDF_ERR(NULL, "fcport is no longer offloaded.\n"); + return -EINVAL; + } + + if (!fcport->qedf) { + QEDF_ERR(NULL, "fcport->qedf is NULL.\n"); + return -EINVAL; + } + + /* Take reference until REC command completion */ + kref_get(&orig_io_req->refcount); + + qedf = fcport->qedf; + lport = qedf->lport; + sid = fcport->sid; + r_a_tov = lport->r_a_tov; + + memset(&rec, 0, sizeof(rec)); + + cb_arg = kzalloc(sizeof(struct qedf_els_cb_arg), GFP_NOIO); + if (!cb_arg) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate cb_arg for " + "REC\n"); + rc = -ENOMEM; + goto rec_err; + } + + cb_arg->aborted_io_req = orig_io_req; + + rec.rec_cmd = ELS_REC; + hton24(rec.rec_s_id, sid); + rec.rec_ox_id = htons(orig_io_req->xid); + rec.rec_rx_id = + htons(orig_io_req->task->tstorm_st_context.read_write.rx_id); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Sending REC orig_io=%p, " + "orig_xid=0x%x rx_id=0x%x\n", orig_io_req, + orig_io_req->xid, rec.rec_rx_id); + rc = 
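/* qedf_rec_compl() runs once the REC exchange completes or times out */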
qedf_initiate_els(fcport, ELS_REC, &rec, sizeof(rec), + qedf_rec_compl, cb_arg, r_a_tov); + +rec_err: + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "REC failed - release orig_io_req" + "=0x%x\n", orig_io_req->xid); + kfree(cb_arg); + kref_put(&orig_io_req->refcount, qedf_release_cmd); + } + return rc; +} diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c new file mode 100644 index 000000000000..868d423380d1 --- /dev/null +++ b/drivers/scsi/qedf/qedf_fip.c @@ -0,0 +1,269 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include +#include +#include "qedf.h" + +extern const struct qed_fcoe_ops *qed_ops; +/* + * FIP VLAN functions that will eventually move to libfcoe. + */ + +void qedf_fcoe_send_vlan_req(struct qedf_ctx *qedf) +{ + struct sk_buff *skb; + char *eth_fr; + int fr_len; + struct fip_vlan *vlan; +#define MY_FIP_ALL_FCF_MACS ((__u8[6]) { 1, 0x10, 0x18, 1, 0, 2 }) + static u8 my_fcoe_all_fcfs[ETH_ALEN] = MY_FIP_ALL_FCF_MACS; + + skb = dev_alloc_skb(sizeof(struct fip_vlan)); + if (!skb) + return; + + fr_len = sizeof(*vlan); + eth_fr = (char *)skb->data; + vlan = (struct fip_vlan *)eth_fr; + + memset(vlan, 0, sizeof(*vlan)); + ether_addr_copy(vlan->eth.h_source, qedf->mac); + ether_addr_copy(vlan->eth.h_dest, my_fcoe_all_fcfs); + vlan->eth.h_proto = htons(ETH_P_FIP); + + vlan->fip.fip_ver = FIP_VER_ENCAPS(FIP_VER); + vlan->fip.fip_op = htons(FIP_OP_VLAN); + vlan->fip.fip_subcode = FIP_SC_VL_REQ; + vlan->fip.fip_dl_len = htons(sizeof(vlan->desc) / FIP_BPW); + + vlan->desc.mac.fd_desc.fip_dtype = FIP_DT_MAC; + vlan->desc.mac.fd_desc.fip_dlen = sizeof(vlan->desc.mac) / FIP_BPW; + ether_addr_copy(vlan->desc.mac.fd_mac, qedf->mac); + + vlan->desc.wwnn.fd_desc.fip_dtype = FIP_DT_NAME; + vlan->desc.wwnn.fd_desc.fip_dlen = sizeof(vlan->desc.wwnn) / FIP_BPW; + put_unaligned_be64(qedf->lport->wwnn, &vlan->desc.wwnn.fd_wwn); + + skb_put(skb, sizeof(*vlan)); + skb->protocol = htons(ETH_P_FIP); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Sending FIP VLAN " + "request."); + + if (atomic_read(&qedf->link_state) != QEDF_LINK_UP) { + QEDF_WARN(&(qedf->dbg_ctx), "Cannot send vlan request " + "because link is not up.\n"); + + kfree_skb(skb); + return; + } + qed_ops->ll2->start_xmit(qedf->cdev, skb); +} + +static void qedf_fcoe_process_vlan_resp(struct qedf_ctx *qedf, + struct sk_buff *skb) +{ + struct fip_header *fiph; + struct fip_desc *desc; + u16 vid = 0; + ssize_t rlen; + size_t dlen; + + fiph = (struct fip_header *)(((void *)skb->data) + 2 * ETH_ALEN + 2); + + rlen = ntohs(fiph->fip_dl_len) * 4; + desc = (struct fip_desc *)(fiph + 1); + while (rlen > 0) { + dlen = desc->fip_dlen * FIP_BPW; + switch (desc->fip_dtype) { + case FIP_DT_VLAN: + vid = ntohs(((struct fip_vlan_desc *)desc)->fd_vlan); + break; + } + desc = (struct fip_desc *)((char *)desc + dlen); + rlen -= dlen; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "VLAN response, " + "vid=0x%x.\n", vid); + + if (vid > 0 && qedf->vlan_id != vid) { + qedf_set_vlan_id(qedf, vid); + + /* Inform waiter that it's ok to call fcoe_ctlr_link up() */ + complete(&qedf->fipvlan_compl); + } +} + +void qedf_fip_send(struct fcoe_ctlr *fip, struct sk_buff *skb) +{ + struct qedf_ctx *qedf = container_of(fip, struct qedf_ctx, ctlr); + struct ethhdr *eth_hdr; + struct 
vlan_ethhdr *vlan_hdr; + struct fip_header *fiph; + u16 op, vlan_tci = 0; + u8 sub; + + if (!test_bit(QEDF_LL2_STARTED, &qedf->flags)) { + QEDF_WARN(&(qedf->dbg_ctx), "LL2 not started\n"); + kfree_skb(skb); + return; + } + + fiph = (struct fip_header *) ((void *)skb->data + 2 * ETH_ALEN + 2); + eth_hdr = (struct ethhdr *)skb_mac_header(skb); + op = ntohs(fiph->fip_op); + sub = fiph->fip_subcode; + + if (!qedf->vlan_hw_insert) { + vlan_hdr = (struct vlan_ethhdr *)skb_push(skb, sizeof(*vlan_hdr) + - sizeof(*eth_hdr)); + memcpy(vlan_hdr, eth_hdr, 2 * ETH_ALEN); + vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q); + vlan_hdr->h_vlan_encapsulated_proto = eth_hdr->h_proto; + vlan_hdr->h_vlan_TCI = vlan_tci = htons(qedf->vlan_id); + } + + /* Update eth_hdr since we added a VLAN tag */ + eth_hdr = (struct ethhdr *)skb_mac_header(skb); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, "FIP frame send: " + "dest=%pM op=%x sub=%x vlan=%04x.", eth_hdr->h_dest, op, sub, + ntohs(vlan_tci)); + if (qedf_dump_frames) + print_hex_dump(KERN_WARNING, "fip ", DUMP_PREFIX_OFFSET, 16, 1, + skb->data, skb->len, false); + + qed_ops->ll2->start_xmit(qedf->cdev, skb); +} + +/* Process incoming FIP frames. */ +void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb) +{ + struct ethhdr *eth_hdr; + struct fip_header *fiph; + struct fip_desc *desc; + struct fip_mac_desc *mp; + struct fip_wwn_desc *wp; + struct fip_vn_desc *vp; + size_t rlen, dlen; + uint32_t cvl_port_id; + __u8 cvl_mac[ETH_ALEN]; + u16 op; + u8 sub; + + eth_hdr = (struct ethhdr *)skb_mac_header(skb); + fiph = (struct fip_header *) ((void *)skb->data + 2 * ETH_ALEN + 2); + op = ntohs(fiph->fip_op); + sub = fiph->fip_subcode; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, "FIP frame received: " + "skb=%p fiph=%p source=%pM op=%x sub=%x", skb, fiph, + eth_hdr->h_source, op, sub); + if (qedf_dump_frames) + print_hex_dump(KERN_WARNING, "fip ", DUMP_PREFIX_OFFSET, 16, 1, + skb->data, skb->len, false); + + /* Handle FIP VLAN resp in the driver */ + if (op == FIP_OP_VLAN && sub == FIP_SC_VL_NOTE) { + qedf_fcoe_process_vlan_resp(qedf, skb); + qedf->vlan_hw_insert = 0; + kfree_skb(skb); + } else if (op == FIP_OP_CTRL && sub == FIP_SC_CLR_VLINK) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Clear virtual " + "link received.\n"); + + /* Check that an FCF has been selected by fcoe */ + if (qedf->ctlr.sel_fcf == NULL) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Dropping CVL since FCF has not been selected " + "yet."); + return; + } + + cvl_port_id = 0; + memset(cvl_mac, 0, ETH_ALEN); + /* + * We need to loop through the CVL descriptors to determine + * if we want to reset the fcoe link + */ + rlen = ntohs(fiph->fip_dl_len) * FIP_BPW; + desc = (struct fip_desc *)(fiph + 1); + while (rlen >= sizeof(*desc)) { + dlen = desc->fip_dlen * FIP_BPW; + switch (desc->fip_dtype) { + case FIP_DT_MAC: + mp = (struct fip_mac_desc *)desc; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "fd_mac=%pM.\n", __func__, mp->fd_mac); + ether_addr_copy(cvl_mac, mp->fd_mac); + break; + case FIP_DT_NAME: + wp = (struct fip_wwn_desc *)desc; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "fc_wwpn=%016llx.\n", + get_unaligned_be64(&wp->fd_wwn)); + break; + case FIP_DT_VN_ID: + vp = (struct fip_vn_desc *)desc; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "fd_fc_id=%x.\n", ntoh24(vp->fd_fc_id)); + cvl_port_id = ntoh24(vp->fd_fc_id); + break; + default: + /* Ignore anything else */ + break; + } + desc = (struct fip_desc *)((char *)desc + dlen); + rlen -= dlen; + } + + QEDF_INFO(&(qedf->dbg_ctx), 
QEDF_LOG_LL2, + "cvl_port_id=%06x cvl_mac=%pM.\n", cvl_port_id, + cvl_mac); + if (cvl_port_id == qedf->lport->port_id && + ether_addr_equal(cvl_mac, + qedf->ctlr.sel_fcf->fcf_mac)) { + fcoe_ctlr_link_down(&qedf->ctlr); + qedf_wait_for_upload(qedf); + fcoe_ctlr_link_up(&qedf->ctlr); + } + kfree_skb(skb); + } else { + /* Everything else is handled by libfcoe */ + __skb_pull(skb, ETH_HLEN); + fcoe_ctlr_recv(&qedf->ctlr, skb); + } +} + +void qedf_update_src_mac(struct fc_lport *lport, u8 *addr) +{ + struct qedf_ctx *qedf = lport_priv(lport); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Setting data_src_addr=%pM.\n", addr); + ether_addr_copy(qedf->data_src_addr, addr); +} + +u8 *qedf_get_src_mac(struct fc_lport *lport) +{ + u8 mac[ETH_ALEN]; + u8 port_id[3]; + struct qedf_ctx *qedf = lport_priv(lport); + + /* We need to use the lport port_id to create the data_src_addr */ + if (is_zero_ether_addr(qedf->data_src_addr)) { + hton24(port_id, lport->port_id); + fc_fcoe_set_mac(mac, port_id); + qedf->ctlr.update_mac(lport, mac); + } + return qedf->data_src_addr; +} diff --git a/drivers/scsi/qedf/qedf_hsi.h b/drivers/scsi/qedf/qedf_hsi.h new file mode 100644 index 000000000000..dfd65dec2874 --- /dev/null +++ b/drivers/scsi/qedf/qedf_hsi.h @@ -0,0 +1,422 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#ifndef __QEDF_HSI__ +#define __QEDF_HSI__ +/* + * Add include to common target + */ +#include + +/* + * Add include to common storage target + */ +#include + +/* + * Add include to common fcoe target for both eCore and protocol driver + */ +#include + + +/* + * FCoE CQ element ABTS information + */ +struct fcoe_abts_info { + u8 r_ctl /* R_CTL in the ABTS response frame */; + u8 reserved0; + __le16 rx_id; + __le32 reserved2[2]; + __le32 fc_payload[3] /* ABTS FC payload response frame */; +}; + + +/* + * FCoE class type + */ +enum fcoe_class_type { + FCOE_TASK_CLASS_TYPE_3, + FCOE_TASK_CLASS_TYPE_2, + MAX_FCOE_CLASS_TYPE +}; + + +/* + * FCoE CMDQ element control information + */ +struct fcoe_cmdqe_control { + __le16 conn_id; + u8 num_additional_cmdqes; + u8 cmdType; + /* true for ABTS request cmdqe. 
used in Target mode */ +#define FCOE_CMDQE_CONTROL_ABTSREQCMD_MASK 0x1 +#define FCOE_CMDQE_CONTROL_ABTSREQCMD_SHIFT 0 +#define FCOE_CMDQE_CONTROL_RESERVED1_MASK 0x7F +#define FCOE_CMDQE_CONTROL_RESERVED1_SHIFT 1 + u8 reserved2[4]; +}; + +/* + * FCoE control + payload CMDQ element + */ +struct fcoe_cmdqe { + struct fcoe_cmdqe_control hdr; + u8 fc_header[24]; + __le32 fcp_cmd_payload[8]; +}; + + + +/* + * FCP RSP flags + */ +struct fcoe_fcp_rsp_flags { + u8 flags; +#define FCOE_FCP_RSP_FLAGS_FCP_RSP_LEN_VALID_MASK 0x1 +#define FCOE_FCP_RSP_FLAGS_FCP_RSP_LEN_VALID_SHIFT 0 +#define FCOE_FCP_RSP_FLAGS_FCP_SNS_LEN_VALID_MASK 0x1 +#define FCOE_FCP_RSP_FLAGS_FCP_SNS_LEN_VALID_SHIFT 1 +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_OVER_MASK 0x1 +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_OVER_SHIFT 2 +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_UNDER_MASK 0x1 +#define FCOE_FCP_RSP_FLAGS_FCP_RESID_UNDER_SHIFT 3 +#define FCOE_FCP_RSP_FLAGS_FCP_CONF_REQ_MASK 0x1 +#define FCOE_FCP_RSP_FLAGS_FCP_CONF_REQ_SHIFT 4 +#define FCOE_FCP_RSP_FLAGS_FCP_BIDI_FLAGS_MASK 0x7 +#define FCOE_FCP_RSP_FLAGS_FCP_BIDI_FLAGS_SHIFT 5 +}; + +/* + * FCoE CQ element response information + */ +struct fcoe_cqe_rsp_info { + struct fcoe_fcp_rsp_flags rsp_flags; + u8 scsi_status_code; + __le16 retry_delay_timer; + __le32 fcp_resid; + __le32 fcp_sns_len; + __le32 fcp_rsp_len; + __le16 rx_id; + u8 fw_error_flags; +#define FCOE_CQE_RSP_INFO_FW_UNDERRUN_MASK 0x1 /* FW detected underrun */ +#define FCOE_CQE_RSP_INFO_FW_UNDERRUN_SHIFT 0 +#define FCOE_CQE_RSP_INFO_RESREVED_MASK 0x7F +#define FCOE_CQE_RSP_INFO_RESREVED_SHIFT 1 + u8 reserved; + __le32 fw_residual /* Residual bytes calculated by FW */; +}; + +/* + * FCoE CQ element Target completion information + */ +struct fcoe_cqe_target_info { + __le16 rx_id; + __le16 reserved0; + __le32 reserved1[5]; +}; + +/* + * FCoE error/warning reporting entry + */ +struct fcoe_err_report_entry { + __le32 err_warn_bitmap_lo /* Error bitmap lower 32 bits */; + __le32 err_warn_bitmap_hi /* Error bitmap higher 32 bits */; + /* Buffer offset the beginning of the Sequence last transmitted */ + __le32 tx_buf_off; + /* Buffer offset from the beginning of the Sequence last received */ + __le32 rx_buf_off; + __le16 rx_id /* RX_ID of the associated task */; + __le16 reserved1; + __le32 reserved2; +}; + +/* + * FCoE CQ element middle path information + */ +struct fcoe_cqe_midpath_info { + __le32 data_placement_size; + __le16 rx_id; + __le16 reserved0; + __le32 reserved1[4]; +}; + +/* + * FCoE CQ element unsolicited information + */ +struct fcoe_unsolic_info { + /* BD information: Physical address and opaque data */ + struct scsi_bd bd_info; + __le16 conn_id /* Connection ID the frame is associated to */; + __le16 pkt_len /* Packet length */; + u8 reserved1[4]; +}; + +/* + * FCoE warning reporting entry + */ +struct fcoe_warning_report_entry { + /* BD information: Physical address and opaque data */ + struct scsi_bd bd_info; + /* Buffer offset the beginning of the Sequence last transmitted */ + __le32 buf_off; + __le16 rx_id /* RX_ID of the associated task */; + __le16 reserved1; +}; + +/* + * FCoE CQ element information + */ +union fcoe_cqe_info { + struct fcoe_cqe_rsp_info rsp_info /* Response completion information */; + /* Target completion information */ + struct fcoe_cqe_target_info target_info; + /* Error completion information */ + struct fcoe_err_report_entry err_info; + struct fcoe_abts_info abts_info /* ABTS completion information */; + /* Middle path completion information */ + struct fcoe_cqe_midpath_info midpath_info; 
+ /* Unsolicited packet completion information */ + struct fcoe_unsolic_info unsolic_info; + /* Warning completion information (Rec Tov expiration) */ + struct fcoe_warning_report_entry warn_info; +}; + +/* + * FCoE CQ element + */ +struct fcoe_cqe { + __le32 cqe_data; + /* The task identifier (OX_ID) to be completed */ +#define FCOE_CQE_TASK_ID_MASK 0xFFFF +#define FCOE_CQE_TASK_ID_SHIFT 0 + /* + * The CQE type: 0x0 Indicating on a pending work request completion. + * 0x1 - Indicating on an unsolicited event notification. use enum + * fcoe_cqe_type (use enum fcoe_cqe_type) + */ +#define FCOE_CQE_CQE_TYPE_MASK 0xF +#define FCOE_CQE_CQE_TYPE_SHIFT 16 +#define FCOE_CQE_RESERVED0_MASK 0xFFF +#define FCOE_CQE_RESERVED0_SHIFT 20 + __le16 reserved1; + __le16 fw_cq_prod; + union fcoe_cqe_info cqe_info; +}; + +/* + * FCoE CQE type + */ +enum fcoe_cqe_type { + /* solicited response on a R/W or middle-path SQE */ + FCOE_GOOD_COMPLETION_CQE_TYPE, + FCOE_UNSOLIC_CQE_TYPE /* unsolicited packet, RQ consumed */, + FCOE_ERROR_DETECTION_CQE_TYPE /* timer expiration, validation error */, + FCOE_WARNING_CQE_TYPE /* rec_tov or rr_tov timer expiration */, + FCOE_EXCH_CLEANUP_CQE_TYPE /* task cleanup completed */, + FCOE_ABTS_CQE_TYPE /* ABTS received and task cleaned */, + FCOE_DUMMY_CQE_TYPE /* just increment SQ CONS */, + /* Task was completed wight after sending a pkt to the target */ + FCOE_LOCAL_COMP_CQE_TYPE, + MAX_FCOE_CQE_TYPE +}; + + +/* + * FCoE device type + */ +enum fcoe_device_type { + FCOE_TASK_DEV_TYPE_DISK, + FCOE_TASK_DEV_TYPE_TAPE, + MAX_FCOE_DEVICE_TYPE +}; + + + + +/* + * FCoE fast path error codes + */ +enum fcoe_fp_error_warning_code { + FCOE_ERROR_CODE_XFER_OOO_RO /* XFER error codes */, + FCOE_ERROR_CODE_XFER_RO_NOT_ALIGNED, + FCOE_ERROR_CODE_XFER_NULL_BURST_LEN, + FCOE_ERROR_CODE_XFER_RO_GREATER_THAN_DATA2TRNS, + FCOE_ERROR_CODE_XFER_INVALID_PAYLOAD_SIZE, + FCOE_ERROR_CODE_XFER_TASK_TYPE_NOT_WRITE, + FCOE_ERROR_CODE_XFER_PEND_XFER_SET, + FCOE_ERROR_CODE_XFER_OPENED_SEQ, + FCOE_ERROR_CODE_XFER_FCTL, + FCOE_ERROR_CODE_FCP_RSP_BIDI_FLAGS_SET /* FCP RSP error codes */, + FCOE_ERROR_CODE_FCP_RSP_INVALID_LENGTH_FIELD, + FCOE_ERROR_CODE_FCP_RSP_INVALID_SNS_FIELD, + FCOE_ERROR_CODE_FCP_RSP_INVALID_PAYLOAD_SIZE, + FCOE_ERROR_CODE_FCP_RSP_PEND_XFER_SET, + FCOE_ERROR_CODE_FCP_RSP_OPENED_SEQ, + FCOE_ERROR_CODE_FCP_RSP_FCTL, + FCOE_ERROR_CODE_FCP_RSP_LAST_SEQ_RESET, + FCOE_ERROR_CODE_FCP_RSP_CONF_REQ_NOT_SUPPORTED_YET, + FCOE_ERROR_CODE_DATA_OOO_RO /* FCP DATA error codes */, + FCOE_ERROR_CODE_DATA_EXCEEDS_DEFINED_MAX_FRAME_SIZE, + FCOE_ERROR_CODE_DATA_EXCEEDS_DATA2TRNS, + FCOE_ERROR_CODE_DATA_SOFI3_SEQ_ACTIVE_SET, + FCOE_ERROR_CODE_DATA_SOFN_SEQ_ACTIVE_RESET, + FCOE_ERROR_CODE_DATA_EOFN_END_SEQ_SET, + FCOE_ERROR_CODE_DATA_EOFT_END_SEQ_RESET, + FCOE_ERROR_CODE_DATA_TASK_TYPE_NOT_READ, + FCOE_ERROR_CODE_DATA_FCTL_INITIATIR, + FCOE_ERROR_CODE_MIDPATH_INVALID_TYPE /* Middle path error codes */, + FCOE_ERROR_CODE_MIDPATH_SOFI3_SEQ_ACTIVE_SET, + FCOE_ERROR_CODE_MIDPATH_SOFN_SEQ_ACTIVE_RESET, + FCOE_ERROR_CODE_MIDPATH_EOFN_END_SEQ_SET, + FCOE_ERROR_CODE_MIDPATH_EOFT_END_SEQ_RESET, + FCOE_ERROR_CODE_MIDPATH_REPLY_FCTL, + FCOE_ERROR_CODE_MIDPATH_INVALID_REPLY, + FCOE_ERROR_CODE_MIDPATH_ELS_REPLY_RCTL, + FCOE_ERROR_CODE_COMMON_MIDDLE_FRAME_WITH_PAD /* Common error codes */, + FCOE_ERROR_CODE_COMMON_SEQ_INIT_IN_TCE, + FCOE_ERROR_CODE_COMMON_FC_HDR_RX_ID_MISMATCH, + FCOE_ERROR_CODE_COMMON_INCORRECT_SEQ_CNT, + FCOE_ERROR_CODE_COMMON_DATA_FC_HDR_FCP_TYPE_MISMATCH, + FCOE_ERROR_CODE_COMMON_DATA_NO_MORE_SGES, + 
FCOE_ERROR_CODE_COMMON_OPTIONAL_FC_HDR, + FCOE_ERROR_CODE_COMMON_READ_TCE_OX_ID_TOO_BIG, + FCOE_ERROR_CODE_COMMON_DATA_WAS_NOT_TRANSMITTED, + FCOE_ERROR_CODE_COMMON_TASK_DDF_RCTL_INFO_FIELD, + FCOE_ERROR_CODE_COMMON_TASK_INVALID_RCTL, + FCOE_ERROR_CODE_COMMON_TASK_RCTL_GENERAL_MISMATCH, + FCOE_ERROR_CODE_E_D_TOV_TIMER_EXPIRATION /* Timer error codes */, + FCOE_WARNING_CODE_REC_TOV_TIMER_EXPIRATION /* Timer error codes */, + FCOE_ERROR_CODE_RR_TOV_TIMER_EXPIRATION /* Timer error codes */, + /* ABTSrsp pckt arrived unexpected */ + FCOE_ERROR_CODE_ABTS_REPLY_UNEXPECTED, + FCOE_ERROR_CODE_TARGET_MODE_FCP_RSP, + FCOE_ERROR_CODE_TARGET_MODE_FCP_XFER, + FCOE_ERROR_CODE_TARGET_MODE_DATA_TASK_TYPE_NOT_WRITE, + FCOE_ERROR_CODE_DATA_FCTL_TARGET, + FCOE_ERROR_CODE_TARGET_DATA_SIZE_NO_MATCH_XFER, + FCOE_ERROR_CODE_TARGET_DIF_CRC_CHECKSUM_ERROR, + FCOE_ERROR_CODE_TARGET_DIF_REF_TAG_ERROR, + FCOE_ERROR_CODE_TARGET_DIF_APP_TAG_ERROR, + MAX_FCOE_FP_ERROR_WARNING_CODE +}; + + +/* + * FCoE RESPQ element + */ +struct fcoe_respqe { + __le16 ox_id /* OX_ID that is located in the FCP_RSP FC header */; + __le16 rx_id /* RX_ID that is located in the FCP_RSP FC header */; + __le32 additional_info; +/* PARAM that is located in the FCP_RSP FC header */ +#define FCOE_RESPQE_PARAM_MASK 0xFFFFFF +#define FCOE_RESPQE_PARAM_SHIFT 0 +/* Indication whther its Target-auto-rsp mode or not */ +#define FCOE_RESPQE_TARGET_AUTO_RSP_MASK 0xFF +#define FCOE_RESPQE_TARGET_AUTO_RSP_SHIFT 24 +}; + + +/* + * FCoE slow path error codes + */ +enum fcoe_sp_error_code { + /* Error codes for Error Reporting in slow path flows */ + FCOE_ERROR_CODE_SLOW_PATH_TOO_MANY_FUNCS, + FCOE_ERROR_SLOW_PATH_CODE_NO_LICENSE, + MAX_FCOE_SP_ERROR_CODE +}; + + +/* + * FCoE SQE request type + */ +enum fcoe_sqe_request_type { + SEND_FCOE_CMD, + SEND_FCOE_MIDPATH, + SEND_FCOE_ABTS_REQUEST, + FCOE_EXCHANGE_CLEANUP, + FCOE_SEQUENCE_RECOVERY, + SEND_FCOE_XFER_RDY, + SEND_FCOE_RSP, + SEND_FCOE_RSP_WITH_SENSE_DATA, + SEND_FCOE_TARGET_DATA, + SEND_FCOE_INITIATOR_DATA, + /* + * Xfer Continuation (==1) ready to be sent. Previous XFERs data + * received successfully. 
+ */ + SEND_FCOE_XFER_CONTINUATION_RDY, + SEND_FCOE_TARGET_ABTS_RSP, + MAX_FCOE_SQE_REQUEST_TYPE +}; + + +/* + * FCoE task TX state + */ +enum fcoe_task_tx_state { + /* Initiate state after driver has initialized the task */ + FCOE_TASK_TX_STATE_NORMAL, + /* Updated by TX path after complete transmitting unsolicited packet */ + FCOE_TASK_TX_STATE_UNSOLICITED_COMPLETED, + /* + * Updated by TX path after start processing the task requesting the + * cleanup/abort operation + */ + FCOE_TASK_TX_STATE_CLEAN_REQ, + FCOE_TASK_TX_STATE_ABTS /* Updated by TX path during abort procedure */, + /* Updated by TX path during exchange cleanup procedure */ + FCOE_TASK_TX_STATE_EXCLEANUP, + /* + * Updated by TX path during exchange cleanup continuation task + * procedure + */ + FCOE_TASK_TX_STATE_EXCLEANUP_TARGET_WRITE_CONT, + /* Updated by TX path during exchange cleanup first xfer procedure */ + FCOE_TASK_TX_STATE_EXCLEANUP_TARGET_WRITE, + /* Updated by TX path during exchange cleanup read task in Target */ + FCOE_TASK_TX_STATE_EXCLEANUP_TARGET_READ_OR_RSP, + /* Updated by TX path during target exchange cleanup procedure */ + FCOE_TASK_TX_STATE_EXCLEANUP_TARGET_WRITE_LAST_CYCLE, + /* Updated by TX path during sequence recovery procedure */ + FCOE_TASK_TX_STATE_SEQRECOVERY, + MAX_FCOE_TASK_TX_STATE +}; + + +/* + * FCoE task type + */ +enum fcoe_task_type { + FCOE_TASK_TYPE_WRITE_INITIATOR, + FCOE_TASK_TYPE_READ_INITIATOR, + FCOE_TASK_TYPE_MIDPATH, + FCOE_TASK_TYPE_UNSOLICITED, + FCOE_TASK_TYPE_ABTS, + FCOE_TASK_TYPE_EXCHANGE_CLEANUP, + FCOE_TASK_TYPE_SEQUENCE_CLEANUP, + FCOE_TASK_TYPE_WRITE_TARGET, + FCOE_TASK_TYPE_READ_TARGET, + FCOE_TASK_TYPE_RSP, + FCOE_TASK_TYPE_RSP_SENSE_DATA, + FCOE_TASK_TYPE_ABTS_TARGET, + FCOE_TASK_TYPE_ENUM_SIZE, + MAX_FCOE_TASK_TYPE +}; + +struct scsi_glbl_queue_entry { + /* Start physical address for the RQ (receive queue) PBL. */ + struct regpair rq_pbl_addr; + /* Start physical address for the CQ (completion queue) PBL. */ + struct regpair cq_pbl_addr; + /* Start physical address for the CMDQ (command queue) PBL. */ + struct regpair cmdq_pbl_addr; +}; + +#endif /* __QEDF_HSI__ */ diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c new file mode 100644 index 000000000000..486c045ac8bb --- /dev/null +++ b/drivers/scsi/qedf/qedf_io.c @@ -0,0 +1,2282 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include +#include +#include "qedf.h" +#include + +void qedf_cmd_timer_set(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, + unsigned int timer_msec) +{ + queue_delayed_work(qedf->timer_work_queue, &io_req->timeout_work, + msecs_to_jiffies(timer_msec)); +} + +static void qedf_cmd_timeout(struct work_struct *work) +{ + + struct qedf_ioreq *io_req = + container_of(work, struct qedf_ioreq, timeout_work.work); + struct qedf_ctx *qedf = io_req->fcport->qedf; + struct qedf_rport *fcport = io_req->fcport; + u8 op = 0; + + switch (io_req->cmd_type) { + case QEDF_ABTS: + QEDF_ERR((&qedf->dbg_ctx), "ABTS timeout, xid=0x%x.\n", + io_req->xid); + /* Cleanup timed out ABTS */ + qedf_initiate_cleanup(io_req, true); + complete(&io_req->abts_done); + + /* + * Need to call kref_put for reference taken when initiate_abts + * was called since abts_compl won't be called now that we've + * cleaned up the task. 
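+ * (The complete() call above also wakes any waiter on abts_done,
+ * e.g. an error-handler initiated abort.)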
+ */ + kref_put(&io_req->refcount, qedf_release_cmd); + + /* + * Now that the original I/O and the ABTS are complete see + * if we need to reconnect to the target. + */ + qedf_restart_rport(fcport); + break; + case QEDF_ELS: + kref_get(&io_req->refcount); + /* + * Don't attempt to clean an ELS timeout as any subseqeunt + * ABTS or cleanup requests just hang. For now just free + * the resources of the original I/O and the RRQ + */ + QEDF_ERR(&(qedf->dbg_ctx), "ELS timeout, xid=0x%x.\n", + io_req->xid); + io_req->event = QEDF_IOREQ_EV_ELS_TMO; + /* Call callback function to complete command */ + if (io_req->cb_func && io_req->cb_arg) { + op = io_req->cb_arg->op; + io_req->cb_func(io_req->cb_arg); + io_req->cb_arg = NULL; + } + qedf_initiate_cleanup(io_req, true); + kref_put(&io_req->refcount, qedf_release_cmd); + break; + case QEDF_SEQ_CLEANUP: + QEDF_ERR(&(qedf->dbg_ctx), "Sequence cleanup timeout, " + "xid=0x%x.\n", io_req->xid); + qedf_initiate_cleanup(io_req, true); + io_req->event = QEDF_IOREQ_EV_ELS_TMO; + qedf_process_seq_cleanup_compl(qedf, NULL, io_req); + break; + default: + break; + } +} + +void qedf_cmd_mgr_free(struct qedf_cmd_mgr *cmgr) +{ + struct io_bdt *bdt_info; + struct qedf_ctx *qedf = cmgr->qedf; + size_t bd_tbl_sz; + u16 min_xid = QEDF_MIN_XID; + u16 max_xid = (FCOE_PARAMS_NUM_TASKS - 1); + int num_ios; + int i; + struct qedf_ioreq *io_req; + + num_ios = max_xid - min_xid + 1; + + /* Free fcoe_bdt_ctx structures */ + if (!cmgr->io_bdt_pool) + goto free_cmd_pool; + + bd_tbl_sz = QEDF_MAX_BDS_PER_CMD * sizeof(struct fcoe_sge); + for (i = 0; i < num_ios; i++) { + bdt_info = cmgr->io_bdt_pool[i]; + if (bdt_info->bd_tbl) { + dma_free_coherent(&qedf->pdev->dev, bd_tbl_sz, + bdt_info->bd_tbl, bdt_info->bd_tbl_dma); + bdt_info->bd_tbl = NULL; + } + } + + /* Destroy io_bdt pool */ + for (i = 0; i < num_ios; i++) { + kfree(cmgr->io_bdt_pool[i]); + cmgr->io_bdt_pool[i] = NULL; + } + + kfree(cmgr->io_bdt_pool); + cmgr->io_bdt_pool = NULL; + +free_cmd_pool: + + for (i = 0; i < num_ios; i++) { + io_req = &cmgr->cmds[i]; + /* Make sure we free per command sense buffer */ + if (io_req->sense_buffer) + dma_free_coherent(&qedf->pdev->dev, + QEDF_SCSI_SENSE_BUFFERSIZE, io_req->sense_buffer, + io_req->sense_buffer_dma); + cancel_delayed_work_sync(&io_req->rrq_work); + } + + /* Free command manager itself */ + vfree(cmgr); +} + +static void qedf_handle_rrq(struct work_struct *work) +{ + struct qedf_ioreq *io_req = + container_of(work, struct qedf_ioreq, rrq_work.work); + + qedf_send_rrq(io_req); + +} + +struct qedf_cmd_mgr *qedf_cmd_mgr_alloc(struct qedf_ctx *qedf) +{ + struct qedf_cmd_mgr *cmgr; + struct io_bdt *bdt_info; + struct qedf_ioreq *io_req; + u16 xid; + int i; + int num_ios; + u16 min_xid = QEDF_MIN_XID; + u16 max_xid = (FCOE_PARAMS_NUM_TASKS - 1); + + /* Make sure num_queues is already set before calling this function */ + if (!qedf->num_queues) { + QEDF_ERR(&(qedf->dbg_ctx), "num_queues is not set.\n"); + return NULL; + } + + if (max_xid <= min_xid || max_xid == FC_XID_UNKNOWN) { + QEDF_WARN(&(qedf->dbg_ctx), "Invalid min_xid 0x%x and " + "max_xid 0x%x.\n", min_xid, max_xid); + return NULL; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "min xid 0x%x, max xid " + "0x%x.\n", min_xid, max_xid); + + num_ios = max_xid - min_xid + 1; + + cmgr = vzalloc(sizeof(struct qedf_cmd_mgr)); + if (!cmgr) { + QEDF_WARN(&(qedf->dbg_ctx), "Failed to alloc cmd mgr.\n"); + return NULL; + } + + cmgr->qedf = qedf; + spin_lock_init(&cmgr->lock); + + /* + * Initialize list of qedf_ioreq. 
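+ * Each command is assigned a sequential XID starting at QEDF_MIN_XID,
+ * gets its timeout and RRQ delayed-work handlers, and a DMA-coherent
+ * sense buffer.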
+ */ + xid = QEDF_MIN_XID; + + for (i = 0; i < num_ios; i++) { + io_req = &cmgr->cmds[i]; + INIT_DELAYED_WORK(&io_req->timeout_work, qedf_cmd_timeout); + + io_req->xid = xid++; + + INIT_DELAYED_WORK(&io_req->rrq_work, qedf_handle_rrq); + + /* Allocate DMA memory to hold sense buffer */ + io_req->sense_buffer = dma_alloc_coherent(&qedf->pdev->dev, + QEDF_SCSI_SENSE_BUFFERSIZE, &io_req->sense_buffer_dma, + GFP_KERNEL); + if (!io_req->sense_buffer) + goto mem_err; + } + + /* Allocate pool of io_bdts - one for each qedf_ioreq */ + cmgr->io_bdt_pool = kmalloc_array(num_ios, sizeof(struct io_bdt *), + GFP_KERNEL); + + if (!cmgr->io_bdt_pool) { + QEDF_WARN(&(qedf->dbg_ctx), "Failed to alloc io_bdt_pool.\n"); + goto mem_err; + } + + for (i = 0; i < num_ios; i++) { + cmgr->io_bdt_pool[i] = kmalloc(sizeof(struct io_bdt), + GFP_KERNEL); + if (!cmgr->io_bdt_pool[i]) { + QEDF_WARN(&(qedf->dbg_ctx), "Failed to alloc " + "io_bdt_pool[%d].\n", i); + goto mem_err; + } + } + + for (i = 0; i < num_ios; i++) { + bdt_info = cmgr->io_bdt_pool[i]; + bdt_info->bd_tbl = dma_alloc_coherent(&qedf->pdev->dev, + QEDF_MAX_BDS_PER_CMD * sizeof(struct fcoe_sge), + &bdt_info->bd_tbl_dma, GFP_KERNEL); + if (!bdt_info->bd_tbl) { + QEDF_WARN(&(qedf->dbg_ctx), "Failed to alloc " + "bdt_tbl[%d].\n", i); + goto mem_err; + } + } + atomic_set(&cmgr->free_list_cnt, num_ios); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "cmgr->free_list_cnt=%d.\n", + atomic_read(&cmgr->free_list_cnt)); + + return cmgr; + +mem_err: + qedf_cmd_mgr_free(cmgr); + return NULL; +} + +struct qedf_ioreq *qedf_alloc_cmd(struct qedf_rport *fcport, u8 cmd_type) +{ + struct qedf_ctx *qedf = fcport->qedf; + struct qedf_cmd_mgr *cmd_mgr = qedf->cmd_mgr; + struct qedf_ioreq *io_req = NULL; + struct io_bdt *bd_tbl; + u16 xid; + uint32_t free_sqes; + int i; + unsigned long flags; + + free_sqes = atomic_read(&fcport->free_sqes); + + if (!free_sqes) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Returning NULL, free_sqes=%d.\n ", + free_sqes); + goto out_failed; + } + + /* Limit the number of outstanding R/W tasks */ + if ((atomic_read(&fcport->num_active_ios) >= + NUM_RW_TASKS_PER_CONNECTION)) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Returning NULL, num_active_ios=%d.\n", + atomic_read(&fcport->num_active_ios)); + goto out_failed; + } + + /* Limit global TIDs certain tasks */ + if (atomic_read(&cmd_mgr->free_list_cnt) <= GBL_RSVD_TASKS) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Returning NULL, free_list_cnt=%d.\n", + atomic_read(&cmd_mgr->free_list_cnt)); + goto out_failed; + } + + spin_lock_irqsave(&cmd_mgr->lock, flags); + for (i = 0; i < FCOE_PARAMS_NUM_TASKS; i++) { + io_req = &cmd_mgr->cmds[cmd_mgr->idx]; + cmd_mgr->idx++; + if (cmd_mgr->idx == FCOE_PARAMS_NUM_TASKS) + cmd_mgr->idx = 0; + + /* Check to make sure command was previously freed */ + if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags)) + break; + } + + if (i == FCOE_PARAMS_NUM_TASKS) { + spin_unlock_irqrestore(&cmd_mgr->lock, flags); + goto out_failed; + } + + set_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); + spin_unlock_irqrestore(&cmd_mgr->lock, flags); + + atomic_inc(&fcport->num_active_ios); + atomic_dec(&fcport->free_sqes); + xid = io_req->xid; + atomic_dec(&cmd_mgr->free_list_cnt); + + io_req->cmd_mgr = cmd_mgr; + io_req->fcport = fcport; + + /* Hold the io_req against deletion */ + kref_init(&io_req->refcount); + + /* Bind io_bdt for this io_req */ + /* Have a static link between io_req and io_bdt_pool */ + bd_tbl = io_req->bd_tbl = cmd_mgr->io_bdt_pool[xid]; + if (bd_tbl == 
NULL) { + QEDF_ERR(&(qedf->dbg_ctx), "bd_tbl is NULL, xid=%x.\n", xid); + kref_put(&io_req->refcount, qedf_release_cmd); + goto out_failed; + } + bd_tbl->io_req = io_req; + io_req->cmd_type = cmd_type; + + /* Reset sequence offset data */ + io_req->rx_buf_off = 0; + io_req->tx_buf_off = 0; + io_req->rx_id = 0xffff; /* No OX_ID */ + + return io_req; + +out_failed: + /* Record failure for stats and return NULL to caller */ + qedf->alloc_failures++; + return NULL; +} + +static void qedf_free_mp_resc(struct qedf_ioreq *io_req) +{ + struct qedf_mp_req *mp_req = &(io_req->mp_req); + struct qedf_ctx *qedf = io_req->fcport->qedf; + uint64_t sz = sizeof(struct fcoe_sge); + + /* clear tm flags */ + mp_req->tm_flags = 0; + if (mp_req->mp_req_bd) { + dma_free_coherent(&qedf->pdev->dev, sz, + mp_req->mp_req_bd, mp_req->mp_req_bd_dma); + mp_req->mp_req_bd = NULL; + } + if (mp_req->mp_resp_bd) { + dma_free_coherent(&qedf->pdev->dev, sz, + mp_req->mp_resp_bd, mp_req->mp_resp_bd_dma); + mp_req->mp_resp_bd = NULL; + } + if (mp_req->req_buf) { + dma_free_coherent(&qedf->pdev->dev, QEDF_PAGE_SIZE, + mp_req->req_buf, mp_req->req_buf_dma); + mp_req->req_buf = NULL; + } + if (mp_req->resp_buf) { + dma_free_coherent(&qedf->pdev->dev, QEDF_PAGE_SIZE, + mp_req->resp_buf, mp_req->resp_buf_dma); + mp_req->resp_buf = NULL; + } +} + +void qedf_release_cmd(struct kref *ref) +{ + struct qedf_ioreq *io_req = + container_of(ref, struct qedf_ioreq, refcount); + struct qedf_cmd_mgr *cmd_mgr = io_req->cmd_mgr; + struct qedf_rport *fcport = io_req->fcport; + + if (io_req->cmd_type == QEDF_ELS || + io_req->cmd_type == QEDF_TASK_MGMT_CMD) + qedf_free_mp_resc(io_req); + + atomic_inc(&cmd_mgr->free_list_cnt); + atomic_dec(&fcport->num_active_ios); + if (atomic_read(&fcport->num_active_ios) < 0) + QEDF_WARN(&(fcport->qedf->dbg_ctx), "active_ios < 0.\n"); + + /* Increment task retry identifier now that the request is released */ + io_req->task_retry_identifier++; + + clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); +} + +static int qedf_split_bd(struct qedf_ioreq *io_req, u64 addr, int sg_len, + int bd_index) +{ + struct fcoe_sge *bd = io_req->bd_tbl->bd_tbl; + int frag_size, sg_frags; + + sg_frags = 0; + while (sg_len) { + if (sg_len > QEDF_BD_SPLIT_SZ) + frag_size = QEDF_BD_SPLIT_SZ; + else + frag_size = sg_len; + bd[bd_index + sg_frags].sge_addr.lo = U64_LO(addr); + bd[bd_index + sg_frags].sge_addr.hi = U64_HI(addr); + bd[bd_index + sg_frags].size = (uint16_t)frag_size; + + addr += (u64)frag_size; + sg_frags++; + sg_len -= frag_size; + } + return sg_frags; +} + +static int qedf_map_sg(struct qedf_ioreq *io_req) +{ + struct scsi_cmnd *sc = io_req->sc_cmd; + struct Scsi_Host *host = sc->device->host; + struct fc_lport *lport = shost_priv(host); + struct qedf_ctx *qedf = lport_priv(lport); + struct fcoe_sge *bd = io_req->bd_tbl->bd_tbl; + struct scatterlist *sg; + int byte_count = 0; + int sg_count = 0; + int bd_count = 0; + int sg_frags; + unsigned int sg_len; + u64 addr, end_addr; + int i; + + sg_count = dma_map_sg(&qedf->pdev->dev, scsi_sglist(sc), + scsi_sg_count(sc), sc->sc_data_direction); + + sg = scsi_sglist(sc); + + /* + * New condition to send single SGE as cached-SGL with length less + * than 64k. 
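+ * The single SGE is then written straight into the BD table and the
+ * request needs no slow-path SGL setup.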
+ */ + if ((sg_count == 1) && (sg_dma_len(sg) <= + QEDF_MAX_SGLEN_FOR_CACHESGL)) { + sg_len = sg_dma_len(sg); + addr = (u64)sg_dma_address(sg); + + bd[bd_count].sge_addr.lo = (addr & 0xffffffff); + bd[bd_count].sge_addr.hi = (addr >> 32); + bd[bd_count].size = (u16)sg_len; + + return ++bd_count; + } + + scsi_for_each_sg(sc, sg, sg_count, i) { + sg_len = sg_dma_len(sg); + addr = (u64)sg_dma_address(sg); + end_addr = (u64)(addr + sg_len); + + /* + * First s/g element in the list so check if the end_addr + * is paged aligned. Also check to make sure the length is + * at least page size. + */ + if ((i == 0) && (sg_count > 1) && + ((end_addr % QEDF_PAGE_SIZE) || + sg_len < QEDF_PAGE_SIZE)) + io_req->use_slowpath = true; + /* + * Last s/g element so check if the start address is paged + * aligned. + */ + else if ((i == (sg_count - 1)) && (sg_count > 1) && + (addr % QEDF_PAGE_SIZE)) + io_req->use_slowpath = true; + /* + * Intermediate s/g element so check if start and end address + * is page aligned. + */ + else if ((i != 0) && (i != (sg_count - 1)) && + ((addr % QEDF_PAGE_SIZE) || (end_addr % QEDF_PAGE_SIZE))) + io_req->use_slowpath = true; + + if (sg_len > QEDF_MAX_BD_LEN) { + sg_frags = qedf_split_bd(io_req, addr, sg_len, + bd_count); + } else { + sg_frags = 1; + bd[bd_count].sge_addr.lo = U64_LO(addr); + bd[bd_count].sge_addr.hi = U64_HI(addr); + bd[bd_count].size = (uint16_t)sg_len; + } + + bd_count += sg_frags; + byte_count += sg_len; + } + + if (byte_count != scsi_bufflen(sc)) + QEDF_ERR(&(qedf->dbg_ctx), "byte_count = %d != " + "scsi_bufflen = %d, task_id = 0x%x.\n", byte_count, + scsi_bufflen(sc), io_req->xid); + + return bd_count; +} + +static int qedf_build_bd_list_from_sg(struct qedf_ioreq *io_req) +{ + struct scsi_cmnd *sc = io_req->sc_cmd; + struct fcoe_sge *bd = io_req->bd_tbl->bd_tbl; + int bd_count; + + if (scsi_sg_count(sc)) { + bd_count = qedf_map_sg(io_req); + if (bd_count == 0) + return -ENOMEM; + } else { + bd_count = 0; + bd[0].sge_addr.lo = bd[0].sge_addr.hi = 0; + bd[0].size = 0; + } + io_req->bd_tbl->bd_valid = bd_count; + + return 0; +} + +static void qedf_build_fcp_cmnd(struct qedf_ioreq *io_req, + struct fcp_cmnd *fcp_cmnd) +{ + struct scsi_cmnd *sc_cmd = io_req->sc_cmd; + + /* fcp_cmnd is 32 bytes */ + memset(fcp_cmnd, 0, FCP_CMND_LEN); + + /* 8 bytes: SCSI LUN info */ + int_to_scsilun(sc_cmd->device->lun, + (struct scsi_lun *)&fcp_cmnd->fc_lun); + + /* 4 bytes: flag info */ + fcp_cmnd->fc_pri_ta = 0; + fcp_cmnd->fc_tm_flags = io_req->mp_req.tm_flags; + fcp_cmnd->fc_flags = io_req->io_req_flags; + fcp_cmnd->fc_cmdref = 0; + + /* Populate data direction */ + if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) + fcp_cmnd->fc_flags |= FCP_CFL_WRDATA; + else if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) + fcp_cmnd->fc_flags |= FCP_CFL_RDDATA; + + fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE; + + /* 16 bytes: CDB information */ + memcpy(fcp_cmnd->fc_cdb, sc_cmd->cmnd, sc_cmd->cmd_len); + + /* 4 bytes: FCP data length */ + fcp_cmnd->fc_dl = htonl(io_req->data_xfer_len); + +} + +static void qedf_init_task(struct qedf_rport *fcport, struct fc_lport *lport, + struct qedf_ioreq *io_req, u32 *ptu_invalidate, + struct fcoe_task_context *task_ctx) +{ + enum fcoe_task_type task_type; + struct scsi_cmnd *sc_cmd = io_req->sc_cmd; + struct io_bdt *bd_tbl = io_req->bd_tbl; + union fcoe_data_desc_ctx *data_desc; + u32 *fcp_cmnd; + u32 tmp_fcp_cmnd[8]; + int cnt, i; + int bd_count; + struct qedf_ctx *qedf = fcport->qedf; + uint16_t cq_idx = smp_processor_id() % qedf->num_queues; + u8 
tmp_sgl_mode = 0; + u8 mst_sgl_mode = 0; + + memset(task_ctx, 0, sizeof(struct fcoe_task_context)); + io_req->task = task_ctx; + + if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) + task_type = FCOE_TASK_TYPE_WRITE_INITIATOR; + else + task_type = FCOE_TASK_TYPE_READ_INITIATOR; + + /* Y Storm context */ + task_ctx->ystorm_st_context.expect_first_xfer = 1; + task_ctx->ystorm_st_context.data_2_trns_rem = io_req->data_xfer_len; + /* Check if this is required */ + task_ctx->ystorm_st_context.ox_id = io_req->xid; + task_ctx->ystorm_st_context.task_rety_identifier = + io_req->task_retry_identifier; + + /* T Storm ag context */ + SET_FIELD(task_ctx->tstorm_ag_context.flags0, + TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE, PROTOCOLID_FCOE); + task_ctx->tstorm_ag_context.icid = (u16)fcport->fw_cid; + + /* T Storm st context */ + SET_FIELD(task_ctx->tstorm_st_context.read_write.flags, + FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME, + 1); + task_ctx->tstorm_st_context.read_write.rx_id = 0xffff; + + task_ctx->tstorm_st_context.read_only.dev_type = + FCOE_TASK_DEV_TYPE_DISK; + task_ctx->tstorm_st_context.read_only.conf_supported = 0; + task_ctx->tstorm_st_context.read_only.cid = fcport->fw_cid; + + /* Completion queue for response. */ + task_ctx->tstorm_st_context.read_only.glbl_q_num = cq_idx; + task_ctx->tstorm_st_context.read_only.fcp_cmd_trns_size = + io_req->data_xfer_len; + task_ctx->tstorm_st_context.read_write.e_d_tov_exp_timeout_val = + lport->e_d_tov; + + task_ctx->ustorm_ag_context.global_cq_num = cq_idx; + io_req->fp_idx = cq_idx; + + bd_count = bd_tbl->bd_valid; + if (task_type == FCOE_TASK_TYPE_WRITE_INITIATOR) { + /* Setup WRITE task */ + struct fcoe_sge *fcoe_bd_tbl = bd_tbl->bd_tbl; + + task_ctx->ystorm_st_context.task_type = + FCOE_TASK_TYPE_WRITE_INITIATOR; + data_desc = &task_ctx->ystorm_st_context.data_desc; + + if (io_req->use_slowpath) { + SET_FIELD(task_ctx->ystorm_st_context.sgl_mode, + YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE, + FCOE_SLOW_SGL); + data_desc->slow.base_sgl_addr.lo = + U64_LO(bd_tbl->bd_tbl_dma); + data_desc->slow.base_sgl_addr.hi = + U64_HI(bd_tbl->bd_tbl_dma); + data_desc->slow.remainder_num_sges = bd_count; + data_desc->slow.curr_sge_off = 0; + data_desc->slow.curr_sgl_index = 0; + qedf->slow_sge_ios++; + io_req->sge_type = QEDF_IOREQ_SLOW_SGE; + } else { + SET_FIELD(task_ctx->ystorm_st_context.sgl_mode, + YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE, + (bd_count <= 4) ? 
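/* bd_count of 1-4 is cast directly to the matching fast SGL mode */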
(enum fcoe_sgl_mode)bd_count : + FCOE_MUL_FAST_SGES); + + if (bd_count == 1) { + data_desc->single_sge.sge_addr.lo = + fcoe_bd_tbl->sge_addr.lo; + data_desc->single_sge.sge_addr.hi = + fcoe_bd_tbl->sge_addr.hi; + data_desc->single_sge.size = + fcoe_bd_tbl->size; + data_desc->single_sge.is_valid_sge = 0; + qedf->single_sge_ios++; + io_req->sge_type = QEDF_IOREQ_SINGLE_SGE; + } else { + data_desc->fast.sgl_start_addr.lo = + U64_LO(bd_tbl->bd_tbl_dma); + data_desc->fast.sgl_start_addr.hi = + U64_HI(bd_tbl->bd_tbl_dma); + data_desc->fast.sgl_byte_offset = + data_desc->fast.sgl_start_addr.lo & + (QEDF_PAGE_SIZE - 1); + if (data_desc->fast.sgl_byte_offset > 0) + QEDF_ERR(&(qedf->dbg_ctx), + "byte_offset=%u for xid=0x%x.\n", + io_req->xid, + data_desc->fast.sgl_byte_offset); + data_desc->fast.task_reuse_cnt = + io_req->reuse_count; + io_req->reuse_count++; + if (io_req->reuse_count == QEDF_MAX_REUSE) { + *ptu_invalidate = 1; + io_req->reuse_count = 0; + } + qedf->fast_sge_ios++; + io_req->sge_type = QEDF_IOREQ_FAST_SGE; + } + } + + /* T Storm context */ + task_ctx->tstorm_st_context.read_only.task_type = + FCOE_TASK_TYPE_WRITE_INITIATOR; + + /* M Storm context */ + tmp_sgl_mode = GET_FIELD(task_ctx->ystorm_st_context.sgl_mode, + YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE); + SET_FIELD(task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode, + FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE, + tmp_sgl_mode); + + } else { + /* Setup READ task */ + + /* M Storm context */ + struct fcoe_sge *fcoe_bd_tbl = bd_tbl->bd_tbl; + + data_desc = &task_ctx->mstorm_st_context.fp.data_desc; + task_ctx->mstorm_st_context.fp.data_2_trns_rem = + io_req->data_xfer_len; + + if (io_req->use_slowpath) { + SET_FIELD( + task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode, + FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE, + FCOE_SLOW_SGL); + data_desc->slow.base_sgl_addr.lo = + U64_LO(bd_tbl->bd_tbl_dma); + data_desc->slow.base_sgl_addr.hi = + U64_HI(bd_tbl->bd_tbl_dma); + data_desc->slow.remainder_num_sges = + bd_count; + data_desc->slow.curr_sge_off = 0; + data_desc->slow.curr_sgl_index = 0; + qedf->slow_sge_ios++; + io_req->sge_type = QEDF_IOREQ_SLOW_SGE; + } else { + SET_FIELD( + task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode, + FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE, + (bd_count <= 4) ? 
(enum fcoe_sgl_mode)bd_count : + FCOE_MUL_FAST_SGES); + + if (bd_count == 1) { + data_desc->single_sge.sge_addr.lo = + fcoe_bd_tbl->sge_addr.lo; + data_desc->single_sge.sge_addr.hi = + fcoe_bd_tbl->sge_addr.hi; + data_desc->single_sge.size = + fcoe_bd_tbl->size; + data_desc->single_sge.is_valid_sge = 0; + qedf->single_sge_ios++; + io_req->sge_type = QEDF_IOREQ_SINGLE_SGE; + } else { + data_desc->fast.sgl_start_addr.lo = + U64_LO(bd_tbl->bd_tbl_dma); + data_desc->fast.sgl_start_addr.hi = + U64_HI(bd_tbl->bd_tbl_dma); + data_desc->fast.sgl_byte_offset = 0; + data_desc->fast.task_reuse_cnt = + io_req->reuse_count; + io_req->reuse_count++; + if (io_req->reuse_count == QEDF_MAX_REUSE) { + *ptu_invalidate = 1; + io_req->reuse_count = 0; + } + qedf->fast_sge_ios++; + io_req->sge_type = QEDF_IOREQ_FAST_SGE; + } + } + + /* Y Storm context */ + task_ctx->ystorm_st_context.expect_first_xfer = 0; + task_ctx->ystorm_st_context.task_type = + FCOE_TASK_TYPE_READ_INITIATOR; + + /* T Storm context */ + task_ctx->tstorm_st_context.read_only.task_type = + FCOE_TASK_TYPE_READ_INITIATOR; + mst_sgl_mode = GET_FIELD( + task_ctx->mstorm_st_context.non_fp.tx_rx_sgl_mode, + FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE); + SET_FIELD(task_ctx->tstorm_st_context.read_write.flags, + FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE, + mst_sgl_mode); + } + + /* fill FCP_CMND IU */ + fcp_cmnd = (u32 *)task_ctx->ystorm_st_context.tx_info_union.fcp_cmd_payload.opaque; + qedf_build_fcp_cmnd(io_req, (struct fcp_cmnd *)&tmp_fcp_cmnd); + + /* Swap fcp_cmnd since FC is big endian */ + cnt = sizeof(struct fcp_cmnd) / sizeof(u32); + + for (i = 0; i < cnt; i++) { + *fcp_cmnd = cpu_to_be32(tmp_fcp_cmnd[i]); + fcp_cmnd++; + } + + /* M Storm context - Sense buffer */ + task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.lo = + U64_LO(io_req->sense_buffer_dma); + task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.hi = + U64_HI(io_req->sense_buffer_dma); +} + +void qedf_init_mp_task(struct qedf_ioreq *io_req, + struct fcoe_task_context *task_ctx) +{ + struct qedf_mp_req *mp_req = &(io_req->mp_req); + struct qedf_rport *fcport = io_req->fcport; + struct qedf_ctx *qedf = io_req->fcport->qedf; + struct fc_frame_header *fc_hdr; + enum fcoe_task_type task_type = 0; + union fcoe_data_desc_ctx *data_desc; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Initializing MP task " + "for cmd_type = %d\n", io_req->cmd_type); + + qedf->control_requests++; + + /* Obtain task_type */ + if ((io_req->cmd_type == QEDF_TASK_MGMT_CMD) || + (io_req->cmd_type == QEDF_ELS)) { + task_type = FCOE_TASK_TYPE_MIDPATH; + } else if (io_req->cmd_type == QEDF_ABTS) { + task_type = FCOE_TASK_TYPE_ABTS; + } + + memset(task_ctx, 0, sizeof(struct fcoe_task_context)); + + /* Setup the task from io_req for easy reference */ + io_req->task = task_ctx; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "task type = %d\n", + task_type); + + /* YSTORM only */ + { + /* Initialize YSTORM task context */ + struct fcoe_tx_mid_path_params *task_fc_hdr = + &task_ctx->ystorm_st_context.tx_info_union.tx_params.mid_path; + memset(task_fc_hdr, 0, sizeof(struct fcoe_tx_mid_path_params)); + task_ctx->ystorm_st_context.task_rety_identifier = + io_req->task_retry_identifier; + + /* Init SGL parameters */ + if ((task_type == FCOE_TASK_TYPE_MIDPATH) || + (task_type == FCOE_TASK_TYPE_UNSOLICITED)) { + data_desc = &task_ctx->ystorm_st_context.data_desc; + data_desc->slow.base_sgl_addr.lo = + U64_LO(mp_req->mp_req_bd_dma); + data_desc->slow.base_sgl_addr.hi = + U64_HI(mp_req->mp_req_bd_dma); + 
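/* A single BD describes the middle-path request buffer */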
data_desc->slow.remainder_num_sges = 1; + data_desc->slow.curr_sge_off = 0; + data_desc->slow.curr_sgl_index = 0; + } + + fc_hdr = &(mp_req->req_fc_hdr); + if (task_type == FCOE_TASK_TYPE_MIDPATH) { + fc_hdr->fh_ox_id = io_req->xid; + fc_hdr->fh_rx_id = htons(0xffff); + } else if (task_type == FCOE_TASK_TYPE_UNSOLICITED) { + fc_hdr->fh_rx_id = io_req->xid; + } + + /* Fill FC Header into middle path buffer */ + task_fc_hdr->parameter = fc_hdr->fh_parm_offset; + task_fc_hdr->r_ctl = fc_hdr->fh_r_ctl; + task_fc_hdr->type = fc_hdr->fh_type; + task_fc_hdr->cs_ctl = fc_hdr->fh_cs_ctl; + task_fc_hdr->df_ctl = fc_hdr->fh_df_ctl; + task_fc_hdr->rx_id = fc_hdr->fh_rx_id; + task_fc_hdr->ox_id = fc_hdr->fh_ox_id; + + task_ctx->ystorm_st_context.data_2_trns_rem = + io_req->data_xfer_len; + task_ctx->ystorm_st_context.task_type = task_type; + } + + /* TSTORM ONLY */ + { + task_ctx->tstorm_ag_context.icid = (u16)fcport->fw_cid; + task_ctx->tstorm_st_context.read_only.cid = fcport->fw_cid; + /* Always send middle-path repsonses on CQ #0 */ + task_ctx->tstorm_st_context.read_only.glbl_q_num = 0; + io_req->fp_idx = 0; + SET_FIELD(task_ctx->tstorm_ag_context.flags0, + TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE, + PROTOCOLID_FCOE); + task_ctx->tstorm_st_context.read_only.task_type = task_type; + SET_FIELD(task_ctx->tstorm_st_context.read_write.flags, + FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME, + 1); + task_ctx->tstorm_st_context.read_write.rx_id = 0xffff; + } + + /* MSTORM only */ + { + if (task_type == FCOE_TASK_TYPE_MIDPATH) { + /* Initialize task context */ + data_desc = &task_ctx->mstorm_st_context.fp.data_desc; + + /* Set cache sges address and length */ + data_desc->slow.base_sgl_addr.lo = + U64_LO(mp_req->mp_resp_bd_dma); + data_desc->slow.base_sgl_addr.hi = + U64_HI(mp_req->mp_resp_bd_dma); + data_desc->slow.remainder_num_sges = 1; + data_desc->slow.curr_sge_off = 0; + data_desc->slow.curr_sgl_index = 0; + + /* + * Also need to fil in non-fastpath response address + * for middle path commands. + */ + task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.lo = + U64_LO(mp_req->mp_resp_bd_dma); + task_ctx->mstorm_st_context.non_fp.rsp_buf_addr.hi = + U64_HI(mp_req->mp_resp_bd_dma); + } + } + + /* USTORM ONLY */ + { + task_ctx->ustorm_ag_context.global_cq_num = 0; + } + + /* I/O stats. 
Middle path commands always use slow SGEs */ + qedf->slow_sge_ios++; + io_req->sge_type = QEDF_IOREQ_SLOW_SGE; +} + +void qedf_add_to_sq(struct qedf_rport *fcport, u16 xid, u32 ptu_invalidate, + enum fcoe_task_type req_type, u32 offset) +{ + struct fcoe_wqe *sqe; + uint16_t total_sqe = (fcport->sq_mem_size)/(sizeof(struct fcoe_wqe)); + + sqe = &fcport->sq[fcport->sq_prod_idx]; + + fcport->sq_prod_idx++; + fcport->fw_sq_prod_idx++; + if (fcport->sq_prod_idx == total_sqe) + fcport->sq_prod_idx = 0; + + switch (req_type) { + case FCOE_TASK_TYPE_WRITE_INITIATOR: + case FCOE_TASK_TYPE_READ_INITIATOR: + SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, SEND_FCOE_CMD); + if (ptu_invalidate) + SET_FIELD(sqe->flags, FCOE_WQE_INVALIDATE_PTU, 1); + break; + case FCOE_TASK_TYPE_MIDPATH: + SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, SEND_FCOE_MIDPATH); + break; + case FCOE_TASK_TYPE_ABTS: + SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, + SEND_FCOE_ABTS_REQUEST); + break; + case FCOE_TASK_TYPE_EXCHANGE_CLEANUP: + SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, + FCOE_EXCHANGE_CLEANUP); + break; + case FCOE_TASK_TYPE_SEQUENCE_CLEANUP: + SET_FIELD(sqe->flags, FCOE_WQE_REQ_TYPE, + FCOE_SEQUENCE_RECOVERY); + /* NOTE: offset param only used for sequence recovery */ + sqe->additional_info_union.seq_rec_updated_offset = offset; + break; + case FCOE_TASK_TYPE_UNSOLICITED: + break; + default: + break; + } + + sqe->task_id = xid; + + /* Make sure SQ data is coherent */ + wmb(); + +} + +void qedf_ring_doorbell(struct qedf_rport *fcport) +{ + struct fcoe_db_data dbell = { 0 }; + + dbell.agg_flags = 0; + + dbell.params |= DB_DEST_XCM << FCOE_DB_DATA_DEST_SHIFT; + dbell.params |= DB_AGG_CMD_SET << FCOE_DB_DATA_AGG_CMD_SHIFT; + dbell.params |= DQ_XCM_FCOE_SQ_PROD_CMD << + FCOE_DB_DATA_AGG_VAL_SEL_SHIFT; + + dbell.sq_prod = fcport->fw_sq_prod_idx; + writel(*(u32 *)&dbell, fcport->p_doorbell); + /* Make sure SQ index is updated so f/w prcesses requests in order */ + wmb(); + mmiowb(); +} + +static void qedf_trace_io(struct qedf_rport *fcport, struct qedf_ioreq *io_req, + int8_t direction) +{ + struct qedf_ctx *qedf = fcport->qedf; + struct qedf_io_log *io_log; + struct scsi_cmnd *sc_cmd = io_req->sc_cmd; + unsigned long flags; + uint8_t op; + + spin_lock_irqsave(&qedf->io_trace_lock, flags); + + io_log = &qedf->io_trace_buf[qedf->io_trace_idx]; + io_log->direction = direction; + io_log->task_id = io_req->xid; + io_log->port_id = fcport->rdata->ids.port_id; + io_log->lun = sc_cmd->device->lun; + io_log->op = op = sc_cmd->cmnd[0]; + io_log->lba[0] = sc_cmd->cmnd[2]; + io_log->lba[1] = sc_cmd->cmnd[3]; + io_log->lba[2] = sc_cmd->cmnd[4]; + io_log->lba[3] = sc_cmd->cmnd[5]; + io_log->bufflen = scsi_bufflen(sc_cmd); + io_log->sg_count = scsi_sg_count(sc_cmd); + io_log->result = sc_cmd->result; + io_log->jiffies = jiffies; + io_log->refcount = atomic_read(&io_req->refcount.refcount); + + if (direction == QEDF_IO_TRACE_REQ) { + /* For requests we only care abot the submission CPU */ + io_log->req_cpu = io_req->cpu; + io_log->int_cpu = 0; + io_log->rsp_cpu = 0; + } else if (direction == QEDF_IO_TRACE_RSP) { + io_log->req_cpu = io_req->cpu; + io_log->int_cpu = io_req->int_cpu; + io_log->rsp_cpu = smp_processor_id(); + } + + io_log->sge_type = io_req->sge_type; + + qedf->io_trace_idx++; + if (qedf->io_trace_idx == QEDF_IO_TRACE_SIZE) + qedf->io_trace_idx = 0; + + spin_unlock_irqrestore(&qedf->io_trace_lock, flags); +} + +int qedf_post_io_req(struct qedf_rport *fcport, struct qedf_ioreq *io_req) +{ + struct scsi_cmnd *sc_cmd = io_req->sc_cmd; + struct 
Scsi_Host *host = sc_cmd->device->host; + struct fc_lport *lport = shost_priv(host); + struct qedf_ctx *qedf = lport_priv(lport); + struct fcoe_task_context *task_ctx; + u16 xid; + enum fcoe_task_type req_type = 0; + u32 ptu_invalidate = 0; + + /* Initialize rest of io_req fileds */ + io_req->data_xfer_len = scsi_bufflen(sc_cmd); + sc_cmd->SCp.ptr = (char *)io_req; + io_req->use_slowpath = false; /* Assume fast SGL by default */ + + /* Record which cpu this request is associated with */ + io_req->cpu = smp_processor_id(); + + if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) { + req_type = FCOE_TASK_TYPE_READ_INITIATOR; + io_req->io_req_flags = QEDF_READ; + qedf->input_requests++; + } else if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { + req_type = FCOE_TASK_TYPE_WRITE_INITIATOR; + io_req->io_req_flags = QEDF_WRITE; + qedf->output_requests++; + } else { + io_req->io_req_flags = 0; + qedf->control_requests++; + } + + xid = io_req->xid; + + /* Build buffer descriptor list for firmware from sg list */ + if (qedf_build_bd_list_from_sg(io_req)) { + QEDF_ERR(&(qedf->dbg_ctx), "BD list creation failed.\n"); + kref_put(&io_req->refcount, qedf_release_cmd); + return -EAGAIN; + } + + /* Get the task context */ + task_ctx = qedf_get_task_mem(&qedf->tasks, xid); + if (!task_ctx) { + QEDF_WARN(&(qedf->dbg_ctx), "task_ctx is NULL, xid=%d.\n", + xid); + kref_put(&io_req->refcount, qedf_release_cmd); + return -EINVAL; + } + + qedf_init_task(fcport, lport, io_req, &ptu_invalidate, task_ctx); + + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "Session not offloaded yet.\n"); + kref_put(&io_req->refcount, qedf_release_cmd); + } + + /* Obtain free SQ entry */ + qedf_add_to_sq(fcport, xid, ptu_invalidate, req_type, 0); + + /* Ring doorbell */ + qedf_ring_doorbell(fcport); + + if (qedf_io_tracing && io_req->sc_cmd) + qedf_trace_io(fcport, io_req, QEDF_IO_TRACE_REQ); + + return false; +} + +int +qedf_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc_cmd) +{ + struct fc_lport *lport = shost_priv(host); + struct qedf_ctx *qedf = lport_priv(lport); + struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); + struct fc_rport_libfc_priv *rp = rport->dd_data; + struct qedf_rport *fcport = rport->dd_data; + struct qedf_ioreq *io_req; + int rc = 0; + int rval; + unsigned long flags = 0; + + + if (test_bit(QEDF_UNLOADING, &qedf->flags) || + test_bit(QEDF_DBG_STOP_IO, &qedf->flags)) { + sc_cmd->result = DID_NO_CONNECT << 16; + sc_cmd->scsi_done(sc_cmd); + return 0; + } + + rval = fc_remote_port_chkready(rport); + if (rval) { + sc_cmd->result = rval; + sc_cmd->scsi_done(sc_cmd); + return 0; + } + + /* Retry command if we are doing a qed drain operation */ + if (test_bit(QEDF_DRAIN_ACTIVE, &qedf->flags)) { + rc = SCSI_MLQUEUE_HOST_BUSY; + goto exit_qcmd; + } + + if (lport->state != LPORT_ST_READY || + atomic_read(&qedf->link_state) != QEDF_LINK_UP) { + rc = SCSI_MLQUEUE_HOST_BUSY; + goto exit_qcmd; + } + + /* rport and tgt are allocated together, so tgt should be non-NULL */ + fcport = (struct qedf_rport *)&rp[1]; + + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + /* + * Session is not offloaded yet. Let SCSI-ml retry + * the command. 
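+ * SCSI_MLQUEUE_TARGET_BUSY makes the midlayer requeue the command
+ * and retry it shortly.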
+ */ + rc = SCSI_MLQUEUE_TARGET_BUSY; + goto exit_qcmd; + } + if (fcport->retry_delay_timestamp) { + if (time_after(jiffies, fcport->retry_delay_timestamp)) { + fcport->retry_delay_timestamp = 0; + } else { + /* If retry_delay timer is active, flow off the ML */ + rc = SCSI_MLQUEUE_TARGET_BUSY; + goto exit_qcmd; + } + } + + io_req = qedf_alloc_cmd(fcport, QEDF_SCSI_CMD); + if (!io_req) { + rc = SCSI_MLQUEUE_HOST_BUSY; + goto exit_qcmd; + } + + io_req->sc_cmd = sc_cmd; + + /* Take fcport->rport_lock for posting to fcport send queue */ + spin_lock_irqsave(&fcport->rport_lock, flags); + if (qedf_post_io_req(fcport, io_req)) { + QEDF_WARN(&(qedf->dbg_ctx), "Unable to post io_req\n"); + /* Return SQE to pool */ + atomic_inc(&fcport->free_sqes); + rc = SCSI_MLQUEUE_HOST_BUSY; + } + spin_unlock_irqrestore(&fcport->rport_lock, flags); + +exit_qcmd: + return rc; +} + +static void qedf_parse_fcp_rsp(struct qedf_ioreq *io_req, + struct fcoe_cqe_rsp_info *fcp_rsp) +{ + struct scsi_cmnd *sc_cmd = io_req->sc_cmd; + struct qedf_ctx *qedf = io_req->fcport->qedf; + u8 rsp_flags = fcp_rsp->rsp_flags.flags; + int fcp_sns_len = 0; + int fcp_rsp_len = 0; + uint8_t *rsp_info, *sense_data; + + io_req->fcp_status = FC_GOOD; + io_req->fcp_resid = 0; + if (rsp_flags & (FCOE_FCP_RSP_FLAGS_FCP_RESID_OVER | + FCOE_FCP_RSP_FLAGS_FCP_RESID_UNDER)) + io_req->fcp_resid = fcp_rsp->fcp_resid; + + io_req->scsi_comp_flags = rsp_flags; + CMD_SCSI_STATUS(sc_cmd) = io_req->cdb_status = + fcp_rsp->scsi_status_code; + + if (rsp_flags & + FCOE_FCP_RSP_FLAGS_FCP_RSP_LEN_VALID) + fcp_rsp_len = fcp_rsp->fcp_rsp_len; + + if (rsp_flags & + FCOE_FCP_RSP_FLAGS_FCP_SNS_LEN_VALID) + fcp_sns_len = fcp_rsp->fcp_sns_len; + + io_req->fcp_rsp_len = fcp_rsp_len; + io_req->fcp_sns_len = fcp_sns_len; + rsp_info = sense_data = io_req->sense_buffer; + + /* fetch fcp_rsp_code */ + if ((fcp_rsp_len == 4) || (fcp_rsp_len == 8)) { + /* Only for task management function */ + io_req->fcp_rsp_code = rsp_info[3]; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "fcp_rsp_code = %d\n", io_req->fcp_rsp_code); + /* Adjust sense-data location. 
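+ * When the FCP_RSP carries response-info bytes they precede the
+ * sense data, so step past fcp_rsp_len before copying the sense data.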
*/ + sense_data += fcp_rsp_len; + } + + if (fcp_sns_len > SCSI_SENSE_BUFFERSIZE) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Truncating sense buffer\n"); + fcp_sns_len = SCSI_SENSE_BUFFERSIZE; + } + + memset(sc_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); + if (fcp_sns_len) + memcpy(sc_cmd->sense_buffer, sense_data, + fcp_sns_len); +} + +static void qedf_unmap_sg_list(struct qedf_ctx *qedf, struct qedf_ioreq *io_req) +{ + struct scsi_cmnd *sc = io_req->sc_cmd; + + if (io_req->bd_tbl->bd_valid && sc && scsi_sg_count(sc)) { + dma_unmap_sg(&qedf->pdev->dev, scsi_sglist(sc), + scsi_sg_count(sc), sc->sc_data_direction); + io_req->bd_tbl->bd_valid = 0; + } +} + +void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + u16 xid, rval; + struct fcoe_task_context *task_ctx; + struct scsi_cmnd *sc_cmd; + struct fcoe_cqe_rsp_info *fcp_rsp; + struct qedf_rport *fcport; + int refcount; + u16 scope, qualifier = 0; + u8 fw_residual_flag = 0; + + if (!io_req) + return; + if (!cqe) + return; + + xid = io_req->xid; + task_ctx = qedf_get_task_mem(&qedf->tasks, xid); + sc_cmd = io_req->sc_cmd; + fcp_rsp = &cqe->cqe_info.rsp_info; + + if (!sc_cmd) { + QEDF_WARN(&(qedf->dbg_ctx), "sc_cmd is NULL!\n"); + return; + } + + if (!sc_cmd->SCp.ptr) { + QEDF_WARN(&(qedf->dbg_ctx), "SCp.ptr is NULL, returned in " + "another context.\n"); + return; + } + + if (!sc_cmd->request) { + QEDF_WARN(&(qedf->dbg_ctx), "sc_cmd->request is NULL, " + "sc_cmd=%p.\n", sc_cmd); + return; + } + + if (!sc_cmd->request->special) { + QEDF_WARN(&(qedf->dbg_ctx), "request->special is NULL so " + "request not valid, sc_cmd=%p.\n", sc_cmd); + return; + } + + if (!sc_cmd->request->q) { + QEDF_WARN(&(qedf->dbg_ctx), "request->q is NULL so request " + "is not valid, sc_cmd=%p.\n", sc_cmd); + return; + } + + fcport = io_req->fcport; + + qedf_parse_fcp_rsp(io_req, fcp_rsp); + + qedf_unmap_sg_list(qedf, io_req); + + /* Check for FCP transport error */ + if (io_req->fcp_rsp_len > 3 && io_req->fcp_rsp_code) { + QEDF_ERR(&(qedf->dbg_ctx), + "FCP I/O protocol failure xid=0x%x fcp_rsp_len=%d " + "fcp_rsp_code=%d.\n", io_req->xid, io_req->fcp_rsp_len, + io_req->fcp_rsp_code); + sc_cmd->result = DID_BUS_BUSY << 16; + goto out; + } + + fw_residual_flag = GET_FIELD(cqe->cqe_info.rsp_info.fw_error_flags, + FCOE_CQE_RSP_INFO_FW_UNDERRUN); + if (fw_residual_flag) { + QEDF_ERR(&(qedf->dbg_ctx), + "Firmware detected underrun: xid=0x%x fcp_rsp.flags=0x%02x " + "fcp_resid=%d fw_residual=0x%x.\n", io_req->xid, + fcp_rsp->rsp_flags.flags, io_req->fcp_resid, + cqe->cqe_info.rsp_info.fw_residual); + + if (io_req->cdb_status == 0) + sc_cmd->result = (DID_ERROR << 16) | io_req->cdb_status; + else + sc_cmd->result = (DID_OK << 16) | io_req->cdb_status; + + /* Abort the command since we did not get all the data */ + init_completion(&io_req->abts_done); + rval = qedf_initiate_abts(io_req, true); + if (rval) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to queue ABTS.\n"); + sc_cmd->result = (DID_ERROR << 16) | io_req->cdb_status; + } + + /* + * Set resid to the whole buffer length so we won't try to resue + * any previously data. 
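+ * (i.e. report the whole transfer as residual so the midlayer does
+ * not rely on data that may have been only partially received.)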
+ */ + scsi_set_resid(sc_cmd, scsi_bufflen(sc_cmd)); + goto out; + } + + switch (io_req->fcp_status) { + case FC_GOOD: + if (io_req->cdb_status == 0) { + /* Good I/O completion */ + sc_cmd->result = DID_OK << 16; + } else { + refcount = atomic_read(&io_req->refcount.refcount); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "%d:0:%d:%d xid=0x%0x op=0x%02x " + "lba=%02x%02x%02x%02x cdb_status=%d " + "fcp_resid=0x%x refcount=%d.\n", + qedf->lport->host->host_no, sc_cmd->device->id, + sc_cmd->device->lun, io_req->xid, + sc_cmd->cmnd[0], sc_cmd->cmnd[2], sc_cmd->cmnd[3], + sc_cmd->cmnd[4], sc_cmd->cmnd[5], + io_req->cdb_status, io_req->fcp_resid, + refcount); + sc_cmd->result = (DID_OK << 16) | io_req->cdb_status; + + if (io_req->cdb_status == SAM_STAT_TASK_SET_FULL || + io_req->cdb_status == SAM_STAT_BUSY) { + /* + * Check whether we need to set retry_delay at + * all based on retry_delay module parameter + * and the status qualifier. + */ + + /* Upper 2 bits */ + scope = fcp_rsp->retry_delay_timer & 0xC000; + /* Lower 14 bits */ + qualifier = fcp_rsp->retry_delay_timer & 0x3FFF; + + if (qedf_retry_delay && + scope > 0 && qualifier > 0 && + qualifier <= 0x3FEF) { + /* Check we don't go over the max */ + if (qualifier > QEDF_RETRY_DELAY_MAX) + qualifier = + QEDF_RETRY_DELAY_MAX; + fcport->retry_delay_timestamp = + jiffies + (qualifier * HZ / 10); + } + } + } + if (io_req->fcp_resid) + scsi_set_resid(sc_cmd, io_req->fcp_resid); + break; + default: + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "fcp_status=%d.\n", + io_req->fcp_status); + break; + } + +out: + if (qedf_io_tracing) + qedf_trace_io(fcport, io_req, QEDF_IO_TRACE_RSP); + + io_req->sc_cmd = NULL; + sc_cmd->SCp.ptr = NULL; + sc_cmd->scsi_done(sc_cmd); + kref_put(&io_req->refcount, qedf_release_cmd); +} + +/* Return a SCSI command in some other context besides a normal completion */ +void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, + int result) +{ + u16 xid; + struct scsi_cmnd *sc_cmd; + int refcount; + + if (!io_req) + return; + + xid = io_req->xid; + sc_cmd = io_req->sc_cmd; + + if (!sc_cmd) { + QEDF_WARN(&(qedf->dbg_ctx), "sc_cmd is NULL!\n"); + return; + } + + if (!sc_cmd->SCp.ptr) { + QEDF_WARN(&(qedf->dbg_ctx), "SCp.ptr is NULL, returned in " + "another context.\n"); + return; + } + + qedf_unmap_sg_list(qedf, io_req); + + sc_cmd->result = result << 16; + refcount = atomic_read(&io_req->refcount.refcount); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing " + "sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, " + "allowed=%d retries=%d refcount=%d.\n", + qedf->lport->host->host_no, sc_cmd->device->id, + sc_cmd->device->lun, sc_cmd, sc_cmd->result, sc_cmd->cmnd[0], + sc_cmd->cmnd[2], sc_cmd->cmnd[3], sc_cmd->cmnd[4], + sc_cmd->cmnd[5], sc_cmd->allowed, sc_cmd->retries, + refcount); + + /* + * Set resid to the whole buffer length so we won't try to resue any + * previously read data + */ + scsi_set_resid(sc_cmd, scsi_bufflen(sc_cmd)); + + if (qedf_io_tracing) + qedf_trace_io(io_req->fcport, io_req, QEDF_IO_TRACE_RSP); + + io_req->sc_cmd = NULL; + sc_cmd->SCp.ptr = NULL; + sc_cmd->scsi_done(sc_cmd); + kref_put(&io_req->refcount, qedf_release_cmd); +} + +/* + * Handle warning type CQE completions. This is mainly used for REC timer + * popping. 
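+ * For tape devices a REC_TOV expiration warning is handled by sending
+ * a REC to keep the exchange open; all other warnings fall through to
+ * an ABTS of the exchange.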
+ */ +void qedf_process_warning_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + int rval, i; + struct qedf_rport *fcport = io_req->fcport; + u64 err_warn_bit_map; + u8 err_warn = 0xff; + + if (!cqe) + return; + + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), "Warning CQE, " + "xid=0x%x\n", io_req->xid); + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), + "err_warn_bitmap=%08x:%08x\n", + le32_to_cpu(cqe->cqe_info.err_info.err_warn_bitmap_hi), + le32_to_cpu(cqe->cqe_info.err_info.err_warn_bitmap_lo)); + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), "tx_buff_off=%08x, " + "rx_buff_off=%08x, rx_id=%04x\n", + le32_to_cpu(cqe->cqe_info.err_info.tx_buf_off), + le32_to_cpu(cqe->cqe_info.err_info.rx_buf_off), + le32_to_cpu(cqe->cqe_info.err_info.rx_id)); + + /* Normalize the error bitmap value to an just an unsigned int */ + err_warn_bit_map = (u64) + ((u64)cqe->cqe_info.err_info.err_warn_bitmap_hi << 32) | + (u64)cqe->cqe_info.err_info.err_warn_bitmap_lo; + for (i = 0; i < 64; i++) { + if (err_warn_bit_map & (u64)((u64)1 << i)) { + err_warn = i; + break; + } + } + + /* Check if REC TOV expired if this is a tape device */ + if (fcport->dev_type == QEDF_RPORT_TYPE_TAPE) { + if (err_warn == + FCOE_WARNING_CODE_REC_TOV_TIMER_EXPIRATION) { + QEDF_ERR(&(qedf->dbg_ctx), "REC timer expired.\n"); + if (!test_bit(QEDF_CMD_SRR_SENT, &io_req->flags)) { + io_req->rx_buf_off = + cqe->cqe_info.err_info.rx_buf_off; + io_req->tx_buf_off = + cqe->cqe_info.err_info.tx_buf_off; + io_req->rx_id = cqe->cqe_info.err_info.rx_id; + rval = qedf_send_rec(io_req); + /* + * We only want to abort the io_req if we + * can't queue the REC command as we want to + * keep the exchange open for recovery. + */ + if (rval) + goto send_abort; + } + return; + } + } + +send_abort: + init_completion(&io_req->abts_done); + rval = qedf_initiate_abts(io_req, true); + if (rval) + QEDF_ERR(&(qedf->dbg_ctx), "Failed to queue ABTS.\n"); +} + +/* Cleanup a command when we receive an error detection completion */ +void qedf_process_error_detect(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + int rval; + + if (!cqe) + return; + + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), "Error detection CQE, " + "xid=0x%x\n", io_req->xid); + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), + "err_warn_bitmap=%08x:%08x\n", + le32_to_cpu(cqe->cqe_info.err_info.err_warn_bitmap_hi), + le32_to_cpu(cqe->cqe_info.err_info.err_warn_bitmap_lo)); + QEDF_ERR(&(io_req->fcport->qedf->dbg_ctx), "tx_buff_off=%08x, " + "rx_buff_off=%08x, rx_id=%04x\n", + le32_to_cpu(cqe->cqe_info.err_info.tx_buf_off), + le32_to_cpu(cqe->cqe_info.err_info.rx_buf_off), + le32_to_cpu(cqe->cqe_info.err_info.rx_id)); + + if (qedf->stop_io_on_error) { + qedf_stop_all_io(qedf); + return; + } + + init_completion(&io_req->abts_done); + rval = qedf_initiate_abts(io_req, true); + if (rval) + QEDF_ERR(&(qedf->dbg_ctx), "Failed to queue ABTS.\n"); +} + +static void qedf_flush_els_req(struct qedf_ctx *qedf, + struct qedf_ioreq *els_req) +{ + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Flushing ELS request xid=0x%x refcount=%d.\n", els_req->xid, + atomic_read(&els_req->refcount.refcount)); + + /* + * Need to distinguish this from a timeout when calling the + * els_req->cb_func. 
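+ * (els_req->event is set to QEDF_IOREQ_EV_ELS_FLUSH below so the
+ * callback can tell a flush apart from a timeout.)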
+ */ + els_req->event = QEDF_IOREQ_EV_ELS_FLUSH; + + /* Cancel the timer */ + cancel_delayed_work_sync(&els_req->timeout_work); + + /* Call callback function to complete command */ + if (els_req->cb_func && els_req->cb_arg) { + els_req->cb_func(els_req->cb_arg); + els_req->cb_arg = NULL; + } + + /* Release kref for original initiate_els */ + kref_put(&els_req->refcount, qedf_release_cmd); +} + +/* A value of -1 for lun is a wild card that means flush all + * active SCSI I/Os for the target. + */ +void qedf_flush_active_ios(struct qedf_rport *fcport, int lun) +{ + struct qedf_ioreq *io_req; + struct qedf_ctx *qedf; + struct qedf_cmd_mgr *cmd_mgr; + int i, rc; + + if (!fcport) + return; + + qedf = fcport->qedf; + cmd_mgr = qedf->cmd_mgr; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Flush active i/o's.\n"); + + for (i = 0; i < FCOE_PARAMS_NUM_TASKS; i++) { + io_req = &cmd_mgr->cmds[i]; + + if (!io_req) + continue; + if (io_req->fcport != fcport) + continue; + if (io_req->cmd_type == QEDF_ELS) { + rc = kref_get_unless_zero(&io_req->refcount); + if (!rc) { + QEDF_ERR(&(qedf->dbg_ctx), + "Could not get kref for io_req=0x%p.\n", + io_req); + continue; + } + qedf_flush_els_req(qedf, io_req); + /* + * Release the kref and go back to the top of the + * loop. + */ + goto free_cmd; + } + + if (!io_req->sc_cmd) + continue; + if (lun > 0) { + if (io_req->sc_cmd->device->lun != + (u64)lun) + continue; + } + + /* + * Use kref_get_unless_zero in the unlikely case the command + * we're about to flush was completed in the normal SCSI path + */ + rc = kref_get_unless_zero(&io_req->refcount); + if (!rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not get kref for " + "io_req=0x%p\n", io_req); + continue; + } + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Cleanup xid=0x%x.\n", io_req->xid); + + /* Cleanup task and return I/O mid-layer */ + qedf_initiate_cleanup(io_req, true); + +free_cmd: + kref_put(&io_req->refcount, qedf_release_cmd); + } +} + +/* + * Initiate a ABTS middle path command. Note that we don't have to initialize + * the task context for an ABTS task. 
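+ * The ABTS reuses the xid of the original I/O, so only an SQE of
+ * type FCOE_TASK_TYPE_ABTS is posted for it.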
+ */ +int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts) +{ + struct fc_lport *lport; + struct qedf_rport *fcport = io_req->fcport; + struct fc_rport_priv *rdata = fcport->rdata; + struct qedf_ctx *qedf = fcport->qedf; + u16 xid; + u32 r_a_tov = 0; + int rc = 0; + unsigned long flags; + + r_a_tov = rdata->r_a_tov; + lport = qedf->lport; + + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "tgt not offloaded\n"); + rc = 1; + goto abts_err; + } + + if (lport->state != LPORT_ST_READY || !(lport->link_up)) { + QEDF_ERR(&(qedf->dbg_ctx), "link is not ready\n"); + rc = 1; + goto abts_err; + } + + if (atomic_read(&qedf->link_down_tmo_valid) > 0) { + QEDF_ERR(&(qedf->dbg_ctx), "link_down_tmo active.\n"); + rc = 1; + goto abts_err; + } + + /* Ensure room on SQ */ + if (!atomic_read(&fcport->free_sqes)) { + QEDF_ERR(&(qedf->dbg_ctx), "No SQ entries available\n"); + rc = 1; + goto abts_err; + } + + + kref_get(&io_req->refcount); + + xid = io_req->xid; + qedf->control_requests++; + qedf->packet_aborts++; + + /* Set the return CPU to be the same as the request one */ + io_req->cpu = smp_processor_id(); + + /* Set the command type to abort */ + io_req->cmd_type = QEDF_ABTS; + io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts; + + set_bit(QEDF_CMD_IN_ABORT, &io_req->flags); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, "ABTS io_req xid = " + "0x%x\n", xid); + + qedf_cmd_timer_set(qedf, io_req, QEDF_ABORT_TIMEOUT * HZ); + + spin_lock_irqsave(&fcport->rport_lock, flags); + + /* Add ABTS to send queue */ + qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_ABTS, 0); + + /* Ring doorbell */ + qedf_ring_doorbell(fcport); + + spin_unlock_irqrestore(&fcport->rport_lock, flags); + + return rc; +abts_err: + /* + * If the ABTS task fails to queue then we need to cleanup the + * task at the firmware. + */ + qedf_initiate_cleanup(io_req, return_scsi_cmd_on_abts); + return rc; +} + +void qedf_process_abts_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + uint32_t r_ctl; + uint16_t xid; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, "Entered with xid = " + "0x%x cmd_type = %d\n", io_req->xid, io_req->cmd_type); + + cancel_delayed_work(&io_req->timeout_work); + + xid = io_req->xid; + r_ctl = cqe->cqe_info.abts_info.r_ctl; + + switch (r_ctl) { + case FC_RCTL_BA_ACC: + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, + "ABTS response - ACC Send RRQ after R_A_TOV\n"); + io_req->event = QEDF_IOREQ_EV_ABORT_SUCCESS; + /* + * Dont release this cmd yet. 
It will be relesed + * after we get RRQ response + */ + kref_get(&io_req->refcount); + queue_delayed_work(qedf->dpc_wq, &io_req->rrq_work, + msecs_to_jiffies(qedf->lport->r_a_tov)); + break; + /* For error cases let the cleanup return the command */ + case FC_RCTL_BA_RJT: + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, + "ABTS response - RJT\n"); + io_req->event = QEDF_IOREQ_EV_ABORT_FAILED; + break; + default: + QEDF_ERR(&(qedf->dbg_ctx), "Unknown ABTS response\n"); + break; + } + + clear_bit(QEDF_CMD_IN_ABORT, &io_req->flags); + + if (io_req->sc_cmd) { + if (io_req->return_scsi_cmd_on_abts) + qedf_scsi_done(qedf, io_req, DID_ERROR); + } + + /* Notify eh_abort handler that ABTS is complete */ + complete(&io_req->abts_done); + + kref_put(&io_req->refcount, qedf_release_cmd); +} + +int qedf_init_mp_req(struct qedf_ioreq *io_req) +{ + struct qedf_mp_req *mp_req; + struct fcoe_sge *mp_req_bd; + struct fcoe_sge *mp_resp_bd; + struct qedf_ctx *qedf = io_req->fcport->qedf; + dma_addr_t addr; + uint64_t sz; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_MP_REQ, "Entered.\n"); + + mp_req = (struct qedf_mp_req *)&(io_req->mp_req); + memset(mp_req, 0, sizeof(struct qedf_mp_req)); + + if (io_req->cmd_type != QEDF_ELS) { + mp_req->req_len = sizeof(struct fcp_cmnd); + io_req->data_xfer_len = mp_req->req_len; + } else + mp_req->req_len = io_req->data_xfer_len; + + mp_req->req_buf = dma_alloc_coherent(&qedf->pdev->dev, QEDF_PAGE_SIZE, + &mp_req->req_buf_dma, GFP_KERNEL); + if (!mp_req->req_buf) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to alloc MP req buffer\n"); + qedf_free_mp_resc(io_req); + return -ENOMEM; + } + + mp_req->resp_buf = dma_alloc_coherent(&qedf->pdev->dev, + QEDF_PAGE_SIZE, &mp_req->resp_buf_dma, GFP_KERNEL); + if (!mp_req->resp_buf) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to alloc TM resp " + "buffer\n"); + qedf_free_mp_resc(io_req); + return -ENOMEM; + } + + /* Allocate and map mp_req_bd and mp_resp_bd */ + sz = sizeof(struct fcoe_sge); + mp_req->mp_req_bd = dma_alloc_coherent(&qedf->pdev->dev, sz, + &mp_req->mp_req_bd_dma, GFP_KERNEL); + if (!mp_req->mp_req_bd) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to alloc MP req bd\n"); + qedf_free_mp_resc(io_req); + return -ENOMEM; + } + + mp_req->mp_resp_bd = dma_alloc_coherent(&qedf->pdev->dev, sz, + &mp_req->mp_resp_bd_dma, GFP_KERNEL); + if (!mp_req->mp_resp_bd) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to alloc MP resp bd\n"); + qedf_free_mp_resc(io_req); + return -ENOMEM; + } + + /* Fill bd table */ + addr = mp_req->req_buf_dma; + mp_req_bd = mp_req->mp_req_bd; + mp_req_bd->sge_addr.lo = U64_LO(addr); + mp_req_bd->sge_addr.hi = U64_HI(addr); + mp_req_bd->size = QEDF_PAGE_SIZE; + + /* + * MP buffer is either a task mgmt command or an ELS. + * So the assumption is that it consumes a single bd + * entry in the bd table + */ + mp_resp_bd = mp_req->mp_resp_bd; + addr = mp_req->resp_buf_dma; + mp_resp_bd->sge_addr.lo = U64_LO(addr); + mp_resp_bd->sge_addr.hi = U64_HI(addr); + mp_resp_bd->size = QEDF_PAGE_SIZE; + + return 0; +} + +/* + * Last ditch effort to clear the port if it's stuck. Used only after a + * cleanup task times out. + */ +static void qedf_drain_request(struct qedf_ctx *qedf) +{ + if (test_bit(QEDF_DRAIN_ACTIVE, &qedf->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "MCP drain already active.\n"); + return; + } + + /* Set bit to return all queuecommand requests as busy */ + set_bit(QEDF_DRAIN_ACTIVE, &qedf->flags); + + /* Call qed drain request for function. 
Should be synchronous */ + qed_ops->common->drain(qedf->cdev); + + /* Settle time for CQEs to be returned */ + msleep(100); + + /* Unplug and continue */ + clear_bit(QEDF_DRAIN_ACTIVE, &qedf->flags); +} + +/* + * Returns SUCCESS if the cleanup task does not timeout, otherwise return + * FAILURE. + */ +int qedf_initiate_cleanup(struct qedf_ioreq *io_req, + bool return_scsi_cmd_on_abts) +{ + struct qedf_rport *fcport; + struct qedf_ctx *qedf; + uint16_t xid; + struct fcoe_task_context *task; + int tmo = 0; + int rc = SUCCESS; + unsigned long flags; + + fcport = io_req->fcport; + if (!fcport) { + QEDF_ERR(NULL, "fcport is NULL.\n"); + return SUCCESS; + } + + qedf = fcport->qedf; + if (!qedf) { + QEDF_ERR(NULL, "qedf is NULL.\n"); + return SUCCESS; + } + + if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags) || + test_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "io_req xid=0x%x already in " + "cleanup processing or already completed.\n", + io_req->xid); + return SUCCESS; + } + + /* Ensure room on SQ */ + if (!atomic_read(&fcport->free_sqes)) { + QEDF_ERR(&(qedf->dbg_ctx), "No SQ entries available\n"); + return FAILED; + } + + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Entered xid=0x%x\n", + io_req->xid); + + /* Cleanup cmds re-use the same TID as the original I/O */ + xid = io_req->xid; + io_req->cmd_type = QEDF_CLEANUP; + io_req->return_scsi_cmd_on_abts = return_scsi_cmd_on_abts; + + /* Set the return CPU to be the same as the request one */ + io_req->cpu = smp_processor_id(); + + set_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags); + + task = qedf_get_task_mem(&qedf->tasks, xid); + + init_completion(&io_req->tm_done); + + /* Obtain free SQ entry */ + spin_lock_irqsave(&fcport->rport_lock, flags); + qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_EXCHANGE_CLEANUP, 0); + + /* Ring doorbell */ + qedf_ring_doorbell(fcport); + spin_unlock_irqrestore(&fcport->rport_lock, flags); + + tmo = wait_for_completion_timeout(&io_req->tm_done, + QEDF_CLEANUP_TIMEOUT * HZ); + + if (!tmo) { + rc = FAILED; + /* Timeout case */ + QEDF_ERR(&(qedf->dbg_ctx), "Cleanup command timeout, " + "xid=%x.\n", io_req->xid); + clear_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags); + /* Issue a drain request if cleanup task times out */ + QEDF_ERR(&(qedf->dbg_ctx), "Issuing MCP drain request.\n"); + qedf_drain_request(qedf); + } + + if (io_req->sc_cmd) { + if (io_req->return_scsi_cmd_on_abts) + qedf_scsi_done(qedf, io_req, DID_ERROR); + } + + if (rc == SUCCESS) + io_req->event = QEDF_IOREQ_EV_CLEANUP_SUCCESS; + else + io_req->event = QEDF_IOREQ_EV_CLEANUP_FAILED; + + return rc; +} + +void qedf_process_cleanup_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Entered xid = 0x%x\n", + io_req->xid); + + clear_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags); + + /* Complete so we can finish cleaning up the I/O */ + complete(&io_req->tm_done); +} + +static int qedf_execute_tmf(struct qedf_rport *fcport, struct scsi_cmnd *sc_cmd, + uint8_t tm_flags) +{ + struct qedf_ioreq *io_req; + struct qedf_mp_req *tm_req; + struct fcoe_task_context *task; + struct fc_frame_header *fc_hdr; + struct fcp_cmnd *fcp_cmnd; + struct qedf_ctx *qedf = fcport->qedf; + int rc = 0; + uint16_t xid; + uint32_t sid, did; + int tmo = 0; + unsigned long flags; + + if (!sc_cmd) { + QEDF_ERR(&(qedf->dbg_ctx), "invalid arg\n"); + return FAILED; + } + + if (!(test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags))) { + QEDF_ERR(&(qedf->dbg_ctx), "fcport not offloaded\n"); + rc = 
FAILED; + return FAILED; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, "portid = 0x%x " + "tm_flags = %d\n", fcport->rdata->ids.port_id, tm_flags); + + io_req = qedf_alloc_cmd(fcport, QEDF_TASK_MGMT_CMD); + if (!io_req) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed TMF"); + rc = -EAGAIN; + goto reset_tmf_err; + } + + /* Initialize rest of io_req fields */ + io_req->sc_cmd = sc_cmd; + io_req->fcport = fcport; + io_req->cmd_type = QEDF_TASK_MGMT_CMD; + + /* Set the return CPU to be the same as the request one */ + io_req->cpu = smp_processor_id(); + + tm_req = (struct qedf_mp_req *)&(io_req->mp_req); + + rc = qedf_init_mp_req(io_req); + if (rc == FAILED) { + QEDF_ERR(&(qedf->dbg_ctx), "Task mgmt MP request init " + "failed\n"); + kref_put(&io_req->refcount, qedf_release_cmd); + goto reset_tmf_err; + } + + /* Set TM flags */ + io_req->io_req_flags = 0; + tm_req->tm_flags = tm_flags; + + /* Default is to return a SCSI command when an error occurs */ + io_req->return_scsi_cmd_on_abts = true; + + /* Fill FCP_CMND */ + qedf_build_fcp_cmnd(io_req, (struct fcp_cmnd *)tm_req->req_buf); + fcp_cmnd = (struct fcp_cmnd *)tm_req->req_buf; + memset(fcp_cmnd->fc_cdb, 0, FCP_CMND_LEN); + fcp_cmnd->fc_dl = 0; + + /* Fill FC header */ + fc_hdr = &(tm_req->req_fc_hdr); + sid = fcport->sid; + did = fcport->rdata->ids.port_id; + __fc_fill_fc_hdr(fc_hdr, FC_RCTL_DD_UNSOL_CMD, sid, did, + FC_TYPE_FCP, FC_FC_FIRST_SEQ | FC_FC_END_SEQ | + FC_FC_SEQ_INIT, 0); + /* Obtain exchange id */ + xid = io_req->xid; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, "TMF io_req xid = " + "0x%x\n", xid); + + /* Initialize task context for this IO request */ + task = qedf_get_task_mem(&qedf->tasks, xid); + qedf_init_mp_task(io_req, task); + + init_completion(&io_req->tm_done); + + /* Obtain free SQ entry */ + spin_lock_irqsave(&fcport->rport_lock, flags); + qedf_add_to_sq(fcport, xid, 0, FCOE_TASK_TYPE_MIDPATH, 0); + + /* Ring doorbell */ + qedf_ring_doorbell(fcport); + spin_unlock_irqrestore(&fcport->rport_lock, flags); + + tmo = wait_for_completion_timeout(&io_req->tm_done, + QEDF_TM_TIMEOUT * HZ); + + if (!tmo) { + rc = FAILED; + QEDF_ERR(&(qedf->dbg_ctx), "wait for tm_cmpl timeout!\n"); + } else { + /* Check TMF response code */ + if (io_req->fcp_rsp_code == 0) + rc = SUCCESS; + else + rc = FAILED; + } + + if (tm_flags == FCP_TMF_LUN_RESET) + qedf_flush_active_ios(fcport, (int)sc_cmd->device->lun); + else + qedf_flush_active_ios(fcport, -1); + + kref_put(&io_req->refcount, qedf_release_cmd); + + if (rc != SUCCESS) { + QEDF_ERR(&(qedf->dbg_ctx), "task mgmt command failed...\n"); + rc = FAILED; + } else { + QEDF_ERR(&(qedf->dbg_ctx), "task mgmt command success...\n"); + rc = SUCCESS; + } +reset_tmf_err: + return rc; +} + +int qedf_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) +{ + struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); + struct fc_rport_libfc_priv *rp = rport->dd_data; + struct qedf_rport *fcport = (struct qedf_rport *)&rp[1]; + struct qedf_ctx *qedf; + struct fc_lport *lport; + int rc = SUCCESS; + int rval; + + rval = fc_remote_port_chkready(rport); + + if (rval) { + QEDF_ERR(NULL, "device_reset rport not ready\n"); + rc = FAILED; + goto tmf_err; + } + + if (fcport == NULL) { + QEDF_ERR(NULL, "device_reset: rport is NULL\n"); + rc = FAILED; + goto tmf_err; + } + + qedf = fcport->qedf; + lport = qedf->lport; + + if (test_bit(QEDF_UNLOADING, &qedf->flags) || + test_bit(QEDF_DBG_STOP_IO, &qedf->flags)) { + rc = SUCCESS; + goto tmf_err; + } + + if (lport->state != LPORT_ST_READY || 
!(lport->link_up)) { + QEDF_ERR(&(qedf->dbg_ctx), "link is not ready\n"); + rc = FAILED; + goto tmf_err; + } + + rc = qedf_execute_tmf(fcport, sc_cmd, tm_flags); + +tmf_err: + return rc; +} + +void qedf_process_tmf_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, + struct qedf_ioreq *io_req) +{ + struct fcoe_cqe_rsp_info *fcp_rsp; + struct fcoe_cqe_midpath_info *mp_info; + + + /* Get TMF response length from CQE */ + mp_info = &cqe->cqe_info.midpath_info; + io_req->mp_req.resp_len = mp_info->data_placement_size; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_SCSI_TM, + "Response len is %d.\n", io_req->mp_req.resp_len); + + fcp_rsp = &cqe->cqe_info.rsp_info; + qedf_parse_fcp_rsp(io_req, fcp_rsp); + + io_req->sc_cmd = NULL; + complete(&io_req->tm_done); +} + +void qedf_process_unsol_compl(struct qedf_ctx *qedf, uint16_t que_idx, + struct fcoe_cqe *cqe) +{ + unsigned long flags; + uint16_t tmp; + uint16_t pktlen = cqe->cqe_info.unsolic_info.pkt_len; + u32 payload_len, crc; + struct fc_frame_header *fh; + struct fc_frame *fp; + struct qedf_io_work *io_work; + u32 bdq_idx; + void *bdq_addr; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_UNSOL, + "address.hi=%x address.lo=%x opaque_data.hi=%x " + "opaque_data.lo=%x bdq_prod_idx=%u len=%u.\n", + le32_to_cpu(cqe->cqe_info.unsolic_info.bd_info.address.hi), + le32_to_cpu(cqe->cqe_info.unsolic_info.bd_info.address.lo), + le32_to_cpu(cqe->cqe_info.unsolic_info.bd_info.opaque.hi), + le32_to_cpu(cqe->cqe_info.unsolic_info.bd_info.opaque.lo), + qedf->bdq_prod_idx, pktlen); + + bdq_idx = le32_to_cpu(cqe->cqe_info.unsolic_info.bd_info.opaque.lo); + if (bdq_idx >= QEDF_BDQ_SIZE) { + QEDF_ERR(&(qedf->dbg_ctx), "bdq_idx is out of range %d.\n", + bdq_idx); + goto increment_prod; + } + + bdq_addr = qedf->bdq[bdq_idx].buf_addr; + if (!bdq_addr) { + QEDF_ERR(&(qedf->dbg_ctx), "bdq_addr is NULL, dropping " + "unsolicited packet.\n"); + goto increment_prod; + } + + if (qedf_dump_frames) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_UNSOL, + "BDQ frame is at addr=%p.\n", bdq_addr); + print_hex_dump(KERN_WARNING, "bdq ", DUMP_PREFIX_OFFSET, 16, 1, + (void *)bdq_addr, pktlen, false); + } + + /* Allocate frame */ + payload_len = pktlen - sizeof(struct fc_frame_header); + fp = fc_frame_alloc(qedf->lport, payload_len); + if (!fp) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate fp.\n"); + goto increment_prod; + } + + /* Copy data from BDQ buffer into fc_frame struct */ + fh = (struct fc_frame_header *)fc_frame_header_get(fp); + memcpy(fh, (void *)bdq_addr, pktlen); + + /* Initialize the frame so libfc sees it as a valid frame */ + crc = fcoe_fc_crc(fp); + fc_frame_init(fp); + fr_dev(fp) = qedf->lport; + fr_sof(fp) = FC_SOF_I3; + fr_eof(fp) = FC_EOF_T; + fr_crc(fp) = cpu_to_le32(~crc); + + /* + * We need to return the frame back up to libfc in a non-atomic + * context + */ + io_work = mempool_alloc(qedf->io_mempool, GFP_ATOMIC); + if (!io_work) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate " + "work for I/O completion.\n"); + fc_frame_free(fp); + goto increment_prod; + } + memset(io_work, 0, sizeof(struct qedf_io_work)); + + INIT_WORK(&io_work->work, qedf_fp_io_handler); + + /* Copy contents of CQE for deferred processing */ + memcpy(&io_work->cqe, cqe, sizeof(struct fcoe_cqe)); + + io_work->qedf = qedf; + io_work->fp = fp; + + queue_work_on(smp_processor_id(), qedf_io_wq, &io_work->work); +increment_prod: + spin_lock_irqsave(&qedf->hba_lock, flags); + + /* Increment producer to let f/w know we've handled the frame */ + qedf->bdq_prod_idx++; + + /* Producer index wraps at uint16_t 
boundary */ + if (qedf->bdq_prod_idx == 0xffff) + qedf->bdq_prod_idx = 0; + + writew(qedf->bdq_prod_idx, qedf->bdq_primary_prod); + tmp = readw(qedf->bdq_primary_prod); + writew(qedf->bdq_prod_idx, qedf->bdq_secondary_prod); + tmp = readw(qedf->bdq_secondary_prod); + + spin_unlock_irqrestore(&qedf->hba_lock, flags); +} diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c new file mode 100644 index 000000000000..d9d7a86b5f8b --- /dev/null +++ b/drivers/scsi/qedf/qedf_main.c @@ -0,0 +1,3336 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qedf.h" + +const struct qed_fcoe_ops *qed_ops; + +static int qedf_probe(struct pci_dev *pdev, const struct pci_device_id *id); +static void qedf_remove(struct pci_dev *pdev); + +extern struct qedf_debugfs_ops qedf_debugfs_ops; +extern struct file_operations qedf_dbg_fops; + +/* + * Driver module parameters. + */ +static unsigned int qedf_dev_loss_tmo = 60; +module_param_named(dev_loss_tmo, qedf_dev_loss_tmo, int, S_IRUGO); +MODULE_PARM_DESC(dev_loss_tmo, " dev_loss_tmo setting for attached " + "remote ports (default 60)"); + +uint qedf_debug = QEDF_LOG_INFO; +module_param_named(debug, qedf_debug, uint, S_IRUGO); +MODULE_PARM_DESC(qedf_debug, " Debug mask. Pass '1' to enable default debugging" + " mask"); + +static uint qedf_fipvlan_retries = 30; +module_param_named(fipvlan_retries, qedf_fipvlan_retries, int, S_IRUGO); +MODULE_PARM_DESC(fipvlan_retries, " Number of FIP VLAN requests to attempt " + "before giving up (default 30)"); + +static uint qedf_fallback_vlan = QEDF_FALLBACK_VLAN; +module_param_named(fallback_vlan, qedf_fallback_vlan, int, S_IRUGO); +MODULE_PARM_DESC(fallback_vlan, " VLAN ID to try if fip vlan request fails " + "(default 1002)."); + +static uint qedf_default_prio = QEDF_DEFAULT_PRIO; +module_param_named(default_prio, qedf_default_prio, int, S_IRUGO); +MODULE_PARM_DESC(default_prio, " Default 802.1q priority for FIP and FCoE" + " traffic (default 3)."); + +uint qedf_dump_frames; +module_param_named(dump_frames, qedf_dump_frames, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dump_frames, " Print the skb data of FIP and FCoE frames " + "(default off)"); + +static uint qedf_queue_depth; +module_param_named(queue_depth, qedf_queue_depth, int, S_IRUGO); +MODULE_PARM_DESC(queue_depth, " Sets the queue depth for all LUNs discovered " + "by the qedf driver. Default is 0 (use OS default)."); + +uint qedf_io_tracing; +module_param_named(io_tracing, qedf_io_tracing, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(io_tracing, " Enable logging of SCSI requests/completions " + "into trace buffer. (default off)."); + +static uint qedf_max_lun = MAX_FIBRE_LUNS; +module_param_named(max_lun, qedf_max_lun, int, S_IRUGO); +MODULE_PARM_DESC(max_lun, " Sets the maximum luns per target that the driver " + "supports. 
(default 0xffffffff)"); + +uint qedf_link_down_tmo; +module_param_named(link_down_tmo, qedf_link_down_tmo, int, S_IRUGO); +MODULE_PARM_DESC(link_down_tmo, " Delays informing the fcoe transport that the " + "link is down by N seconds."); + +bool qedf_retry_delay; +module_param_named(retry_delay, qedf_retry_delay, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(retry_delay, " Enable/disable handling of FCP_RSP IU retry " + "delay handling (default off)."); + +static uint qedf_dp_module; +module_param_named(dp_module, qedf_dp_module, uint, S_IRUGO); +MODULE_PARM_DESC(dp_module, " bit flags control for verbose printk passed " + "qed module during probe."); + +static uint qedf_dp_level; +module_param_named(dp_level, qedf_dp_level, uint, S_IRUGO); +MODULE_PARM_DESC(dp_level, " printk verbosity control passed to qed module " + "during probe (0-3: 0 more verbose)."); + +struct workqueue_struct *qedf_io_wq; + +static struct fcoe_percpu_s qedf_global; +static DEFINE_SPINLOCK(qedf_global_lock); + +static struct kmem_cache *qedf_io_work_cache; + +void qedf_set_vlan_id(struct qedf_ctx *qedf, int vlan_id) +{ + qedf->vlan_id = vlan_id; + qedf->vlan_id |= qedf_default_prio << VLAN_PRIO_SHIFT; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Setting vlan_id=%04x " + "prio=%d.\n", vlan_id, qedf_default_prio); +} + +/* Returns true if we have a valid vlan, false otherwise */ +static bool qedf_initiate_fipvlan_req(struct qedf_ctx *qedf) +{ + int rc; + + if (atomic_read(&qedf->link_state) != QEDF_LINK_UP) { + QEDF_ERR(&(qedf->dbg_ctx), "Link not up.\n"); + return false; + } + + while (qedf->fipvlan_retries--) { + if (qedf->vlan_id > 0) + return true; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Retry %d.\n", qedf->fipvlan_retries); + init_completion(&qedf->fipvlan_compl); + qedf_fcoe_send_vlan_req(qedf); + rc = wait_for_completion_timeout(&qedf->fipvlan_compl, + 1 * HZ); + if (rc > 0) { + fcoe_ctlr_link_up(&qedf->ctlr); + return true; + } + } + + return false; +} + +static void qedf_handle_link_update(struct work_struct *work) +{ + struct qedf_ctx *qedf = + container_of(work, struct qedf_ctx, link_update.work); + int rc; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Entered.\n"); + + if (atomic_read(&qedf->link_state) == QEDF_LINK_UP) { + rc = qedf_initiate_fipvlan_req(qedf); + if (rc) + return; + /* + * If we get here then we never received a repsonse to our + * fip vlan request so set the vlan_id to the default and + * tell FCoE that the link is up + */ + QEDF_WARN(&(qedf->dbg_ctx), "Did not receive FIP VLAN " + "response, falling back to default VLAN %d.\n", + qedf_fallback_vlan); + qedf_set_vlan_id(qedf, QEDF_FALLBACK_VLAN); + + /* + * Zero out data_src_addr so we'll update it with the new + * lport port_id + */ + eth_zero_addr(qedf->data_src_addr); + fcoe_ctlr_link_up(&qedf->ctlr); + } else if (atomic_read(&qedf->link_state) == QEDF_LINK_DOWN) { + /* + * If we hit here and link_down_tmo_valid is still 1 it means + * that link_down_tmo timed out so set it to 0 to make sure any + * other readers have accurate state. 
+ */ + atomic_set(&qedf->link_down_tmo_valid, 0); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Calling fcoe_ctlr_link_down().\n"); + fcoe_ctlr_link_down(&qedf->ctlr); + qedf_wait_for_upload(qedf); + /* Reset the number of FIP VLAN retries */ + qedf->fipvlan_retries = qedf_fipvlan_retries; + } +} + +static void qedf_flogi_resp(struct fc_seq *seq, struct fc_frame *fp, + void *arg) +{ + struct fc_exch *exch = fc_seq_exch(seq); + struct fc_lport *lport = exch->lp; + struct qedf_ctx *qedf = lport_priv(lport); + + if (!qedf) { + QEDF_ERR(NULL, "qedf is NULL.\n"); + return; + } + + /* + * If ERR_PTR is set then don't try to stat anything as it will cause + * a crash when we access fp. + */ + if (IS_ERR(fp)) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "fp has IS_ERR() set.\n"); + goto skip_stat; + } + + /* Log stats for FLOGI reject */ + if (fc_frame_payload_op(fp) == ELS_LS_RJT) + qedf->flogi_failed++; + + /* Complete flogi_compl so we can proceed to sending ADISCs */ + complete(&qedf->flogi_compl); + +skip_stat: + /* Report response to libfc */ + fc_lport_flogi_resp(seq, fp, lport); +} + +static struct fc_seq *qedf_elsct_send(struct fc_lport *lport, u32 did, + struct fc_frame *fp, unsigned int op, + void (*resp)(struct fc_seq *, + struct fc_frame *, + void *), + void *arg, u32 timeout) +{ + struct qedf_ctx *qedf = lport_priv(lport); + + /* + * Intercept FLOGI for statistic purposes. Note we use the resp + * callback to tell if this is really a flogi. + */ + if (resp == fc_lport_flogi_resp) { + qedf->flogi_cnt++; + return fc_elsct_send(lport, did, fp, op, qedf_flogi_resp, + arg, timeout); + } + + return fc_elsct_send(lport, did, fp, op, resp, arg, timeout); +} + +int qedf_send_flogi(struct qedf_ctx *qedf) +{ + struct fc_lport *lport; + struct fc_frame *fp; + + lport = qedf->lport; + + if (!lport->tt.elsct_send) + return -EINVAL; + + fp = fc_frame_alloc(lport, sizeof(struct fc_els_flogi)); + if (!fp) { + QEDF_ERR(&(qedf->dbg_ctx), "fc_frame_alloc failed.\n"); + return -ENOMEM; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, + "Sending FLOGI to reestablish session with switch.\n"); + lport->tt.elsct_send(lport, FC_FID_FLOGI, fp, + ELS_FLOGI, qedf_flogi_resp, lport, lport->r_a_tov); + + init_completion(&qedf->flogi_compl); + + return 0; +} + +struct qedf_tmp_rdata_item { + struct fc_rport_priv *rdata; + struct list_head list; +}; + +/* + * This function is called if link_down_tmo is in use. If we get a link up and + * link_down_tmo has not expired then use just FLOGI/ADISC to recover our + * sessions with targets. Otherwise, just call fcoe_ctlr_link_up(). + */ +static void qedf_link_recovery(struct work_struct *work) +{ + struct qedf_ctx *qedf = + container_of(work, struct qedf_ctx, link_recovery.work); + struct qedf_rport *fcport; + struct fc_rport_priv *rdata; + struct qedf_tmp_rdata_item *rdata_item, *tmp_rdata_item; + bool rc; + int retries = 30; + int rval, i; + struct list_head rdata_login_list; + + INIT_LIST_HEAD(&rdata_login_list); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Link down tmo did not expire.\n"); + + /* + * Essentially reset the fcoe_ctlr here without affecting the state + * of the libfc structs. 
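+ * The FIP controller is forced back to FIP_ST_LINK_WAIT, a fresh
+ * FIP VLAN request and FCF selection are performed, and only then
+ * is the FLOGI resent.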
+ */ + qedf->ctlr.state = FIP_ST_LINK_WAIT; + fcoe_ctlr_link_down(&qedf->ctlr); + + /* + * Bring the link up before we send the fipvlan request so libfcoe + * can select a new fcf in parallel + */ + fcoe_ctlr_link_up(&qedf->ctlr); + + /* Since the link when down and up to verify which vlan we're on */ + qedf->fipvlan_retries = qedf_fipvlan_retries; + rc = qedf_initiate_fipvlan_req(qedf); + if (!rc) + return; + + /* + * We need to wait for an FCF to be selected due to the + * fcoe_ctlr_link_up other the FLOGI will be rejected. + */ + while (retries > 0) { + if (qedf->ctlr.sel_fcf) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "FCF reselected, proceeding with FLOGI.\n"); + break; + } + msleep(500); + retries--; + } + + if (retries < 1) { + QEDF_ERR(&(qedf->dbg_ctx), "Exhausted retries waiting for " + "FCF selection.\n"); + return; + } + + rval = qedf_send_flogi(qedf); + if (rval) + return; + + /* Wait for FLOGI completion before proceeding with sending ADISCs */ + i = wait_for_completion_timeout(&qedf->flogi_compl, + qedf->lport->r_a_tov); + if (i == 0) { + QEDF_ERR(&(qedf->dbg_ctx), "FLOGI timed out.\n"); + return; + } + + /* + * Call lport->tt.rport_login which will cause libfc to send an + * ADISC since the rport is in state ready. + */ + rcu_read_lock(); + list_for_each_entry_rcu(fcport, &qedf->fcports, peers) { + rdata = fcport->rdata; + if (rdata == NULL) + continue; + rdata_item = kzalloc(sizeof(struct qedf_tmp_rdata_item), + GFP_ATOMIC); + if (!rdata_item) + continue; + if (kref_get_unless_zero(&rdata->kref)) { + rdata_item->rdata = rdata; + list_add(&rdata_item->list, &rdata_login_list); + } else + kfree(rdata_item); + } + rcu_read_unlock(); + /* + * Do the fc_rport_login outside of the rcu lock so we don't take a + * mutex in an atomic context. + */ + list_for_each_entry_safe(rdata_item, tmp_rdata_item, &rdata_login_list, + list) { + list_del(&rdata_item->list); + fc_rport_login(rdata_item->rdata); + kref_put(&rdata_item->rdata->kref, fc_rport_destroy); + kfree(rdata_item); + } +} + +static void qedf_update_link_speed(struct qedf_ctx *qedf, + struct qed_link_output *link) +{ + struct fc_lport *lport = qedf->lport; + + lport->link_speed = FC_PORTSPEED_UNKNOWN; + lport->link_supported_speeds = FC_PORTSPEED_UNKNOWN; + + /* Set fc_host link speed */ + switch (link->speed) { + case 10000: + lport->link_speed = FC_PORTSPEED_10GBIT; + break; + case 25000: + lport->link_speed = FC_PORTSPEED_25GBIT; + break; + case 40000: + lport->link_speed = FC_PORTSPEED_40GBIT; + break; + case 50000: + lport->link_speed = FC_PORTSPEED_50GBIT; + break; + case 100000: + lport->link_speed = FC_PORTSPEED_100GBIT; + break; + default: + lport->link_speed = FC_PORTSPEED_UNKNOWN; + break; + } + + /* + * Set supported link speed by querying the supported + * capabilities of the link. 
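+ * Each SUPPORTED_* capability bit reported by qed is mapped to the
+ * corresponding FC_PORTSPEED_* flag.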
+ */ + if (link->supported_caps & SUPPORTED_10000baseKR_Full) + lport->link_supported_speeds |= FC_PORTSPEED_10GBIT; + if (link->supported_caps & SUPPORTED_25000baseKR_Full) + lport->link_supported_speeds |= FC_PORTSPEED_25GBIT; + if (link->supported_caps & SUPPORTED_40000baseLR4_Full) + lport->link_supported_speeds |= FC_PORTSPEED_40GBIT; + if (link->supported_caps & SUPPORTED_50000baseKR2_Full) + lport->link_supported_speeds |= FC_PORTSPEED_50GBIT; + if (link->supported_caps & SUPPORTED_100000baseKR4_Full) + lport->link_supported_speeds |= FC_PORTSPEED_100GBIT; + fc_host_supported_speeds(lport->host) = lport->link_supported_speeds; +} + +static void qedf_link_update(void *dev, struct qed_link_output *link) +{ + struct qedf_ctx *qedf = (struct qedf_ctx *)dev; + + if (link->link_up) { + QEDF_ERR(&(qedf->dbg_ctx), "LINK UP (%d GB/s).\n", + link->speed / 1000); + + /* Cancel any pending link down work */ + cancel_delayed_work(&qedf->link_update); + + atomic_set(&qedf->link_state, QEDF_LINK_UP); + qedf_update_link_speed(qedf, link); + + if (atomic_read(&qedf->dcbx) == QEDF_DCBX_DONE) { + QEDF_ERR(&(qedf->dbg_ctx), "DCBx done.\n"); + if (atomic_read(&qedf->link_down_tmo_valid) > 0) + queue_delayed_work(qedf->link_update_wq, + &qedf->link_recovery, 0); + else + queue_delayed_work(qedf->link_update_wq, + &qedf->link_update, 0); + atomic_set(&qedf->link_down_tmo_valid, 0); + } + + } else { + QEDF_ERR(&(qedf->dbg_ctx), "LINK DOWN.\n"); + + atomic_set(&qedf->link_state, QEDF_LINK_DOWN); + atomic_set(&qedf->dcbx, QEDF_DCBX_PENDING); + /* + * Flag that we're waiting for the link to come back up before + * informing the fcoe layer of the event. + */ + if (qedf_link_down_tmo > 0) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Starting link down tmo.\n"); + atomic_set(&qedf->link_down_tmo_valid, 1); + } + qedf->vlan_id = 0; + qedf_update_link_speed(qedf, link); + queue_delayed_work(qedf->link_update_wq, &qedf->link_update, + qedf_link_down_tmo * HZ); + } +} + + +static void qedf_dcbx_handler(void *dev, struct qed_dcbx_get *get, u32 mib_type) +{ + struct qedf_ctx *qedf = (struct qedf_ctx *)dev; + + QEDF_ERR(&(qedf->dbg_ctx), "DCBx event valid=%d enabled=%d fcoe " + "prio=%d.\n", get->operational.valid, get->operational.enabled, + get->operational.app_prio.fcoe); + + if (get->operational.enabled && get->operational.valid) { + /* If DCBX was already negotiated on link up then just exit */ + if (atomic_read(&qedf->dcbx) == QEDF_DCBX_DONE) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "DCBX already set on link up.\n"); + return; + } + + atomic_set(&qedf->dcbx, QEDF_DCBX_DONE); + + if (atomic_read(&qedf->link_state) == QEDF_LINK_UP) { + if (atomic_read(&qedf->link_down_tmo_valid) > 0) + queue_delayed_work(qedf->link_update_wq, + &qedf->link_recovery, 0); + else + queue_delayed_work(qedf->link_update_wq, + &qedf->link_update, 0); + atomic_set(&qedf->link_down_tmo_valid, 0); + } + } + +} + +static u32 qedf_get_login_failures(void *cookie) +{ + struct qedf_ctx *qedf; + + qedf = (struct qedf_ctx *)cookie; + return qedf->flogi_failed; +} + +static struct qed_fcoe_cb_ops qedf_cb_ops = { + { + .link_update = qedf_link_update, + .dcbx_aen = qedf_dcbx_handler, + } +}; + +/* + * Various transport templates. 
+ */ + +static struct scsi_transport_template *qedf_fc_transport_template; +static struct scsi_transport_template *qedf_fc_vport_transport_template; + +/* + * SCSI EH handlers + */ +static int qedf_eh_abort(struct scsi_cmnd *sc_cmd) +{ + struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); + struct fc_rport_libfc_priv *rp = rport->dd_data; + struct qedf_rport *fcport; + struct fc_lport *lport; + struct qedf_ctx *qedf; + struct qedf_ioreq *io_req; + int rc = FAILED; + int rval; + + if (fc_remote_port_chkready(rport)) { + QEDF_ERR(NULL, "rport not ready\n"); + goto out; + } + + lport = shost_priv(sc_cmd->device->host); + qedf = (struct qedf_ctx *)lport_priv(lport); + + if ((lport->state != LPORT_ST_READY) || !(lport->link_up)) { + QEDF_ERR(&(qedf->dbg_ctx), "link not ready.\n"); + goto out; + } + + fcport = (struct qedf_rport *)&rp[1]; + + io_req = (struct qedf_ioreq *)sc_cmd->SCp.ptr; + if (!io_req) { + QEDF_ERR(&(qedf->dbg_ctx), "io_req is NULL.\n"); + rc = SUCCESS; + goto out; + } + + if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags) || + test_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags) || + test_bit(QEDF_CMD_IN_ABORT, &io_req->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "io_req xid=0x%x already in " + "cleanup or abort processing or already " + "completed.\n", io_req->xid); + rc = SUCCESS; + goto out; + } + + QEDF_ERR(&(qedf->dbg_ctx), "Aborting io_req sc_cmd=%p xid=0x%x " + "fp_idx=%d.\n", sc_cmd, io_req->xid, io_req->fp_idx); + + if (qedf->stop_io_on_error) { + qedf_stop_all_io(qedf); + rc = SUCCESS; + goto out; + } + + init_completion(&io_req->abts_done); + rval = qedf_initiate_abts(io_req, true); + if (rval) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to queue ABTS.\n"); + goto out; + } + + wait_for_completion(&io_req->abts_done); + + if (io_req->event == QEDF_IOREQ_EV_ABORT_SUCCESS || + io_req->event == QEDF_IOREQ_EV_ABORT_FAILED || + io_req->event == QEDF_IOREQ_EV_CLEANUP_SUCCESS) { + /* + * If we get a reponse to the abort this is success from + * the perspective that all references to the command have + * been removed from the driver and firmware + */ + rc = SUCCESS; + } else { + /* If the abort and cleanup failed then return a failure */ + rc = FAILED; + } + + if (rc == SUCCESS) + QEDF_ERR(&(qedf->dbg_ctx), "ABTS succeeded, xid=0x%x.\n", + io_req->xid); + else + QEDF_ERR(&(qedf->dbg_ctx), "ABTS failed, xid=0x%x.\n", + io_req->xid); + +out: + return rc; +} + +static int qedf_eh_target_reset(struct scsi_cmnd *sc_cmd) +{ + QEDF_ERR(NULL, "TARGET RESET Issued..."); + return qedf_initiate_tmf(sc_cmd, FCP_TMF_TGT_RESET); +} + +static int qedf_eh_device_reset(struct scsi_cmnd *sc_cmd) +{ + QEDF_ERR(NULL, "LUN RESET Issued...\n"); + return qedf_initiate_tmf(sc_cmd, FCP_TMF_LUN_RESET); +} + +void qedf_wait_for_upload(struct qedf_ctx *qedf) +{ + while (1) { + if (atomic_read(&qedf->num_offloads)) + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Waiting for all uploads to complete.\n"); + else + break; + msleep(500); + } +} + +/* Reset the host by gracefully logging out and then logging back in */ +static int qedf_eh_host_reset(struct scsi_cmnd *sc_cmd) +{ + struct fc_lport *lport; + struct qedf_ctx *qedf; + + lport = shost_priv(sc_cmd->device->host); + + if (lport->vport) { + QEDF_ERR(NULL, "Cannot issue host reset on NPIV port.\n"); + return SUCCESS; + } + + qedf = (struct qedf_ctx *)lport_priv(lport); + + if (atomic_read(&qedf->link_state) == QEDF_LINK_DOWN || + test_bit(QEDF_UNLOADING, &qedf->flags) || + test_bit(QEDF_DBG_STOP_IO, &qedf->flags)) + return FAILED; + + 
QEDF_ERR(&(qedf->dbg_ctx), "HOST RESET Issued..."); + + /* For host reset, essentially do a soft link up/down */ + atomic_set(&qedf->link_state, QEDF_LINK_DOWN); + atomic_set(&qedf->dcbx, QEDF_DCBX_PENDING); + queue_delayed_work(qedf->link_update_wq, &qedf->link_update, + 0); + qedf_wait_for_upload(qedf); + atomic_set(&qedf->link_state, QEDF_LINK_UP); + qedf->vlan_id = 0; + queue_delayed_work(qedf->link_update_wq, &qedf->link_update, + 0); + + return SUCCESS; +} + +static int qedf_slave_configure(struct scsi_device *sdev) +{ + if (qedf_queue_depth) { + scsi_change_queue_depth(sdev, qedf_queue_depth); + } + + return 0; +} + +static struct scsi_host_template qedf_host_template = { + .module = THIS_MODULE, + .name = QEDF_MODULE_NAME, + .this_id = -1, + .cmd_per_lun = 3, + .use_clustering = ENABLE_CLUSTERING, + .max_sectors = 0xffff, + .queuecommand = qedf_queuecommand, + .shost_attrs = qedf_host_attrs, + .eh_abort_handler = qedf_eh_abort, + .eh_device_reset_handler = qedf_eh_device_reset, /* lun reset */ + .eh_target_reset_handler = qedf_eh_target_reset, /* target reset */ + .eh_host_reset_handler = qedf_eh_host_reset, + .slave_configure = qedf_slave_configure, + .dma_boundary = QED_HW_DMA_BOUNDARY, + .sg_tablesize = QEDF_MAX_BDS_PER_CMD, + .can_queue = FCOE_PARAMS_NUM_TASKS, +}; + +static int qedf_get_paged_crc_eof(struct sk_buff *skb, int tlen) +{ + int rc; + + spin_lock(&qedf_global_lock); + rc = fcoe_get_paged_crc_eof(skb, tlen, &qedf_global); + spin_unlock(&qedf_global_lock); + + return rc; +} + +static struct qedf_rport *qedf_fcport_lookup(struct qedf_ctx *qedf, u32 port_id) +{ + struct qedf_rport *fcport; + struct fc_rport_priv *rdata; + + rcu_read_lock(); + list_for_each_entry_rcu(fcport, &qedf->fcports, peers) { + rdata = fcport->rdata; + if (rdata == NULL) + continue; + if (rdata->ids.port_id == port_id) { + rcu_read_unlock(); + return fcport; + } + } + rcu_read_unlock(); + + /* Return NULL to caller to let them know fcport was not found */ + return NULL; +} + +/* Transmits an ELS frame over an offloaded session */ +static int qedf_xmit_l2_frame(struct qedf_rport *fcport, struct fc_frame *fp) +{ + struct fc_frame_header *fh; + int rc = 0; + + fh = fc_frame_header_get(fp); + if ((fh->fh_type == FC_TYPE_ELS) && + (fh->fh_r_ctl == FC_RCTL_ELS_REQ)) { + switch (fc_frame_payload_op(fp)) { + case ELS_ADISC: + qedf_send_adisc(fcport, fp); + rc = 1; + break; + } + } + + return rc; +} + +/** + * qedf_xmit - qedf FCoE frame transmit function + * + */ +static int qedf_xmit(struct fc_lport *lport, struct fc_frame *fp) +{ + struct fc_lport *base_lport; + struct qedf_ctx *qedf; + struct ethhdr *eh; + struct fcoe_crc_eof *cp; + struct sk_buff *skb; + struct fc_frame_header *fh; + struct fcoe_hdr *hp; + u8 sof, eof; + u32 crc; + unsigned int hlen, tlen, elen; + int wlen; + struct fc_stats *stats; + struct fc_lport *tmp_lport; + struct fc_lport *vn_port = NULL; + struct qedf_rport *fcport; + int rc; + u16 vlan_tci = 0; + + qedf = (struct qedf_ctx *)lport_priv(lport); + + fh = fc_frame_header_get(fp); + skb = fp_skb(fp); + + /* Filter out traffic to other NPIV ports on the same host */ + if (lport->vport) + base_lport = shost_priv(vport_to_shost(lport->vport)); + else + base_lport = lport; + + /* Flag if the destination is the base port */ + if (base_lport->port_id == ntoh24(fh->fh_d_id)) { + vn_port = base_lport; + } else { + /* Got through the list of vports attached to the base_lport + * and see if we have a match with the destination address. 
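+ * A match means the frame is addressed to another local NPIV port,
+ * so it is dropped rather than transmitted on the wire.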
+ */ + list_for_each_entry(tmp_lport, &base_lport->vports, list) { + if (tmp_lport->port_id == ntoh24(fh->fh_d_id)) { + vn_port = tmp_lport; + break; + } + } + } + if (vn_port && ntoh24(fh->fh_d_id) != FC_FID_FLOGI) { + struct fc_rport_priv *rdata = NULL; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "Dropping FCoE frame to %06x.\n", ntoh24(fh->fh_d_id)); + kfree_skb(skb); + rdata = fc_rport_lookup(lport, ntoh24(fh->fh_d_id)); + if (rdata) + rdata->retries = lport->max_rport_retry_count; + return -EINVAL; + } + /* End NPIV filtering */ + + if (!qedf->ctlr.sel_fcf) { + kfree_skb(skb); + return 0; + } + + if (!test_bit(QEDF_LL2_STARTED, &qedf->flags)) { + QEDF_WARN(&(qedf->dbg_ctx), "LL2 not started\n"); + kfree_skb(skb); + return 0; + } + + if (atomic_read(&qedf->link_state) != QEDF_LINK_UP) { + QEDF_WARN(&(qedf->dbg_ctx), "qedf link down\n"); + kfree_skb(skb); + return 0; + } + + if (unlikely(fh->fh_r_ctl == FC_RCTL_ELS_REQ)) { + if (fcoe_ctlr_els_send(&qedf->ctlr, lport, skb)) + return 0; + } + + /* Check to see if this needs to be sent on an offloaded session */ + fcport = qedf_fcport_lookup(qedf, ntoh24(fh->fh_d_id)); + + if (fcport && test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + rc = qedf_xmit_l2_frame(fcport, fp); + /* + * If the frame was successfully sent over the middle path + * then do not try to also send it over the LL2 path + */ + if (rc) + return 0; + } + + sof = fr_sof(fp); + eof = fr_eof(fp); + + elen = sizeof(struct ethhdr); + hlen = sizeof(struct fcoe_hdr); + tlen = sizeof(struct fcoe_crc_eof); + wlen = (skb->len - tlen + sizeof(crc)) / FCOE_WORD_TO_BYTE; + + skb->ip_summed = CHECKSUM_NONE; + crc = fcoe_fc_crc(fp); + + /* copy port crc and eof to the skb buff */ + if (skb_is_nonlinear(skb)) { + skb_frag_t *frag; + + if (qedf_get_paged_crc_eof(skb, tlen)) { + kfree_skb(skb); + return -ENOMEM; + } + frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1]; + cp = kmap_atomic(skb_frag_page(frag)) + frag->page_offset; + } else { + cp = (struct fcoe_crc_eof *)skb_put(skb, tlen); + } + + memset(cp, 0, sizeof(*cp)); + cp->fcoe_eof = eof; + cp->fcoe_crc32 = cpu_to_le32(~crc); + if (skb_is_nonlinear(skb)) { + kunmap_atomic(cp); + cp = NULL; + } + + + /* adjust skb network/transport offsets to match mac/fcoe/port */ + skb_push(skb, elen + hlen); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb->mac_len = elen; + skb->protocol = htons(ETH_P_FCOE); + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), qedf->vlan_id); + + /* fill up mac and fcoe headers */ + eh = eth_hdr(skb); + eh->h_proto = htons(ETH_P_FCOE); + if (qedf->ctlr.map_dest) + fc_fcoe_set_mac(eh->h_dest, fh->fh_d_id); + else + /* insert GW address */ + ether_addr_copy(eh->h_dest, qedf->ctlr.dest_addr); + + /* Set the source MAC address */ + fc_fcoe_set_mac(eh->h_source, fh->fh_s_id); + + hp = (struct fcoe_hdr *)(eh + 1); + memset(hp, 0, sizeof(*hp)); + if (FC_FCOE_VER) + FC_FCOE_ENCAPS_VER(hp, FC_FCOE_VER); + hp->fcoe_sof = sof; + + /*update tx stats */ + stats = per_cpu_ptr(lport->stats, get_cpu()); + stats->TxFrames++; + stats->TxWords += wlen; + put_cpu(); + + /* Get VLAN ID from skb for printing purposes */ + __vlan_hwaccel_get_tag(skb, &vlan_tci); + + /* send down to lld */ + fr_dev(fp) = lport; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, "FCoE frame send: " + "src=%06x dest=%06x r_ctl=%x type=%x vlan=%04x.\n", + ntoh24(fh->fh_s_id), ntoh24(fh->fh_d_id), fh->fh_r_ctl, fh->fh_type, + vlan_tci); + if (qedf_dump_frames) + print_hex_dump(KERN_WARNING, "fcoe: ", DUMP_PREFIX_OFFSET, 16, + 
1, skb->data, skb->len, false); + qed_ops->ll2->start_xmit(qedf->cdev, skb); + + return 0; +} + +static int qedf_alloc_sq(struct qedf_ctx *qedf, struct qedf_rport *fcport) +{ + int rval = 0; + u32 *pbl; + dma_addr_t page; + int num_pages; + + /* Calculate appropriate queue and PBL sizes */ + fcport->sq_mem_size = SQ_NUM_ENTRIES * sizeof(struct fcoe_wqe); + fcport->sq_mem_size = ALIGN(fcport->sq_mem_size, QEDF_PAGE_SIZE); + fcport->sq_pbl_size = (fcport->sq_mem_size / QEDF_PAGE_SIZE) * + sizeof(void *); + fcport->sq_pbl_size = fcport->sq_pbl_size + QEDF_PAGE_SIZE; + + fcport->sq = dma_alloc_coherent(&qedf->pdev->dev, fcport->sq_mem_size, + &fcport->sq_dma, GFP_KERNEL); + if (!fcport->sq) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate send " + "queue.\n"); + rval = 1; + goto out; + } + memset(fcport->sq, 0, fcport->sq_mem_size); + + fcport->sq_pbl = dma_alloc_coherent(&qedf->pdev->dev, + fcport->sq_pbl_size, &fcport->sq_pbl_dma, GFP_KERNEL); + if (!fcport->sq_pbl) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate send " + "queue PBL.\n"); + rval = 1; + goto out_free_sq; + } + memset(fcport->sq_pbl, 0, fcport->sq_pbl_size); + + /* Create PBL */ + num_pages = fcport->sq_mem_size / QEDF_PAGE_SIZE; + page = fcport->sq_dma; + pbl = (u32 *)fcport->sq_pbl; + + while (num_pages--) { + *pbl = U64_LO(page); + pbl++; + *pbl = U64_HI(page); + pbl++; + page += QEDF_PAGE_SIZE; + } + + return rval; + +out_free_sq: + dma_free_coherent(&qedf->pdev->dev, fcport->sq_mem_size, fcport->sq, + fcport->sq_dma); +out: + return rval; +} + +static void qedf_free_sq(struct qedf_ctx *qedf, struct qedf_rport *fcport) +{ + if (fcport->sq_pbl) + dma_free_coherent(&qedf->pdev->dev, fcport->sq_pbl_size, + fcport->sq_pbl, fcport->sq_pbl_dma); + if (fcport->sq) + dma_free_coherent(&qedf->pdev->dev, fcport->sq_mem_size, + fcport->sq, fcport->sq_dma); +} + +static int qedf_offload_connection(struct qedf_ctx *qedf, + struct qedf_rport *fcport) +{ + struct qed_fcoe_params_offload conn_info; + u32 port_id; + u8 lport_src_id[3]; + int rval; + uint16_t total_sqe = (fcport->sq_mem_size / sizeof(struct fcoe_wqe)); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Offloading connection " + "portid=%06x.\n", fcport->rdata->ids.port_id); + rval = qed_ops->acquire_conn(qedf->cdev, &fcport->handle, + &fcport->fw_cid, &fcport->p_doorbell); + if (rval) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not acquire connection " + "for portid=%06x.\n", fcport->rdata->ids.port_id); + rval = 1; /* For some reason qed returns 0 on failure here */ + goto out; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "portid=%06x " + "fw_cid=%08x handle=%d.\n", fcport->rdata->ids.port_id, + fcport->fw_cid, fcport->handle); + + memset(&conn_info, 0, sizeof(struct qed_fcoe_params_offload)); + + /* Fill in the offload connection info */ + conn_info.sq_pbl_addr = fcport->sq_pbl_dma; + + conn_info.sq_curr_page_addr = (dma_addr_t)(*(u64 *)fcport->sq_pbl); + conn_info.sq_next_page_addr = + (dma_addr_t)(*(u64 *)(fcport->sq_pbl + 8)); + + /* Need to use our FCoE MAC for the offload session */ + port_id = fc_host_port_id(qedf->lport->host); + lport_src_id[2] = (port_id & 0x000000FF); + lport_src_id[1] = (port_id & 0x0000FF00) >> 8; + lport_src_id[0] = (port_id & 0x00FF0000) >> 16; + fc_fcoe_set_mac(conn_info.src_mac, lport_src_id); + + ether_addr_copy(conn_info.dst_mac, qedf->ctlr.dest_addr); + + conn_info.tx_max_fc_pay_len = fcport->rdata->maxframe_size; + conn_info.e_d_tov_timer_val = qedf->lport->e_d_tov / 20; + conn_info.rec_tov_timer_val = 3; /* I think this is what 
E3 was */ + conn_info.rx_max_fc_pay_len = fcport->rdata->maxframe_size; + + /* Set VLAN data */ + conn_info.vlan_tag = qedf->vlan_id << + FCOE_CONN_OFFLOAD_RAMROD_DATA_VLAN_ID_SHIFT; + conn_info.vlan_tag |= + qedf_default_prio << FCOE_CONN_OFFLOAD_RAMROD_DATA_PRIORITY_SHIFT; + conn_info.flags |= (FCOE_CONN_OFFLOAD_RAMROD_DATA_B_VLAN_FLAG_MASK << + FCOE_CONN_OFFLOAD_RAMROD_DATA_B_VLAN_FLAG_SHIFT); + + /* Set host port source id */ + port_id = fc_host_port_id(qedf->lport->host); + fcport->sid = port_id; + conn_info.s_id.addr_hi = (port_id & 0x000000FF); + conn_info.s_id.addr_mid = (port_id & 0x0000FF00) >> 8; + conn_info.s_id.addr_lo = (port_id & 0x00FF0000) >> 16; + + conn_info.max_conc_seqs_c3 = fcport->rdata->max_seq; + + /* Set remote port destination id */ + port_id = fcport->rdata->rport->port_id; + conn_info.d_id.addr_hi = (port_id & 0x000000FF); + conn_info.d_id.addr_mid = (port_id & 0x0000FF00) >> 8; + conn_info.d_id.addr_lo = (port_id & 0x00FF0000) >> 16; + + conn_info.def_q_idx = 0; /* Default index for send queue? */ + + /* Set FC-TAPE specific flags if needed */ + if (fcport->dev_type == QEDF_RPORT_TYPE_TAPE) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, + "Enable CONF, REC for portid=%06x.\n", + fcport->rdata->ids.port_id); + conn_info.flags |= 1 << + FCOE_CONN_OFFLOAD_RAMROD_DATA_B_CONF_REQ_SHIFT; + conn_info.flags |= + ((fcport->rdata->sp_features & FC_SP_FT_SEQC) ? 1 : 0) << + FCOE_CONN_OFFLOAD_RAMROD_DATA_B_REC_VALID_SHIFT; + } + + rval = qed_ops->offload_conn(qedf->cdev, fcport->handle, &conn_info); + if (rval) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not offload connection " + "for portid=%06x.\n", fcport->rdata->ids.port_id); + goto out_free_conn; + } else + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Offload " + "succeeded portid=%06x total_sqe=%d.\n", + fcport->rdata->ids.port_id, total_sqe); + + spin_lock_init(&fcport->rport_lock); + atomic_set(&fcport->free_sqes, total_sqe); + return 0; +out_free_conn: + qed_ops->release_conn(qedf->cdev, fcport->handle); +out: + return rval; +} + +#define QEDF_TERM_BUFF_SIZE 10 +static void qedf_upload_connection(struct qedf_ctx *qedf, + struct qedf_rport *fcport) +{ + void *term_params; + dma_addr_t term_params_dma; + + /* Term params needs to be a DMA coherent buffer as qed shared the + * physical DMA address with the firmware. The buffer may be used in + * the receive path so we may eventually have to move this. + */ + term_params = dma_alloc_coherent(&qedf->pdev->dev, QEDF_TERM_BUFF_SIZE, + &term_params_dma, GFP_KERNEL); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Uploading connection " + "port_id=%06x.\n", fcport->rdata->ids.port_id); + + qed_ops->destroy_conn(qedf->cdev, fcport->handle, term_params_dma); + qed_ops->release_conn(qedf->cdev, fcport->handle); + + dma_free_coherent(&qedf->pdev->dev, QEDF_TERM_BUFF_SIZE, term_params, + term_params_dma); +} + +static void qedf_cleanup_fcport(struct qedf_ctx *qedf, + struct qedf_rport *fcport) +{ + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Cleaning up portid=%06x.\n", + fcport->rdata->ids.port_id); + + /* Flush any remaining i/o's before we upload the connection */ + qedf_flush_active_ios(fcport, -1); + + if (test_and_clear_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) + qedf_upload_connection(qedf, fcport); + qedf_free_sq(qedf, fcport); + fcport->rdata = NULL; + fcport->qedf = NULL; +} + +/** + * This event_callback is called after successful completion of libfc + * initiated target login. qedf can proceed with initiating the session + * establishment. 
+ */ +static void qedf_rport_event_handler(struct fc_lport *lport, + struct fc_rport_priv *rdata, + enum fc_rport_event event) +{ + struct qedf_ctx *qedf = lport_priv(lport); + struct fc_rport *rport = rdata->rport; + struct fc_rport_libfc_priv *rp; + struct qedf_rport *fcport; + u32 port_id; + int rval; + unsigned long flags; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "event = %d, " + "port_id = 0x%x\n", event, rdata->ids.port_id); + + switch (event) { + case RPORT_EV_READY: + if (!rport) { + QEDF_WARN(&(qedf->dbg_ctx), "rport is NULL.\n"); + break; + } + + rp = rport->dd_data; + fcport = (struct qedf_rport *)&rp[1]; + fcport->qedf = qedf; + + if (atomic_read(&qedf->num_offloads) >= QEDF_MAX_SESSIONS) { + QEDF_ERR(&(qedf->dbg_ctx), "Not offloading " + "portid=0x%x as max number of offloaded sessions " + "reached.\n", rdata->ids.port_id); + return; + } + + /* + * Don't try to offload the session again. Can happen when we + * get an ADISC + */ + if (test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + QEDF_WARN(&(qedf->dbg_ctx), "Session already " + "offloaded, portid=0x%x.\n", + rdata->ids.port_id); + return; + } + + if (rport->port_id == FC_FID_DIR_SERV) { + /* + * qedf_rport structure doesn't exist for + * directory server. + * We should not come here, as lport will + * take care of fabric login + */ + QEDF_WARN(&(qedf->dbg_ctx), "rport struct does not " + "exist for dir server port_id=%x\n", + rdata->ids.port_id); + break; + } + + if (rdata->spp_type != FC_TYPE_FCP) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Not offlading since since spp type isn't FCP\n"); + break; + } + if (!(rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET)) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Not FCP target so not offloading\n"); + break; + } + + fcport->rdata = rdata; + fcport->rport = rport; + + rval = qedf_alloc_sq(qedf, fcport); + if (rval) { + qedf_cleanup_fcport(qedf, fcport); + break; + } + + /* Set device type */ + if (rdata->flags & FC_RP_FLAGS_RETRY && + rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET && + !(rdata->ids.roles & FC_RPORT_ROLE_FCP_INITIATOR)) { + fcport->dev_type = QEDF_RPORT_TYPE_TAPE; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "portid=%06x is a TAPE device.\n", + rdata->ids.port_id); + } else { + fcport->dev_type = QEDF_RPORT_TYPE_DISK; + } + + rval = qedf_offload_connection(qedf, fcport); + if (rval) { + qedf_cleanup_fcport(qedf, fcport); + break; + } + + /* Add fcport to list of qedf_ctx list of offloaded ports */ + spin_lock_irqsave(&qedf->hba_lock, flags); + list_add_rcu(&fcport->peers, &qedf->fcports); + spin_unlock_irqrestore(&qedf->hba_lock, flags); + + /* + * Set the session ready bit to let everyone know that this + * connection is ready for I/O + */ + set_bit(QEDF_RPORT_SESSION_READY, &fcport->flags); + atomic_inc(&qedf->num_offloads); + + break; + case RPORT_EV_LOGO: + case RPORT_EV_FAILED: + case RPORT_EV_STOP: + port_id = rdata->ids.port_id; + if (port_id == FC_FID_DIR_SERV) + break; + + if (!rport) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "port_id=%x - rport notcreated Yet!!\n", port_id); + break; + } + rp = rport->dd_data; + /* + * Perform session upload. Note that rdata->peers is already + * removed from disc->rports list before we get this event. 
+ */ + fcport = (struct qedf_rport *)&rp[1]; + + /* Only free this fcport if it is offloaded already */ + if (test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + set_bit(QEDF_RPORT_UPLOADING_CONNECTION, &fcport->flags); + qedf_cleanup_fcport(qedf, fcport); + + /* + * Remove fcport to list of qedf_ctx list of offloaded + * ports + */ + spin_lock_irqsave(&qedf->hba_lock, flags); + list_del_rcu(&fcport->peers); + spin_unlock_irqrestore(&qedf->hba_lock, flags); + + clear_bit(QEDF_RPORT_UPLOADING_CONNECTION, + &fcport->flags); + atomic_dec(&qedf->num_offloads); + } + + break; + + case RPORT_EV_NONE: + break; + } +} + +static void qedf_abort_io(struct fc_lport *lport) +{ + /* NO-OP but need to fill in the template */ +} + +static void qedf_fcp_cleanup(struct fc_lport *lport) +{ + /* + * NO-OP but need to fill in template to prevent a NULL + * function pointer dereference during link down. I/Os + * will be flushed when port is uploaded. + */ +} + +static struct libfc_function_template qedf_lport_template = { + .frame_send = qedf_xmit, + .fcp_abort_io = qedf_abort_io, + .fcp_cleanup = qedf_fcp_cleanup, + .rport_event_callback = qedf_rport_event_handler, + .elsct_send = qedf_elsct_send, +}; + +static void qedf_fcoe_ctlr_setup(struct qedf_ctx *qedf) +{ + fcoe_ctlr_init(&qedf->ctlr, FIP_ST_AUTO); + + qedf->ctlr.send = qedf_fip_send; + qedf->ctlr.update_mac = qedf_update_src_mac; + qedf->ctlr.get_src_addr = qedf_get_src_mac; + ether_addr_copy(qedf->ctlr.ctl_src_addr, qedf->mac); +} + +static int qedf_lport_setup(struct qedf_ctx *qedf) +{ + struct fc_lport *lport = qedf->lport; + + lport->link_up = 0; + lport->max_retry_count = QEDF_FLOGI_RETRY_CNT; + lport->max_rport_retry_count = QEDF_RPORT_RETRY_CNT; + lport->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS | + FCP_SPPF_RETRY | FCP_SPPF_CONF_COMPL); + lport->boot_time = jiffies; + lport->e_d_tov = 2 * 1000; + lport->r_a_tov = 10 * 1000; + + /* Set NPIV support */ + lport->does_npiv = 1; + fc_host_max_npiv_vports(lport->host) = QEDF_MAX_NPIV; + + fc_set_wwnn(lport, qedf->wwnn); + fc_set_wwpn(lport, qedf->wwpn); + + fcoe_libfc_config(lport, &qedf->ctlr, &qedf_lport_template, 0); + + /* Allocate the exchange manager */ + fc_exch_mgr_alloc(lport, FC_CLASS_3, qedf->max_scsi_xid + 1, + qedf->max_els_xid, NULL); + + if (fc_lport_init_stats(lport)) + return -ENOMEM; + + /* Finish lport config */ + fc_lport_config(lport); + + /* Set max frame size */ + fc_set_mfs(lport, QEDF_MFS); + fc_host_maxframe_size(lport->host) = lport->mfs; + + /* Set default dev_loss_tmo based on module parameter */ + fc_host_dev_loss_tmo(lport->host) = qedf_dev_loss_tmo; + + /* Set symbolic node name */ + snprintf(fc_host_symbolic_name(lport->host), 256, + "QLogic %s v%s", QEDF_MODULE_NAME, QEDF_VERSION); + + return 0; +} + +/* + * NPIV functions + */ + +static int qedf_vport_libfc_config(struct fc_vport *vport, + struct fc_lport *lport) +{ + lport->link_up = 0; + lport->qfull = 0; + lport->max_retry_count = QEDF_FLOGI_RETRY_CNT; + lport->max_rport_retry_count = QEDF_RPORT_RETRY_CNT; + lport->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS | + FCP_SPPF_RETRY | FCP_SPPF_CONF_COMPL); + lport->boot_time = jiffies; + lport->e_d_tov = 2 * 1000; + lport->r_a_tov = 10 * 1000; + lport->does_npiv = 1; /* Temporary until we add NPIV support */ + + /* Allocate stats for vport */ + if (fc_lport_init_stats(lport)) + return -ENOMEM; + + /* Finish lport config */ + fc_lport_config(lport); + + /* offload related configuration */ + lport->crc_offload = 0; + 
lport->seq_offload = 0; + lport->lro_enabled = 0; + lport->lro_xid = 0; + lport->lso_max = 0; + + return 0; +} + +static int qedf_vport_create(struct fc_vport *vport, bool disabled) +{ + struct Scsi_Host *shost = vport_to_shost(vport); + struct fc_lport *n_port = shost_priv(shost); + struct fc_lport *vn_port; + struct qedf_ctx *base_qedf = lport_priv(n_port); + struct qedf_ctx *vport_qedf; + + char buf[32]; + int rc = 0; + + rc = fcoe_validate_vport_create(vport); + if (rc) { + fcoe_wwn_to_str(vport->port_name, buf, sizeof(buf)); + QEDF_WARN(&(base_qedf->dbg_ctx), "Failed to create vport, " + "WWPN (0x%s) already exists.\n", buf); + goto err1; + } + + if (atomic_read(&base_qedf->link_state) != QEDF_LINK_UP) { + QEDF_WARN(&(base_qedf->dbg_ctx), "Cannot create vport " + "because link is not up.\n"); + rc = -EIO; + goto err1; + } + + vn_port = libfc_vport_create(vport, sizeof(struct qedf_ctx)); + if (!vn_port) { + QEDF_WARN(&(base_qedf->dbg_ctx), "Could not create lport " + "for vport.\n"); + rc = -ENOMEM; + goto err1; + } + + fcoe_wwn_to_str(vport->port_name, buf, sizeof(buf)); + QEDF_ERR(&(base_qedf->dbg_ctx), "Creating NPIV port, WWPN=%s.\n", + buf); + + /* Copy some fields from base_qedf */ + vport_qedf = lport_priv(vn_port); + memcpy(vport_qedf, base_qedf, sizeof(struct qedf_ctx)); + + /* Set qedf data specific to this vport */ + vport_qedf->lport = vn_port; + /* Use same hba_lock as base_qedf */ + vport_qedf->hba_lock = base_qedf->hba_lock; + vport_qedf->pdev = base_qedf->pdev; + vport_qedf->cmd_mgr = base_qedf->cmd_mgr; + init_completion(&vport_qedf->flogi_compl); + INIT_LIST_HEAD(&vport_qedf->fcports); + + rc = qedf_vport_libfc_config(vport, vn_port); + if (rc) { + QEDF_ERR(&(base_qedf->dbg_ctx), "Could not allocate memory " + "for lport stats.\n"); + goto err2; + } + + fc_set_wwnn(vn_port, vport->node_name); + fc_set_wwpn(vn_port, vport->port_name); + vport_qedf->wwnn = vn_port->wwnn; + vport_qedf->wwpn = vn_port->wwpn; + + vn_port->host->transportt = qedf_fc_vport_transport_template; + vn_port->host->can_queue = QEDF_MAX_ELS_XID; + vn_port->host->max_lun = qedf_max_lun; + vn_port->host->sg_tablesize = QEDF_MAX_BDS_PER_CMD; + vn_port->host->max_cmd_len = QEDF_MAX_CDB_LEN; + + rc = scsi_add_host(vn_port->host, &vport->dev); + if (rc) { + QEDF_WARN(&(base_qedf->dbg_ctx), "Error adding Scsi_Host.\n"); + goto err2; + } + + /* Set default dev_loss_tmo based on module parameter */ + fc_host_dev_loss_tmo(vn_port->host) = qedf_dev_loss_tmo; + + /* Init libfc stuffs */ + memcpy(&vn_port->tt, &qedf_lport_template, + sizeof(qedf_lport_template)); + fc_exch_init(vn_port); + fc_elsct_init(vn_port); + fc_lport_init(vn_port); + fc_disc_init(vn_port); + fc_disc_config(vn_port, vn_port); + + + /* Allocate the exchange manager */ + shost = vport_to_shost(vport); + n_port = shost_priv(shost); + fc_exch_mgr_list_clone(n_port, vn_port); + + /* Set max frame size */ + fc_set_mfs(vn_port, QEDF_MFS); + + fc_host_port_type(vn_port->host) = FC_PORTTYPE_UNKNOWN; + + if (disabled) { + fc_vport_set_state(vport, FC_VPORT_DISABLED); + } else { + vn_port->boot_time = jiffies; + fc_fabric_login(vn_port); + fc_vport_setlink(vn_port); + } + + QEDF_INFO(&(base_qedf->dbg_ctx), QEDF_LOG_NPIV, "vn_port=%p.\n", + vn_port); + + /* Set up debug context for vport */ + vport_qedf->dbg_ctx.host_no = vn_port->host->host_no; + vport_qedf->dbg_ctx.pdev = base_qedf->pdev; + +err2: + scsi_host_put(vn_port->host); +err1: + return rc; +} + +static int qedf_vport_destroy(struct fc_vport *vport) +{ + struct Scsi_Host *shost = 
vport_to_shost(vport); + struct fc_lport *n_port = shost_priv(shost); + struct fc_lport *vn_port = vport->dd_data; + + mutex_lock(&n_port->lp_mutex); + list_del(&vn_port->list); + mutex_unlock(&n_port->lp_mutex); + + fc_fabric_logoff(vn_port); + fc_lport_destroy(vn_port); + + /* Detach from scsi-ml */ + fc_remove_host(vn_port->host); + scsi_remove_host(vn_port->host); + + /* + * Only try to release the exchange manager if the vn_port + * configuration is complete. + */ + if (vn_port->state == LPORT_ST_READY) + fc_exch_mgr_free(vn_port); + + /* Free memory used by statistical counters */ + fc_lport_free_stats(vn_port); + + /* Release Scsi_Host */ + if (vn_port->host) + scsi_host_put(vn_port->host); + + return 0; +} + +static int qedf_vport_disable(struct fc_vport *vport, bool disable) +{ + struct fc_lport *lport = vport->dd_data; + + if (disable) { + fc_vport_set_state(vport, FC_VPORT_DISABLED); + fc_fabric_logoff(lport); + } else { + lport->boot_time = jiffies; + fc_fabric_login(lport); + fc_vport_setlink(lport); + } + return 0; +} + +/* + * During removal we need to wait for all the vports associated with a port + * to be destroyed so we avoid a race condition where libfc is still trying + * to reap vports while the driver remove function has already reaped the + * driver contexts associated with the physical port. + */ +static void qedf_wait_for_vport_destroy(struct qedf_ctx *qedf) +{ + struct fc_host_attrs *fc_host = shost_to_fc_host(qedf->lport->host); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_NPIV, + "Entered.\n"); + while (fc_host->npiv_vports_inuse > 0) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_NPIV, + "Waiting for all vports to be reaped.\n"); + msleep(1000); + } +} + +/** + * qedf_fcoe_reset - Resets the fcoe + * + * @shost: shost the reset is from + * + * Returns: always 0 + */ +static int qedf_fcoe_reset(struct Scsi_Host *shost) +{ + struct fc_lport *lport = shost_priv(shost); + + fc_fabric_logoff(lport); + fc_fabric_login(lport); + return 0; +} + +static struct fc_host_statistics *qedf_fc_get_host_stats(struct Scsi_Host + *shost) +{ + struct fc_host_statistics *qedf_stats; + struct fc_lport *lport = shost_priv(shost); + struct qedf_ctx *qedf = lport_priv(lport); + struct qed_fcoe_stats *fw_fcoe_stats; + + qedf_stats = fc_get_host_stats(shost); + + /* We don't collect offload stats for specific NPIV ports */ + if (lport->vport) + goto out; + + fw_fcoe_stats = kmalloc(sizeof(struct qed_fcoe_stats), GFP_KERNEL); + if (!fw_fcoe_stats) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate memory for " + "fw_fcoe_stats.\n"); + goto out; + } + + /* Query firmware for offload stats */ + qed_ops->get_stats(qedf->cdev, fw_fcoe_stats); + + /* + * The expectation is that we add our offload stats to the stats + * being maintained by libfc each time the fc_get_host_status callback + * is invoked. The additions are not carried over for each call to + * the fc_get_host_stats callback. 
+ */ + qedf_stats->tx_frames += fw_fcoe_stats->fcoe_tx_data_pkt_cnt + + fw_fcoe_stats->fcoe_tx_xfer_pkt_cnt + + fw_fcoe_stats->fcoe_tx_other_pkt_cnt; + qedf_stats->rx_frames += fw_fcoe_stats->fcoe_rx_data_pkt_cnt + + fw_fcoe_stats->fcoe_rx_xfer_pkt_cnt + + fw_fcoe_stats->fcoe_rx_other_pkt_cnt; + qedf_stats->fcp_input_megabytes += + do_div(fw_fcoe_stats->fcoe_rx_byte_cnt, 1000000); + qedf_stats->fcp_output_megabytes += + do_div(fw_fcoe_stats->fcoe_tx_byte_cnt, 1000000); + qedf_stats->rx_words += fw_fcoe_stats->fcoe_rx_byte_cnt / 4; + qedf_stats->tx_words += fw_fcoe_stats->fcoe_tx_byte_cnt / 4; + qedf_stats->invalid_crc_count += + fw_fcoe_stats->fcoe_silent_drop_pkt_crc_error_cnt; + qedf_stats->dumped_frames = + fw_fcoe_stats->fcoe_silent_drop_total_pkt_cnt; + qedf_stats->error_frames += + fw_fcoe_stats->fcoe_silent_drop_total_pkt_cnt; + qedf_stats->fcp_input_requests += qedf->input_requests; + qedf_stats->fcp_output_requests += qedf->output_requests; + qedf_stats->fcp_control_requests += qedf->control_requests; + qedf_stats->fcp_packet_aborts += qedf->packet_aborts; + qedf_stats->fcp_frame_alloc_failures += qedf->alloc_failures; + + kfree(fw_fcoe_stats); +out: + return qedf_stats; +} + +static struct fc_function_template qedf_fc_transport_fn = { + .show_host_node_name = 1, + .show_host_port_name = 1, + .show_host_supported_classes = 1, + .show_host_supported_fc4s = 1, + .show_host_active_fc4s = 1, + .show_host_maxframe_size = 1, + + .show_host_port_id = 1, + .show_host_supported_speeds = 1, + .get_host_speed = fc_get_host_speed, + .show_host_speed = 1, + .show_host_port_type = 1, + .get_host_port_state = fc_get_host_port_state, + .show_host_port_state = 1, + .show_host_symbolic_name = 1, + + /* + * Tell FC transport to allocate enough space to store the backpointer + * for the associate qedf_rport struct. 
+ */ + .dd_fcrport_size = (sizeof(struct fc_rport_libfc_priv) + + sizeof(struct qedf_rport)), + .show_rport_maxframe_size = 1, + .show_rport_supported_classes = 1, + .show_host_fabric_name = 1, + .show_starget_node_name = 1, + .show_starget_port_name = 1, + .show_starget_port_id = 1, + .set_rport_dev_loss_tmo = fc_set_rport_loss_tmo, + .show_rport_dev_loss_tmo = 1, + .get_fc_host_stats = qedf_fc_get_host_stats, + .issue_fc_host_lip = qedf_fcoe_reset, + .vport_create = qedf_vport_create, + .vport_delete = qedf_vport_destroy, + .vport_disable = qedf_vport_disable, + .bsg_request = fc_lport_bsg_request, +}; + +static struct fc_function_template qedf_fc_vport_transport_fn = { + .show_host_node_name = 1, + .show_host_port_name = 1, + .show_host_supported_classes = 1, + .show_host_supported_fc4s = 1, + .show_host_active_fc4s = 1, + .show_host_maxframe_size = 1, + .show_host_port_id = 1, + .show_host_supported_speeds = 1, + .get_host_speed = fc_get_host_speed, + .show_host_speed = 1, + .show_host_port_type = 1, + .get_host_port_state = fc_get_host_port_state, + .show_host_port_state = 1, + .show_host_symbolic_name = 1, + .dd_fcrport_size = (sizeof(struct fc_rport_libfc_priv) + + sizeof(struct qedf_rport)), + .show_rport_maxframe_size = 1, + .show_rport_supported_classes = 1, + .show_host_fabric_name = 1, + .show_starget_node_name = 1, + .show_starget_port_name = 1, + .show_starget_port_id = 1, + .set_rport_dev_loss_tmo = fc_set_rport_loss_tmo, + .show_rport_dev_loss_tmo = 1, + .get_fc_host_stats = fc_get_host_stats, + .issue_fc_host_lip = qedf_fcoe_reset, + .bsg_request = fc_lport_bsg_request, +}; + +static bool qedf_fp_has_work(struct qedf_fastpath *fp) +{ + struct qedf_ctx *qedf = fp->qedf; + struct global_queue *que; + struct qed_sb_info *sb_info = fp->sb_info; + struct status_block *sb = sb_info->sb_virt; + u16 prod_idx; + + /* Get the pointer to the global CQ this completion is on */ + que = qedf->global_queues[fp->sb_id]; + + /* Be sure all responses have been written to PI */ + rmb(); + + /* Get the current firmware producer index */ + prod_idx = sb->pi_array[QEDF_FCOE_PARAMS_GL_RQ_PI]; + + return (que->cq_prod_idx != prod_idx); +} + +/* + * Interrupt handler code. + */ + +/* Process completion queue and copy CQE contents for deferred processesing + * + * Return true if we should wake the I/O thread, false if not. + */ +static bool qedf_process_completions(struct qedf_fastpath *fp) +{ + struct qedf_ctx *qedf = fp->qedf; + struct qed_sb_info *sb_info = fp->sb_info; + struct status_block *sb = sb_info->sb_virt; + struct global_queue *que; + u16 prod_idx; + struct fcoe_cqe *cqe; + struct qedf_io_work *io_work; + int num_handled = 0; + unsigned int cpu; + struct qedf_ioreq *io_req = NULL; + u16 xid; + u16 new_cqes; + u32 comp_type; + + /* Get the current firmware producer index */ + prod_idx = sb->pi_array[QEDF_FCOE_PARAMS_GL_RQ_PI]; + + /* Get the pointer to the global CQ this completion is on */ + que = qedf->global_queues[fp->sb_id]; + + /* Calculate the amount of new elements since last processing */ + new_cqes = (prod_idx >= que->cq_prod_idx) ? 
+ (prod_idx - que->cq_prod_idx) : + 0x10000 - que->cq_prod_idx + prod_idx; + + /* Save producer index */ + que->cq_prod_idx = prod_idx; + + while (new_cqes) { + fp->completions++; + num_handled++; + cqe = &que->cq[que->cq_cons_idx]; + + comp_type = (cqe->cqe_data >> FCOE_CQE_CQE_TYPE_SHIFT) & + FCOE_CQE_CQE_TYPE_MASK; + + /* + * Process unsolicited CQEs directly in the interrupt handler + * sine we need the fastpath ID + */ + if (comp_type == FCOE_UNSOLIC_CQE_TYPE) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_UNSOL, + "Unsolicated CQE.\n"); + qedf_process_unsol_compl(qedf, fp->sb_id, cqe); + /* + * Don't add a work list item. Increment consumer + * consumer index and move on. + */ + goto inc_idx; + } + + xid = cqe->cqe_data & FCOE_CQE_TASK_ID_MASK; + io_req = &qedf->cmd_mgr->cmds[xid]; + + /* + * Figure out which percpu thread we should queue this I/O + * on. + */ + if (!io_req) + /* If there is not io_req assocated with this CQE + * just queue it on CPU 0 + */ + cpu = 0; + else { + cpu = io_req->cpu; + io_req->int_cpu = smp_processor_id(); + } + + io_work = mempool_alloc(qedf->io_mempool, GFP_ATOMIC); + if (!io_work) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate " + "work for I/O completion.\n"); + continue; + } + memset(io_work, 0, sizeof(struct qedf_io_work)); + + INIT_WORK(&io_work->work, qedf_fp_io_handler); + + /* Copy contents of CQE for deferred processing */ + memcpy(&io_work->cqe, cqe, sizeof(struct fcoe_cqe)); + + io_work->qedf = fp->qedf; + io_work->fp = NULL; /* Only used for unsolicited frames */ + + queue_work_on(cpu, qedf_io_wq, &io_work->work); + +inc_idx: + que->cq_cons_idx++; + if (que->cq_cons_idx == fp->cq_num_entries) + que->cq_cons_idx = 0; + new_cqes--; + } + + return true; +} + + +/* MSI-X fastpath handler code */ +static irqreturn_t qedf_msix_handler(int irq, void *dev_id) +{ + struct qedf_fastpath *fp = dev_id; + + if (!fp) { + QEDF_ERR(NULL, "fp is null.\n"); + return IRQ_HANDLED; + } + if (!fp->sb_info) { + QEDF_ERR(NULL, "fp->sb_info in null."); + return IRQ_HANDLED; + } + + /* + * Disable interrupts for this status block while we process new + * completions + */ + qed_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0 /*do not update*/); + + while (1) { + qedf_process_completions(fp); + + if (qedf_fp_has_work(fp) == 0) { + /* Update the sb information */ + qed_sb_update_sb_idx(fp->sb_info); + + /* Check for more work */ + rmb(); + + if (qedf_fp_has_work(fp) == 0) { + /* Re-enable interrupts */ + qed_sb_ack(fp->sb_info, IGU_INT_ENABLE, 1); + return IRQ_HANDLED; + } + } + } + + /* Do we ever want to break out of above loop? 
*/ + return IRQ_HANDLED; +} + +/* simd handler for MSI/INTa */ +static void qedf_simd_int_handler(void *cookie) +{ + /* Cookie is qedf_ctx struct */ + struct qedf_ctx *qedf = (struct qedf_ctx *)cookie; + + QEDF_WARN(&(qedf->dbg_ctx), "qedf=%p.\n", qedf); +} + +#define QEDF_SIMD_HANDLER_NUM 0 +static void qedf_sync_free_irqs(struct qedf_ctx *qedf) +{ + int i; + + if (qedf->int_info.msix_cnt) { + for (i = 0; i < qedf->int_info.used_cnt; i++) { + synchronize_irq(qedf->int_info.msix[i].vector); + irq_set_affinity_hint(qedf->int_info.msix[i].vector, + NULL); + irq_set_affinity_notifier(qedf->int_info.msix[i].vector, + NULL); + free_irq(qedf->int_info.msix[i].vector, + &qedf->fp_array[i]); + } + } else + qed_ops->common->simd_handler_clean(qedf->cdev, + QEDF_SIMD_HANDLER_NUM); + + qedf->int_info.used_cnt = 0; + qed_ops->common->set_fp_int(qedf->cdev, 0); +} + +static int qedf_request_msix_irq(struct qedf_ctx *qedf) +{ + int i, rc, cpu; + + cpu = cpumask_first(cpu_online_mask); + for (i = 0; i < qedf->num_queues; i++) { + rc = request_irq(qedf->int_info.msix[i].vector, + qedf_msix_handler, 0, "qedf", &qedf->fp_array[i]); + + if (rc) { + QEDF_WARN(&(qedf->dbg_ctx), "request_irq failed.\n"); + qedf_sync_free_irqs(qedf); + return rc; + } + + qedf->int_info.used_cnt++; + rc = irq_set_affinity_hint(qedf->int_info.msix[i].vector, + get_cpu_mask(cpu)); + cpu = cpumask_next(cpu, cpu_online_mask); + } + + return 0; +} + +static int qedf_setup_int(struct qedf_ctx *qedf) +{ + int rc = 0; + + /* + * Learn interrupt configuration + */ + rc = qed_ops->common->set_fp_int(qedf->cdev, num_online_cpus()); + + rc = qed_ops->common->get_fp_int(qedf->cdev, &qedf->int_info); + if (rc) + return 0; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Number of msix_cnt = " + "0x%x num of cpus = 0x%x\n", qedf->int_info.msix_cnt, + num_online_cpus()); + + if (qedf->int_info.msix_cnt) + return qedf_request_msix_irq(qedf); + + qed_ops->common->simd_handler_config(qedf->cdev, &qedf, + QEDF_SIMD_HANDLER_NUM, qedf_simd_int_handler); + qedf->int_info.used_cnt = 1; + + return 0; +} + +/* Main function for libfc frame reception */ +static void qedf_recv_frame(struct qedf_ctx *qedf, + struct sk_buff *skb) +{ + u32 fr_len; + struct fc_lport *lport; + struct fc_frame_header *fh; + struct fcoe_crc_eof crc_eof; + struct fc_frame *fp; + u8 *mac = NULL; + u8 *dest_mac = NULL; + struct fcoe_hdr *hp; + struct qedf_rport *fcport; + + lport = qedf->lport; + if (lport == NULL || lport->state == LPORT_ST_DISABLED) { + QEDF_WARN(NULL, "Invalid lport struct or lport disabled.\n"); + kfree_skb(skb); + return; + } + + if (skb_is_nonlinear(skb)) + skb_linearize(skb); + mac = eth_hdr(skb)->h_source; + dest_mac = eth_hdr(skb)->h_dest; + + /* Pull the header */ + hp = (struct fcoe_hdr *)skb->data; + fh = (struct fc_frame_header *) skb_transport_header(skb); + skb_pull(skb, sizeof(struct fcoe_hdr)); + fr_len = skb->len - sizeof(struct fcoe_crc_eof); + + fp = (struct fc_frame *)skb; + fc_frame_init(fp); + fr_dev(fp) = lport; + fr_sof(fp) = hp->fcoe_sof; + if (skb_copy_bits(skb, fr_len, &crc_eof, sizeof(crc_eof))) { + kfree_skb(skb); + return; + } + fr_eof(fp) = crc_eof.fcoe_eof; + fr_crc(fp) = crc_eof.fcoe_crc32; + if (pskb_trim(skb, fr_len)) { + kfree_skb(skb); + return; + } + + fh = fc_frame_header_get(fp); + + if (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA && + fh->fh_type == FC_TYPE_FCP) { + /* Drop FCP data. 
We dont this in L2 path */ + kfree_skb(skb); + return; + } + if (fh->fh_r_ctl == FC_RCTL_ELS_REQ && + fh->fh_type == FC_TYPE_ELS) { + switch (fc_frame_payload_op(fp)) { + case ELS_LOGO: + if (ntoh24(fh->fh_s_id) == FC_FID_FLOGI) { + /* drop non-FIP LOGO */ + kfree_skb(skb); + return; + } + break; + } + } + + if (fh->fh_r_ctl == FC_RCTL_BA_ABTS) { + /* Drop incoming ABTS */ + kfree_skb(skb); + return; + } + + /* + * If a connection is uploading, drop incoming FCoE frames as there + * is a small window where we could try to return a frame while libfc + * is trying to clean things up. + */ + + /* Get fcport associated with d_id if it exists */ + fcport = qedf_fcport_lookup(qedf, ntoh24(fh->fh_d_id)); + + if (fcport && test_bit(QEDF_RPORT_UPLOADING_CONNECTION, + &fcport->flags)) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, + "Connection uploading, dropping fp=%p.\n", fp); + kfree_skb(skb); + return; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, "FCoE frame receive: " + "skb=%p fp=%p src=%06x dest=%06x r_ctl=%x fh_type=%x.\n", skb, fp, + ntoh24(fh->fh_s_id), ntoh24(fh->fh_d_id), fh->fh_r_ctl, + fh->fh_type); + if (qedf_dump_frames) + print_hex_dump(KERN_WARNING, "fcoe: ", DUMP_PREFIX_OFFSET, 16, + 1, skb->data, skb->len, false); + fc_exch_recv(lport, fp); +} + +static void qedf_ll2_process_skb(struct work_struct *work) +{ + struct qedf_skb_work *skb_work = + container_of(work, struct qedf_skb_work, work); + struct qedf_ctx *qedf = skb_work->qedf; + struct sk_buff *skb = skb_work->skb; + struct ethhdr *eh; + + if (!qedf) { + QEDF_ERR(NULL, "qedf is NULL\n"); + goto err_out; + } + + eh = (struct ethhdr *)skb->data; + + /* Undo VLAN encapsulation */ + if (eh->h_proto == htons(ETH_P_8021Q)) { + memmove((u8 *)eh + VLAN_HLEN, eh, ETH_ALEN * 2); + eh = (struct ethhdr *)skb_pull(skb, VLAN_HLEN); + skb_reset_mac_header(skb); + } + + /* + * Process either a FIP frame or FCoE frame based on the + * protocol value. If it's not either just drop the + * frame. + */ + if (eh->h_proto == htons(ETH_P_FIP)) { + qedf_fip_recv(qedf, skb); + goto out; + } else if (eh->h_proto == htons(ETH_P_FCOE)) { + __skb_pull(skb, ETH_HLEN); + qedf_recv_frame(qedf, skb); + goto out; + } else + goto err_out; + +err_out: + kfree_skb(skb); +out: + kfree(skb_work); + return; +} + +static int qedf_ll2_rx(void *cookie, struct sk_buff *skb, + u32 arg1, u32 arg2) +{ + struct qedf_ctx *qedf = (struct qedf_ctx *)cookie; + struct qedf_skb_work *skb_work; + + skb_work = kzalloc(sizeof(struct qedf_skb_work), GFP_ATOMIC); + if (!skb_work) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate skb_work so " + "dropping frame.\n"); + kfree_skb(skb); + return 0; + } + + INIT_WORK(&skb_work->work, qedf_ll2_process_skb); + skb_work->skb = skb; + skb_work->qedf = qedf; + queue_work(qedf->ll2_recv_wq, &skb_work->work); + + return 0; +} + +static struct qed_ll2_cb_ops qedf_ll2_cb_ops = { + .rx_cb = qedf_ll2_rx, + .tx_cb = NULL, +}; + +/* Main thread to process I/O completions */ +void qedf_fp_io_handler(struct work_struct *work) +{ + struct qedf_io_work *io_work = + container_of(work, struct qedf_io_work, work); + u32 comp_type; + + /* + * Deferred part of unsolicited CQE sends + * frame to libfc. 
+ */ + comp_type = (io_work->cqe.cqe_data >> + FCOE_CQE_CQE_TYPE_SHIFT) & + FCOE_CQE_CQE_TYPE_MASK; + if (comp_type == FCOE_UNSOLIC_CQE_TYPE && + io_work->fp) + fc_exch_recv(io_work->qedf->lport, io_work->fp); + else + qedf_process_cqe(io_work->qedf, &io_work->cqe); + + kfree(io_work); +} + +static int qedf_alloc_and_init_sb(struct qedf_ctx *qedf, + struct qed_sb_info *sb_info, u16 sb_id) +{ + struct status_block *sb_virt; + dma_addr_t sb_phys; + int ret; + + sb_virt = dma_alloc_coherent(&qedf->pdev->dev, + sizeof(struct status_block), &sb_phys, GFP_KERNEL); + + if (!sb_virt) { + QEDF_ERR(&(qedf->dbg_ctx), "Status block allocation failed " + "for id = %d.\n", sb_id); + return -ENOMEM; + } + + ret = qed_ops->common->sb_init(qedf->cdev, sb_info, sb_virt, sb_phys, + sb_id, QED_SB_TYPE_STORAGE); + + if (ret) { + QEDF_ERR(&(qedf->dbg_ctx), "Status block initialization " + "failed for id = %d.\n", sb_id); + return ret; + } + + return 0; +} + +static void qedf_free_sb(struct qedf_ctx *qedf, struct qed_sb_info *sb_info) +{ + if (sb_info->sb_virt) + dma_free_coherent(&qedf->pdev->dev, sizeof(*sb_info->sb_virt), + (void *)sb_info->sb_virt, sb_info->sb_phys); +} + +static void qedf_destroy_sb(struct qedf_ctx *qedf) +{ + int id; + struct qedf_fastpath *fp = NULL; + + for (id = 0; id < qedf->num_queues; id++) { + fp = &(qedf->fp_array[id]); + if (fp->sb_id == QEDF_SB_ID_NULL) + break; + qedf_free_sb(qedf, fp->sb_info); + kfree(fp->sb_info); + } + kfree(qedf->fp_array); +} + +static int qedf_prepare_sb(struct qedf_ctx *qedf) +{ + int id; + struct qedf_fastpath *fp; + int ret; + + qedf->fp_array = + kcalloc(qedf->num_queues, sizeof(struct qedf_fastpath), + GFP_KERNEL); + + if (!qedf->fp_array) { + QEDF_ERR(&(qedf->dbg_ctx), "fastpath array allocation " + "failed.\n"); + return -ENOMEM; + } + + for (id = 0; id < qedf->num_queues; id++) { + fp = &(qedf->fp_array[id]); + fp->sb_id = QEDF_SB_ID_NULL; + fp->sb_info = kcalloc(1, sizeof(*fp->sb_info), GFP_KERNEL); + if (!fp->sb_info) { + QEDF_ERR(&(qedf->dbg_ctx), "SB info struct " + "allocation failed.\n"); + goto err; + } + ret = qedf_alloc_and_init_sb(qedf, fp->sb_info, id); + if (ret) { + QEDF_ERR(&(qedf->dbg_ctx), "SB allocation and " + "initialization failed.\n"); + goto err; + } + fp->sb_id = id; + fp->qedf = qedf; + fp->cq_num_entries = + qedf->global_queues[id]->cq_mem_size / + sizeof(struct fcoe_cqe); + } +err: + return 0; +} + +void qedf_process_cqe(struct qedf_ctx *qedf, struct fcoe_cqe *cqe) +{ + u16 xid; + struct qedf_ioreq *io_req; + struct qedf_rport *fcport; + u32 comp_type; + + comp_type = (cqe->cqe_data >> FCOE_CQE_CQE_TYPE_SHIFT) & + FCOE_CQE_CQE_TYPE_MASK; + + xid = cqe->cqe_data & FCOE_CQE_TASK_ID_MASK; + io_req = &qedf->cmd_mgr->cmds[xid]; + + /* Completion not for a valid I/O anymore so just return */ + if (!io_req) + return; + + fcport = io_req->fcport; + + if (fcport == NULL) { + QEDF_ERR(&(qedf->dbg_ctx), "fcport is NULL.\n"); + return; + } + + /* + * Check that fcport is offloaded. If it isn't then the spinlock + * isn't valid and shouldn't be taken. We should just return. 
+ */ + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "Session not offloaded yet.\n"); + return; + } + + + switch (comp_type) { + case FCOE_GOOD_COMPLETION_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + switch (io_req->cmd_type) { + case QEDF_SCSI_CMD: + qedf_scsi_completion(qedf, cqe, io_req); + break; + case QEDF_ELS: + qedf_process_els_compl(qedf, cqe, io_req); + break; + case QEDF_TASK_MGMT_CMD: + qedf_process_tmf_compl(qedf, cqe, io_req); + break; + case QEDF_SEQ_CLEANUP: + qedf_process_seq_cleanup_compl(qedf, cqe, io_req); + break; + } + break; + case FCOE_ERROR_DETECTION_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Error detect CQE.\n"); + qedf_process_error_detect(qedf, cqe, io_req); + break; + case FCOE_EXCH_CLEANUP_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Cleanup CQE.\n"); + qedf_process_cleanup_compl(qedf, cqe, io_req); + break; + case FCOE_ABTS_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Abort CQE.\n"); + qedf_process_abts_compl(qedf, cqe, io_req); + break; + case FCOE_DUMMY_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Dummy CQE.\n"); + break; + case FCOE_LOCAL_COMP_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Local completion CQE.\n"); + break; + case FCOE_WARNING_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Warning CQE.\n"); + qedf_process_warning_compl(qedf, cqe, io_req); + break; + case MAX_FCOE_CQE_TYPE: + atomic_inc(&fcport->free_sqes); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Max FCoE CQE.\n"); + break; + default: + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Default CQE.\n"); + break; + } +} + +static void qedf_free_bdq(struct qedf_ctx *qedf) +{ + int i; + + if (qedf->bdq_pbl_list) + dma_free_coherent(&qedf->pdev->dev, QEDF_PAGE_SIZE, + qedf->bdq_pbl_list, qedf->bdq_pbl_list_dma); + + if (qedf->bdq_pbl) + dma_free_coherent(&qedf->pdev->dev, qedf->bdq_pbl_mem_size, + qedf->bdq_pbl, qedf->bdq_pbl_dma); + + for (i = 0; i < QEDF_BDQ_SIZE; i++) { + if (qedf->bdq[i].buf_addr) { + dma_free_coherent(&qedf->pdev->dev, QEDF_BDQ_BUF_SIZE, + qedf->bdq[i].buf_addr, qedf->bdq[i].buf_dma); + } + } +} + +static void qedf_free_global_queues(struct qedf_ctx *qedf) +{ + int i; + struct global_queue **gl = qedf->global_queues; + + for (i = 0; i < qedf->num_queues; i++) { + if (!gl[i]) + continue; + + if (gl[i]->cq) + dma_free_coherent(&qedf->pdev->dev, + gl[i]->cq_mem_size, gl[i]->cq, gl[i]->cq_dma); + if (gl[i]->cq_pbl) + dma_free_coherent(&qedf->pdev->dev, gl[i]->cq_pbl_size, + gl[i]->cq_pbl, gl[i]->cq_pbl_dma); + + kfree(gl[i]); + } + + qedf_free_bdq(qedf); +} + +static int qedf_alloc_bdq(struct qedf_ctx *qedf) +{ + int i; + struct scsi_bd *pbl; + u64 *list; + dma_addr_t page; + + /* Alloc dma memory for BDQ buffers */ + for (i = 0; i < QEDF_BDQ_SIZE; i++) { + qedf->bdq[i].buf_addr = dma_alloc_coherent(&qedf->pdev->dev, + QEDF_BDQ_BUF_SIZE, &qedf->bdq[i].buf_dma, GFP_KERNEL); + if (!qedf->bdq[i].buf_addr) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate BDQ " + "buffer %d.\n", i); + return -ENOMEM; + } + } + + /* Alloc dma memory for BDQ page buffer list */ + qedf->bdq_pbl_mem_size = + QEDF_BDQ_SIZE * sizeof(struct scsi_bd); + qedf->bdq_pbl_mem_size = + ALIGN(qedf->bdq_pbl_mem_size, QEDF_PAGE_SIZE); + + qedf->bdq_pbl = dma_alloc_coherent(&qedf->pdev->dev, + 
qedf->bdq_pbl_mem_size, &qedf->bdq_pbl_dma, GFP_KERNEL); + if (!qedf->bdq_pbl) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate BDQ PBL.\n"); + return -ENOMEM; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "BDQ PBL addr=0x%p dma=0x%llx.\n", qedf->bdq_pbl, + qedf->bdq_pbl_dma); + + /* + * Populate BDQ PBL with physical and virtual address of individual + * BDQ buffers + */ + pbl = (struct scsi_bd *)qedf->bdq_pbl; + for (i = 0; i < QEDF_BDQ_SIZE; i++) { + pbl->address.hi = cpu_to_le32(U64_HI(qedf->bdq[i].buf_dma)); + pbl->address.lo = cpu_to_le32(U64_LO(qedf->bdq[i].buf_dma)); + pbl->opaque.hi = 0; + /* Opaque lo data is an index into the BDQ array */ + pbl->opaque.lo = cpu_to_le32(i); + pbl++; + } + + /* Allocate list of PBL pages */ + qedf->bdq_pbl_list = dma_alloc_coherent(&qedf->pdev->dev, + QEDF_PAGE_SIZE, &qedf->bdq_pbl_list_dma, GFP_KERNEL); + if (!qedf->bdq_pbl_list) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not allocate list of PBL " + "pages.\n"); + return -ENOMEM; + } + memset(qedf->bdq_pbl_list, 0, QEDF_PAGE_SIZE); + + /* + * Now populate PBL list with pages that contain pointers to the + * individual buffers. + */ + qedf->bdq_pbl_list_num_entries = qedf->bdq_pbl_mem_size / + QEDF_PAGE_SIZE; + list = (u64 *)qedf->bdq_pbl_list; + page = qedf->bdq_pbl_list_dma; + for (i = 0; i < qedf->bdq_pbl_list_num_entries; i++) { + *list = qedf->bdq_pbl_dma; + list++; + page += QEDF_PAGE_SIZE; + } + + return 0; +} + +static int qedf_alloc_global_queues(struct qedf_ctx *qedf) +{ + u32 *list; + int i; + int status = 0, rc; + u32 *pbl; + dma_addr_t page; + int num_pages; + + /* Allocate and map CQs, RQs */ + /* + * Number of global queues (CQ / RQ). This should + * be <= number of available MSIX vectors for the PF + */ + if (!qedf->num_queues) { + QEDF_ERR(&(qedf->dbg_ctx), "No MSI-X vectors available!\n"); + return 1; + } + + /* + * Make sure we allocated the PBL that will contain the physical + * addresses of our queues + */ + if (!qedf->p_cpuq) { + status = 1; + goto mem_alloc_failure; + } + + qedf->global_queues = kzalloc((sizeof(struct global_queue *) + * qedf->num_queues), GFP_KERNEL); + if (!qedf->global_queues) { + QEDF_ERR(&(qedf->dbg_ctx), "Unable to allocate global " + "queues array ptr memory\n"); + return -ENOMEM; + } + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "qedf->global_queues=%p.\n", qedf->global_queues); + + /* Allocate DMA coherent buffers for BDQ */ + rc = qedf_alloc_bdq(qedf); + if (rc) + goto mem_alloc_failure; + + /* Allocate a CQ and an associated PBL for each MSI-X vector */ + for (i = 0; i < qedf->num_queues; i++) { + qedf->global_queues[i] = kzalloc(sizeof(struct global_queue), + GFP_KERNEL); + if (!qedf->global_queues[i]) { + QEDF_WARN(&(qedf->dbg_ctx), "Unable to allocation " + "global queue %d.\n", i); + goto mem_alloc_failure; + } + + qedf->global_queues[i]->cq_mem_size = + FCOE_PARAMS_CQ_NUM_ENTRIES * sizeof(struct fcoe_cqe); + qedf->global_queues[i]->cq_mem_size = + ALIGN(qedf->global_queues[i]->cq_mem_size, QEDF_PAGE_SIZE); + + qedf->global_queues[i]->cq_pbl_size = + (qedf->global_queues[i]->cq_mem_size / + PAGE_SIZE) * sizeof(void *); + qedf->global_queues[i]->cq_pbl_size = + ALIGN(qedf->global_queues[i]->cq_pbl_size, QEDF_PAGE_SIZE); + + qedf->global_queues[i]->cq = + dma_alloc_coherent(&qedf->pdev->dev, + qedf->global_queues[i]->cq_mem_size, + &qedf->global_queues[i]->cq_dma, GFP_KERNEL); + + if (!qedf->global_queues[i]->cq) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate " + "cq.\n"); + status = -ENOMEM; + goto mem_alloc_failure; + } + 
memset(qedf->global_queues[i]->cq, 0, + qedf->global_queues[i]->cq_mem_size); + + qedf->global_queues[i]->cq_pbl = + dma_alloc_coherent(&qedf->pdev->dev, + qedf->global_queues[i]->cq_pbl_size, + &qedf->global_queues[i]->cq_pbl_dma, GFP_KERNEL); + + if (!qedf->global_queues[i]->cq_pbl) { + QEDF_WARN(&(qedf->dbg_ctx), "Could not allocate " + "cq PBL.\n"); + status = -ENOMEM; + goto mem_alloc_failure; + } + memset(qedf->global_queues[i]->cq_pbl, 0, + qedf->global_queues[i]->cq_pbl_size); + + /* Create PBL */ + num_pages = qedf->global_queues[i]->cq_mem_size / + QEDF_PAGE_SIZE; + page = qedf->global_queues[i]->cq_dma; + pbl = (u32 *)qedf->global_queues[i]->cq_pbl; + + while (num_pages--) { + *pbl = U64_LO(page); + pbl++; + *pbl = U64_HI(page); + pbl++; + page += QEDF_PAGE_SIZE; + } + /* Set the initial consumer index for cq */ + qedf->global_queues[i]->cq_cons_idx = 0; + } + + list = (u32 *)qedf->p_cpuq; + + /* + * The list is built as follows: CQ#0 PBL pointer, RQ#0 PBL pointer, + * CQ#1 PBL pointer, RQ#1 PBL pointer, etc. Each PBL pointer points + * to the physical address which contains an array of pointers to + * the physical addresses of the specific queue pages. + */ + for (i = 0; i < qedf->num_queues; i++) { + *list = U64_LO(qedf->global_queues[i]->cq_pbl_dma); + list++; + *list = U64_HI(qedf->global_queues[i]->cq_pbl_dma); + list++; + *list = U64_LO(0); + list++; + *list = U64_HI(0); + list++; + } + + return 0; + +mem_alloc_failure: + qedf_free_global_queues(qedf); + return status; +} + +static int qedf_set_fcoe_pf_param(struct qedf_ctx *qedf) +{ + u8 sq_num_pbl_pages; + u32 sq_mem_size; + u32 cq_mem_size; + u32 cq_num_entries; + int rval; + + /* + * The number of completion queues/fastpath interrupts/status blocks + * we allocation is the minimum off: + * + * Number of CPUs + * Number of MSI-X vectors + * Max number allocated in hardware (QEDF_MAX_NUM_CQS) + */ + qedf->num_queues = min((unsigned int)QEDF_MAX_NUM_CQS, + num_online_cpus()); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Number of CQs is %d.\n", + qedf->num_queues); + + qedf->p_cpuq = pci_alloc_consistent(qedf->pdev, + qedf->num_queues * sizeof(struct qedf_glbl_q_params), + &qedf->hw_p_cpuq); + + if (!qedf->p_cpuq) { + QEDF_ERR(&(qedf->dbg_ctx), "pci_alloc_consistent failed.\n"); + return 1; + } + + rval = qedf_alloc_global_queues(qedf); + if (rval) { + QEDF_ERR(&(qedf->dbg_ctx), "Global queue allocation " + "failed.\n"); + return 1; + } + + /* Calculate SQ PBL size in the same manner as in qedf_sq_alloc() */ + sq_mem_size = SQ_NUM_ENTRIES * sizeof(struct fcoe_wqe); + sq_mem_size = ALIGN(sq_mem_size, QEDF_PAGE_SIZE); + sq_num_pbl_pages = (sq_mem_size / QEDF_PAGE_SIZE); + + /* Calculate CQ num entries */ + cq_mem_size = FCOE_PARAMS_CQ_NUM_ENTRIES * sizeof(struct fcoe_cqe); + cq_mem_size = ALIGN(cq_mem_size, QEDF_PAGE_SIZE); + cq_num_entries = cq_mem_size / sizeof(struct fcoe_cqe); + + memset(&(qedf->pf_params), 0, + sizeof(qedf->pf_params)); + + /* Setup the value for fcoe PF */ + qedf->pf_params.fcoe_pf_params.num_cons = QEDF_MAX_SESSIONS; + qedf->pf_params.fcoe_pf_params.num_tasks = FCOE_PARAMS_NUM_TASKS; + qedf->pf_params.fcoe_pf_params.glbl_q_params_addr = + (u64)qedf->hw_p_cpuq; + qedf->pf_params.fcoe_pf_params.sq_num_pbl_pages = sq_num_pbl_pages; + + qedf->pf_params.fcoe_pf_params.rq_buffer_log_size = 0; + + qedf->pf_params.fcoe_pf_params.cq_num_entries = cq_num_entries; + qedf->pf_params.fcoe_pf_params.num_cqs = qedf->num_queues; + + /* log_page_size: 12 for 4KB pages */ + 
qedf->pf_params.fcoe_pf_params.log_page_size = ilog2(QEDF_PAGE_SIZE); + + qedf->pf_params.fcoe_pf_params.mtu = 9000; + qedf->pf_params.fcoe_pf_params.gl_rq_pi = QEDF_FCOE_PARAMS_GL_RQ_PI; + qedf->pf_params.fcoe_pf_params.gl_cmd_pi = QEDF_FCOE_PARAMS_GL_CMD_PI; + + /* BDQ address and size */ + qedf->pf_params.fcoe_pf_params.bdq_pbl_base_addr[0] = + qedf->bdq_pbl_list_dma; + qedf->pf_params.fcoe_pf_params.bdq_pbl_num_entries[0] = + qedf->bdq_pbl_list_num_entries; + qedf->pf_params.fcoe_pf_params.rq_buffer_size = QEDF_BDQ_BUF_SIZE; + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "bdq_list=%p bdq_pbl_list_dma=%llx bdq_pbl_list_entries=%d.\n", + qedf->bdq_pbl_list, + qedf->pf_params.fcoe_pf_params.bdq_pbl_base_addr[0], + qedf->pf_params.fcoe_pf_params.bdq_pbl_num_entries[0]); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "cq_num_entries=%d.\n", + qedf->pf_params.fcoe_pf_params.cq_num_entries); + + return 0; +} + +/* Free DMA coherent memory for array of queue pointers we pass to qed */ +static void qedf_free_fcoe_pf_param(struct qedf_ctx *qedf) +{ + size_t size = 0; + + if (qedf->p_cpuq) { + size = qedf->num_queues * sizeof(struct qedf_glbl_q_params); + pci_free_consistent(qedf->pdev, size, qedf->p_cpuq, + qedf->hw_p_cpuq); + } + + qedf_free_global_queues(qedf); + + if (qedf->global_queues) + kfree(qedf->global_queues); +} + +/* + * PCI driver functions + */ + +static const struct pci_device_id qedf_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, 0x165c) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, 0x8080) }, + {0} +}; +MODULE_DEVICE_TABLE(pci, qedf_pci_tbl); + +static struct pci_driver qedf_pci_driver = { + .name = QEDF_MODULE_NAME, + .id_table = qedf_pci_tbl, + .probe = qedf_probe, + .remove = qedf_remove, +}; + +static int __qedf_probe(struct pci_dev *pdev, int mode) +{ + int rc = -EINVAL; + struct fc_lport *lport; + struct qedf_ctx *qedf; + struct Scsi_Host *host; + bool is_vf = false; + struct qed_ll2_params params; + char host_buf[20]; + struct qed_link_params link_params; + int status; + void *task_start, *task_end; + struct qed_slowpath_params slowpath_params; + struct qed_probe_params qed_params; + u16 tmp; + + /* + * When doing error recovery we didn't reap the lport so don't try + * to reallocate it. 
+ */ + if (mode != QEDF_MODE_RECOVERY) { + lport = libfc_host_alloc(&qedf_host_template, + sizeof(struct qedf_ctx)); + + if (!lport) { + QEDF_ERR(NULL, "Could not allocate lport.\n"); + rc = -ENOMEM; + goto err0; + } + + /* Initialize qedf_ctx */ + qedf = lport_priv(lport); + qedf->lport = lport; + qedf->ctlr.lp = lport; + qedf->pdev = pdev; + qedf->dbg_ctx.pdev = pdev; + qedf->dbg_ctx.host_no = lport->host->host_no; + spin_lock_init(&qedf->hba_lock); + INIT_LIST_HEAD(&qedf->fcports); + qedf->curr_conn_id = QEDF_MAX_SESSIONS - 1; + atomic_set(&qedf->num_offloads, 0); + qedf->stop_io_on_error = false; + pci_set_drvdata(pdev, qedf); + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO, + "QLogic FastLinQ FCoE Module qedf %s, " + "FW %d.%d.%d.%d\n", QEDF_VERSION, + FW_MAJOR_VERSION, FW_MINOR_VERSION, FW_REVISION_VERSION, + FW_ENGINEERING_VERSION); + } else { + /* Init pointers during recovery */ + qedf = pci_get_drvdata(pdev); + lport = qedf->lport; + } + + host = lport->host; + + /* Allocate mempool for qedf_io_work structs */ + qedf->io_mempool = mempool_create_slab_pool(QEDF_IO_WORK_MIN, + qedf_io_work_cache); + if (qedf->io_mempool == NULL) { + QEDF_ERR(&(qedf->dbg_ctx), "qedf->io_mempool is NULL.\n"); + goto err1; + } + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO, "qedf->io_mempool=%p.\n", + qedf->io_mempool); + + sprintf(host_buf, "qedf_%u_link", + qedf->lport->host->host_no); + qedf->link_update_wq = create_singlethread_workqueue(host_buf); + INIT_DELAYED_WORK(&qedf->link_update, qedf_handle_link_update); + INIT_DELAYED_WORK(&qedf->link_recovery, qedf_link_recovery); + + qedf->fipvlan_retries = qedf_fipvlan_retries; + + /* + * Common probe. Takes care of basic hardware init and pci_* + * functions. + */ + memset(&qed_params, 0, sizeof(qed_params)); + qed_params.protocol = QED_PROTOCOL_FCOE; + qed_params.dp_module = qedf_dp_module; + qed_params.dp_level = qedf_dp_level; + qed_params.is_vf = is_vf; + qedf->cdev = qed_ops->common->probe(pdev, &qed_params); + if (!qedf->cdev) { + rc = -ENODEV; + goto err1; + } + + /* queue allocation code should come here + * order should be + * slowpath_start + * status block allocation + * interrupt registration (to get min number of queues) + * set_fcoe_pf_param + * qed_sp_fcoe_func_start + */ + rc = qedf_set_fcoe_pf_param(qedf); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Cannot set fcoe pf param.\n"); + goto err2; + } + qed_ops->common->update_pf_params(qedf->cdev, &qedf->pf_params); + + /* Learn information crucial for qedf to progress */ + rc = qed_ops->fill_dev_info(qedf->cdev, &qedf->dev_info); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to dev info.\n"); + goto err1; + } + + /* Record BDQ producer doorbell addresses */ + qedf->bdq_primary_prod = qedf->dev_info.primary_dbq_rq_addr; + qedf->bdq_secondary_prod = qedf->dev_info.secondary_bdq_rq_addr; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "BDQ primary_prod=%p secondary_prod=%p.\n", qedf->bdq_primary_prod, + qedf->bdq_secondary_prod); + + qed_ops->register_ops(qedf->cdev, &qedf_cb_ops, qedf); + + rc = qedf_prepare_sb(qedf); + if (rc) { + + QEDF_ERR(&(qedf->dbg_ctx), "Cannot start slowpath.\n"); + goto err2; + } + + /* Start the Slowpath-process */ + slowpath_params.int_mode = QED_INT_MODE_MSIX; + slowpath_params.drv_major = QEDF_DRIVER_MAJOR_VER; + slowpath_params.drv_minor = QEDF_DRIVER_MINOR_VER; + slowpath_params.drv_rev = QEDF_DRIVER_REV_VER; + slowpath_params.drv_eng = QEDF_DRIVER_ENG_VER; + memcpy(slowpath_params.name, "qedf", QED_DRV_VER_STR_SIZE); + rc = 
qed_ops->common->slowpath_start(qedf->cdev, &slowpath_params); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Cannot start slowpath.\n"); + goto err2; + } + + /* + * update_pf_params needs to be called before and after slowpath + * start + */ + qed_ops->common->update_pf_params(qedf->cdev, &qedf->pf_params); + + /* Setup interrupts */ + rc = qedf_setup_int(qedf); + if (rc) + goto err3; + + rc = qed_ops->start(qedf->cdev, &qedf->tasks); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Cannot start FCoE function.\n"); + goto err4; + } + task_start = qedf_get_task_mem(&qedf->tasks, 0); + task_end = qedf_get_task_mem(&qedf->tasks, MAX_TID_BLOCKS_FCOE - 1); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Task context start=%p, " + "end=%p block_size=%u.\n", task_start, task_end, + qedf->tasks.size); + + /* + * We need to write the number of BDs in the BDQ we've preallocated so + * the f/w will do a prefetch and we'll get an unsolicited CQE when a + * packet arrives. + */ + qedf->bdq_prod_idx = QEDF_BDQ_SIZE; + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "Writing %d to primary and secondary BDQ doorbell registers.\n", + qedf->bdq_prod_idx); + writew(qedf->bdq_prod_idx, qedf->bdq_primary_prod); + tmp = readw(qedf->bdq_primary_prod); + writew(qedf->bdq_prod_idx, qedf->bdq_secondary_prod); + tmp = readw(qedf->bdq_secondary_prod); + + qed_ops->common->set_power_state(qedf->cdev, PCI_D0); + + /* Now that the dev_info struct has been filled in set the MAC + * address + */ + ether_addr_copy(qedf->mac, qedf->dev_info.common.hw_mac); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "MAC address is %pM.\n", + qedf->mac); + + /* Set the WWNN and WWPN based on the MAC address */ + qedf->wwnn = fcoe_wwn_from_mac(qedf->mac, 1, 0); + qedf->wwpn = fcoe_wwn_from_mac(qedf->mac, 2, 0); + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "WWNN=%016llx " + "WWPN=%016llx.\n", qedf->wwnn, qedf->wwpn); + + sprintf(host_buf, "host_%d", host->host_no); + qed_ops->common->set_id(qedf->cdev, host_buf, QEDF_VERSION); + + + /* Set xid max values */ + qedf->max_scsi_xid = QEDF_MAX_SCSI_XID; + qedf->max_els_xid = QEDF_MAX_ELS_XID; + + /* Allocate cmd mgr */ + qedf->cmd_mgr = qedf_cmd_mgr_alloc(qedf); + if (!qedf->cmd_mgr) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to allocate cmd mgr.\n"); + goto err5; + } + + if (mode != QEDF_MODE_RECOVERY) { + host->transportt = qedf_fc_transport_template; + host->can_queue = QEDF_MAX_ELS_XID; + host->max_lun = qedf_max_lun; + host->max_cmd_len = QEDF_MAX_CDB_LEN; + rc = scsi_add_host(host, &pdev->dev); + if (rc) + goto err6; + } + + memset(¶ms, 0, sizeof(params)); + params.mtu = 9000; + ether_addr_copy(params.ll2_mac_address, qedf->mac); + + /* Start LL2 processing thread */ + snprintf(host_buf, 20, "qedf_%d_ll2", host->host_no); + qedf->ll2_recv_wq = + create_singlethread_workqueue(host_buf); + if (!qedf->ll2_recv_wq) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to LL2 workqueue.\n"); + goto err7; + } + +#ifdef CONFIG_DEBUG_FS + qedf_dbg_host_init(&(qedf->dbg_ctx), &qedf_debugfs_ops, + &qedf_dbg_fops); +#endif + + /* Start LL2 */ + qed_ops->ll2->register_cb_ops(qedf->cdev, &qedf_ll2_cb_ops, qedf); + rc = qed_ops->ll2->start(qedf->cdev, ¶ms); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Could not start Light L2.\n"); + goto err7; + } + set_bit(QEDF_LL2_STARTED, &qedf->flags); + + /* hw will be insterting vlan tag*/ + qedf->vlan_hw_insert = 1; + qedf->vlan_id = 0; + + /* + * No need to setup fcoe_ctlr or fc_lport objects during recovery since + * they were not reaped during the unload process. 
+ */ + if (mode != QEDF_MODE_RECOVERY) { + /* Setup imbedded fcoe controller */ + qedf_fcoe_ctlr_setup(qedf); + + /* Setup lport */ + rc = qedf_lport_setup(qedf); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), + "qedf_lport_setup failed.\n"); + goto err7; + } + } + + sprintf(host_buf, "qedf_%u_timer", qedf->lport->host->host_no); + qedf->timer_work_queue = + create_singlethread_workqueue(host_buf); + if (!qedf->timer_work_queue) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to start timer " + "workqueue.\n"); + goto err7; + } + + /* DPC workqueue is not reaped during recovery unload */ + if (mode != QEDF_MODE_RECOVERY) { + sprintf(host_buf, "qedf_%u_dpc", + qedf->lport->host->host_no); + qedf->dpc_wq = create_singlethread_workqueue(host_buf); + } + + /* + * GRC dump and sysfs parameters are not reaped during the recovery + * unload process. + */ + if (mode != QEDF_MODE_RECOVERY) { + qedf->grcdump_size = qed_ops->common->dbg_grc_size(qedf->cdev); + if (qedf->grcdump_size) { + rc = qedf_alloc_grc_dump_buf(&qedf->grcdump, + qedf->grcdump_size); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), + "GRC Dump buffer alloc failed.\n"); + qedf->grcdump = NULL; + } + + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, + "grcdump: addr=%p, size=%u.\n", + qedf->grcdump, qedf->grcdump_size); + } + qedf_create_sysfs_ctx_attr(qedf); + + /* Initialize I/O tracing for this adapter */ + spin_lock_init(&qedf->io_trace_lock); + qedf->io_trace_idx = 0; + } + + init_completion(&qedf->flogi_compl); + + memset(&link_params, 0, sizeof(struct qed_link_params)); + link_params.link_up = true; + status = qed_ops->common->set_link(qedf->cdev, &link_params); + if (status) + QEDF_WARN(&(qedf->dbg_ctx), "set_link failed.\n"); + + /* Start/restart discovery */ + if (mode == QEDF_MODE_RECOVERY) + fcoe_ctlr_link_up(&qedf->ctlr); + else + fc_fabric_login(lport); + + /* All good */ + return 0; + +err7: + if (qedf->ll2_recv_wq) + destroy_workqueue(qedf->ll2_recv_wq); + fc_remove_host(qedf->lport->host); + scsi_remove_host(qedf->lport->host); +#ifdef CONFIG_DEBUG_FS + qedf_dbg_host_exit(&(qedf->dbg_ctx)); +#endif +err6: + qedf_cmd_mgr_free(qedf->cmd_mgr); +err5: + qed_ops->stop(qedf->cdev); +err4: + qedf_free_fcoe_pf_param(qedf); + qedf_sync_free_irqs(qedf); +err3: + qed_ops->common->slowpath_stop(qedf->cdev); +err2: + qed_ops->common->remove(qedf->cdev); +err1: + scsi_host_put(lport->host); +err0: + return rc; +} + +static int qedf_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + return __qedf_probe(pdev, QEDF_MODE_NORMAL); +} + +static void __qedf_remove(struct pci_dev *pdev, int mode) +{ + struct qedf_ctx *qedf; + + if (!pdev) { + QEDF_ERR(NULL, "pdev is NULL.\n"); + return; + } + + qedf = pci_get_drvdata(pdev); + + /* + * Prevent race where we're in board disable work and then try to + * rmmod the module. 
+ */ + if (test_bit(QEDF_UNLOADING, &qedf->flags)) { + QEDF_ERR(&qedf->dbg_ctx, "Already removing PCI function.\n"); + return; + } + + if (mode != QEDF_MODE_RECOVERY) + set_bit(QEDF_UNLOADING, &qedf->flags); + + /* Logoff the fabric to upload all connections */ + if (mode == QEDF_MODE_RECOVERY) + fcoe_ctlr_link_down(&qedf->ctlr); + else + fc_fabric_logoff(qedf->lport); + qedf_wait_for_upload(qedf); + +#ifdef CONFIG_DEBUG_FS + qedf_dbg_host_exit(&(qedf->dbg_ctx)); +#endif + + /* Stop any link update handling */ + cancel_delayed_work_sync(&qedf->link_update); + destroy_workqueue(qedf->link_update_wq); + qedf->link_update_wq = NULL; + + if (qedf->timer_work_queue) + destroy_workqueue(qedf->timer_work_queue); + + /* Stop Light L2 */ + clear_bit(QEDF_LL2_STARTED, &qedf->flags); + qed_ops->ll2->stop(qedf->cdev); + if (qedf->ll2_recv_wq) + destroy_workqueue(qedf->ll2_recv_wq); + + /* Stop fastpath */ + qedf_sync_free_irqs(qedf); + qedf_destroy_sb(qedf); + + /* + * During recovery don't destroy OS constructs that represent the + * physical port. + */ + if (mode != QEDF_MODE_RECOVERY) { + qedf_free_grc_dump_buf(&qedf->grcdump); + qedf_remove_sysfs_ctx_attr(qedf); + + /* Remove all SCSI/libfc/libfcoe structures */ + fcoe_ctlr_destroy(&qedf->ctlr); + fc_lport_destroy(qedf->lport); + fc_remove_host(qedf->lport->host); + scsi_remove_host(qedf->lport->host); + } + + qedf_cmd_mgr_free(qedf->cmd_mgr); + + if (mode != QEDF_MODE_RECOVERY) { + fc_exch_mgr_free(qedf->lport); + fc_lport_free_stats(qedf->lport); + + /* Wait for all vports to be reaped */ + qedf_wait_for_vport_destroy(qedf); + } + + /* + * Now that all connections have been uploaded we can stop the + * rest of the qed operations + */ + qed_ops->stop(qedf->cdev); + + if (mode != QEDF_MODE_RECOVERY) { + if (qedf->dpc_wq) { + /* Stop general DPC handling */ + destroy_workqueue(qedf->dpc_wq); + qedf->dpc_wq = NULL; + } + } + + /* Final shutdown for the board */ + qedf_free_fcoe_pf_param(qedf); + if (mode != QEDF_MODE_RECOVERY) { + qed_ops->common->set_power_state(qedf->cdev, PCI_D0); + pci_set_drvdata(pdev, NULL); + } + qed_ops->common->slowpath_stop(qedf->cdev); + qed_ops->common->remove(qedf->cdev); + + mempool_destroy(qedf->io_mempool); + + /* Only reap the Scsi_host on a real removal */ + if (mode != QEDF_MODE_RECOVERY) + scsi_host_put(qedf->lport->host); +} + +static void qedf_remove(struct pci_dev *pdev) +{ + /* Check to make sure this function wasn't already disabled */ + if (!atomic_read(&pdev->enable_cnt)) + return; + + __qedf_remove(pdev, QEDF_MODE_NORMAL); +} + +/* + * Module Init/Remove + */ + +static int __init qedf_init(void) +{ + int ret; + + /* If debug=1 passed, set the default log mask */ + if (qedf_debug == QEDF_LOG_DEFAULT) + qedf_debug = QEDF_DEFAULT_LOG_MASK; + + /* Print driver banner */ + QEDF_INFO(NULL, QEDF_LOG_INFO, "%s v%s.\n", QEDF_DESCR, + QEDF_VERSION); + + /* Create kmem_cache for qedf_io_work structs */ + qedf_io_work_cache = kmem_cache_create("qedf_io_work_cache", + sizeof(struct qedf_io_work), 0, SLAB_HWCACHE_ALIGN, NULL); + if (qedf_io_work_cache == NULL) { + QEDF_ERR(NULL, "qedf_io_work_cache is NULL.\n"); + goto err1; + } + QEDF_INFO(NULL, QEDF_LOG_DISC, "qedf_io_work_cache=%p.\n", + qedf_io_work_cache); + + qed_ops = qed_get_fcoe_ops(); + if (!qed_ops) { + QEDF_ERR(NULL, "Failed to get qed fcoe operations\n"); + goto err1; + } + +#ifdef CONFIG_DEBUG_FS + qedf_dbg_init("qedf"); +#endif + + qedf_fc_transport_template = + fc_attach_transport(&qedf_fc_transport_fn); + if (!qedf_fc_transport_template) { + 
QEDF_ERR(NULL, "Could not register with FC transport\n"); + goto err2; + } + + qedf_fc_vport_transport_template = + fc_attach_transport(&qedf_fc_vport_transport_fn); + if (!qedf_fc_vport_transport_template) { + QEDF_ERR(NULL, "Could not register vport template with FC " + "transport\n"); + goto err3; + } + + qedf_io_wq = create_workqueue("qedf_io_wq"); + if (!qedf_io_wq) { + QEDF_ERR(NULL, "Could not create qedf_io_wq.\n"); + goto err4; + } + + qedf_cb_ops.get_login_failures = qedf_get_login_failures; + + ret = pci_register_driver(&qedf_pci_driver); + if (ret) { + QEDF_ERR(NULL, "Failed to register driver\n"); + goto err5; + } + + return 0; + +err5: + destroy_workqueue(qedf_io_wq); +err4: + fc_release_transport(qedf_fc_vport_transport_template); +err3: + fc_release_transport(qedf_fc_transport_template); +err2: +#ifdef CONFIG_DEBUG_FS + qedf_dbg_exit(); +#endif + qed_put_fcoe_ops(); +err1: + return -EINVAL; +} + +static void __exit qedf_cleanup(void) +{ + pci_unregister_driver(&qedf_pci_driver); + + destroy_workqueue(qedf_io_wq); + + fc_release_transport(qedf_fc_vport_transport_template); + fc_release_transport(qedf_fc_transport_template); +#ifdef CONFIG_DEBUG_FS + qedf_dbg_exit(); +#endif + qed_put_fcoe_ops(); + + kmem_cache_destroy(qedf_io_work_cache); +} + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("QLogic QEDF 25/40/50/100Gb FCoE Driver"); +MODULE_AUTHOR("QLogic Corporation"); +MODULE_VERSION(QEDF_VERSION); +module_init(qedf_init); +module_exit(qedf_cleanup); diff --git a/drivers/scsi/qedf/qedf_version.h b/drivers/scsi/qedf/qedf_version.h new file mode 100644 index 000000000000..4ae5f537a440 --- /dev/null +++ b/drivers/scsi/qedf/qedf_version.h @@ -0,0 +1,15 @@ +/* + * QLogic FCoE Offload Driver + * Copyright (c) 2016 Cavium Inc. + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#define QEDF_VERSION "8.10.7.0" +#define QEDF_DRIVER_MAJOR_VER 8 +#define QEDF_DRIVER_MINOR_VER 10 +#define QEDF_DRIVER_REV_VER 7 +#define QEDF_DRIVER_ENG_VER 0 + -- GitLab From 8bfcd1bf867d5fae67ecc553b44d7a29ab66013c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 8 Feb 2017 16:24:24 +0000 Subject: [PATCH 0457/1084] scsi: sd: make sd_devt_release() static Fixes the following sparse warning: drivers/scsi/sd.c:3087:6: warning: symbol 'sd_devt_release' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cb6e68dd6df0..b9b32f5811e0 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3084,7 +3084,7 @@ struct sd_devt { struct disk_devt disk_devt; }; -void sd_devt_release(struct disk_devt *disk_devt) +static void sd_devt_release(struct disk_devt *disk_devt) { struct sd_devt *sd_devt = container_of(disk_devt, struct sd_devt, disk_devt); -- GitLab From f170396c14243d307e2f479af142fd7356b3bd2a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 3 Feb 2017 19:38:54 +0000 Subject: [PATCH 0458/1084] scsi: fix memory leak of sdpk on when gd fails to allocate On an allocation failure of gd, the current exit path is via out_free_devt which leaves sdpk still allocated and hence it gets leaked. Fix this by correcting the order of resource free'ing with a change in the error exit path labels. 
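For illustration, a minimal stand-alone sketch of the unwind idiom this change restores (stub types and plain malloc/free, not the actual sd_probe() code): the error labels sit in reverse order of allocation, so a failure at any step frees exactly the objects allocated before it and nothing else. In the buggy ordering, jumping to the later label skipped the earlier free and leaked the first allocation.

#include <stdlib.h>

struct sdkp_stub { int dummy; };	/* stands in for the scsi_disk */
struct devt_stub { int dummy; };	/* stands in for sd_devt */
struct disk_stub { int dummy; };	/* stands in for the gendisk */

int probe_stub(void)
{
	struct sdkp_stub *sdkp;
	struct devt_stub *devt;
	struct disk_stub *gd;

	sdkp = malloc(sizeof(*sdkp));
	if (!sdkp)
		goto out;			/* nothing allocated yet */

	devt = malloc(sizeof(*devt));
	if (!devt)
		goto out_free;			/* undo sdkp only */

	gd = malloc(sizeof(*gd));
	if (!gd)
		goto out_free_devt;		/* undo devt, then sdkp */

	/* success: sdkp, devt and gd stay allocated for the device lifetime */
	return 0;

 out_free_devt:
	free(devt);
 out_free:
	free(sdkp);
 out:
	return -1;
}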
Detected by CoverityScan, CID#1399519 ("Resource Leak") Fixes: 0dba1314d4f81115dc ("scsi, block: fix duplicate bdi name registration crashes") Signed-off-by: Colin Ian King Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b9b32f5811e0..8e4f4f72ea64 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3213,10 +3213,10 @@ static int sd_probe(struct device *dev) sd_devt = NULL; out_put: put_disk(gd); - out_free: - kfree(sdkp); out_free_devt: kfree(sd_devt); + out_free: + kfree(sdkp); out: scsi_autopm_put_device(sdp); return error; -- GitLab From 4d7d39a18b8b81511f0b893b7d2203790bf8a58b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Feb 2017 21:46:37 +0300 Subject: [PATCH 0459/1084] scsi: scsi_dh_emc: return success in clariion_std_inquiry() We accidentally return an uninitialized variable on success. Fixes: b6ff1b14cdf4 ("[SCSI] scsi_dh: Update EMC handler") Signed-off-by: Dan Carpenter Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_emc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 4a7679f6c73d..f1578832ec7a 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -358,7 +358,7 @@ static int clariion_prep_fn(struct scsi_device *sdev, struct request *req) static int clariion_std_inquiry(struct scsi_device *sdev, struct clariion_dh_data *csdev) { - int err; + int err = SCSI_DH_OK; char *sp_model; sp_model = parse_sp_model(sdev, sdev->inquiry); -- GitLab From 148cff67b401e2229c076c0ea418712654be77e4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2017 20:15:55 +0100 Subject: [PATCH 0460/1084] scsi: always zero sshdr in scsi_normalize_sense This gives us a clear state even if a command didn't return sense data. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c index b1383a71400e..a75673bb82b3 100644 --- a/drivers/scsi/scsi_common.c +++ b/drivers/scsi/scsi_common.c @@ -137,11 +137,11 @@ EXPORT_SYMBOL(int_to_scsilun); bool scsi_normalize_sense(const u8 *sense_buffer, int sb_len, struct scsi_sense_hdr *sshdr) { + memset(sshdr, 0, sizeof(struct scsi_sense_hdr)); + if (!sense_buffer || !sb_len) return false; - memset(sshdr, 0, sizeof(struct scsi_sense_hdr)); - sshdr->response_code = (sense_buffer[0] & 0x7f); if (!scsi_sense_valid(sshdr)) -- GitLab From 6fa2b8f9e3ae8dd770d8fa264825f86b146381c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2017 20:15:56 +0100 Subject: [PATCH 0461/1084] scsi: sd: improve TUR handling in sd_check_events Remove bogus evaluations of retval and sshdr when the device is offline, and fix a possible NULL pointer dereference by allocating the 8 byte sized sense header on stack. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/scsi/sd.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 8e4f4f72ea64..be535d4215c4 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1425,7 +1425,6 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing) { struct scsi_disk *sdkp = scsi_disk_get(disk); struct scsi_device *sdp; - struct scsi_sense_hdr *sshdr = NULL; int retval; if (!sdkp) @@ -1454,22 +1453,21 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing) * by sd_spinup_disk() from sd_revalidate_disk(), which happens whenever * sd_revalidate() is called. */ - retval = -ENODEV; - if (scsi_block_when_processing_errors(sdp)) { - sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); + struct scsi_sense_hdr sshdr = { 0, }; + retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, SD_MAX_RETRIES, - sshdr); - } + &sshdr); - /* failed to execute TUR, assume media not present */ - if (host_byte(retval)) { - set_media_not_present(sdkp); - goto out; - } + /* failed to execute TUR, assume media not present */ + if (host_byte(retval)) { + set_media_not_present(sdkp); + goto out; + } - if (media_not_present(sdkp, sshdr)) - goto out; + if (media_not_present(sdkp, &sshdr)) + goto out; + } /* * For removable scsi disk we have to recognise the presence @@ -1485,7 +1483,6 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing) * Medium present state has changed in either direction. * Device has indicated UNIT_ATTENTION. */ - kfree(sshdr); retval = sdp->changed ? DISK_EVENT_MEDIA_CHANGE : 0; sdp->changed = 0; scsi_disk_put(sdkp); -- GitLab From 74a78ebda4fa5c5ae87fc501152863ee31c17da4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2017 20:15:57 +0100 Subject: [PATCH 0462/1084] scsi: make the sense header argument to scsi_test_unit_ready mandatory It's a tiny structure that can be allocated on the stack, don't complicate the code by making it optional. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/osd/osd_uld.c | 3 ++- drivers/scsi/scsi_ioctl.c | 3 ++- drivers/scsi/scsi_lib.c | 14 ++------------ 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c index 243eab3d10d0..e0ce5d2fd14d 100644 --- a/drivers/scsi/osd/osd_uld.c +++ b/drivers/scsi/osd/osd_uld.c @@ -372,6 +372,7 @@ EXPORT_SYMBOL(osduld_device_same); static int __detect_osd(struct osd_uld_device *oud) { struct scsi_device *scsi_device = oud->od.scsi_device; + struct scsi_sense_hdr sense_hdr; char caps[OSD_CAP_LEN]; int error; @@ -380,7 +381,7 @@ static int __detect_osd(struct osd_uld_device *oud) */ OSD_DEBUG("start scsi_test_unit_ready %p %p %p\n", oud, scsi_device, scsi_device->request_queue); - error = scsi_test_unit_ready(scsi_device, 10*HZ, 5, NULL); + error = scsi_test_unit_ready(scsi_device, 10*HZ, 5, &sense_hdr); if (error) OSD_ERR("warning: scsi_test_unit_ready failed\n"); diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index 8b8c814df5c7..b6bf3f29a12a 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -199,6 +199,7 @@ static int scsi_ioctl_get_pci(struct scsi_device *sdev, void __user *arg) int scsi_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) { char scsi_cmd[MAX_COMMAND_SIZE]; + struct scsi_sense_hdr sense_hdr; /* Check for deprecated ioctls ... 
all the ioctls which don't * follow the new unique numbering scheme are deprecated */ @@ -243,7 +244,7 @@ int scsi_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) return scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); case SCSI_IOCTL_TEST_UNIT_READY: return scsi_test_unit_ready(sdev, IOCTL_NORMAL_TIMEOUT, - NORMAL_RETRIES, NULL); + NORMAL_RETRIES, &sense_hdr); case SCSI_IOCTL_START_UNIT: scsi_cmd[0] = START_STOP; scsi_cmd[1] = 0; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 39b2d727f033..a3c7ec1795c9 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2520,28 +2520,20 @@ EXPORT_SYMBOL(scsi_mode_sense); * @sdev: scsi device to change the state of. * @timeout: command timeout * @retries: number of retries before failing - * @sshdr_external: Optional pointer to struct scsi_sense_hdr for - * returning sense. Make sure that this is cleared before passing - * in. + * @sshdr: outpout pointer for decoded sense information. * * Returns zero if unsuccessful or an error if TUR failed. For * removable media, UNIT_ATTENTION sets ->changed flag. **/ int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, - struct scsi_sense_hdr *sshdr_external) + struct scsi_sense_hdr *sshdr) { char cmd[] = { TEST_UNIT_READY, 0, 0, 0, 0, 0, }; - struct scsi_sense_hdr *sshdr; int result; - if (!sshdr_external) - sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); - else - sshdr = sshdr_external; - /* try to eat the UNIT_ATTENTION if there are enough retries */ do { result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, sshdr, @@ -2552,8 +2544,6 @@ scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, } while (scsi_sense_valid(sshdr) && sshdr->sense_key == UNIT_ATTENTION && --retries); - if (!sshdr_external) - kfree(sshdr); return result; } EXPORT_SYMBOL(scsi_test_unit_ready); -- GitLab From 3949e2f04262495f6c8795b148aa26dffccba646 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2017 20:15:58 +0100 Subject: [PATCH 0463/1084] scsi: simplify scsi_execute_req_flags Add a sshdr argument to __scsi_execute so that we can decode the sense data directly into the sense header instead of needing a copy of it. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. 
Petersen --- drivers/scsi/scsi_lib.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index a3c7ec1795c9..5f661486cf6e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -215,8 +215,9 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, int timeout, int retries, u64 flags, - req_flags_t rq_flags, int *resid) + unsigned char *sense, struct scsi_sense_hdr *sshdr, + int timeout, int retries, u64 flags, req_flags_t rq_flags, + int *resid) { struct request *req; struct scsi_request *rq; @@ -259,6 +260,8 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, *resid = rq->resid_len; if (sense && rq->sense_len) memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE); + if (sshdr) + scsi_normalize_sense(rq->sense, rq->sense_len, sshdr); ret = req->errors; out: blk_put_request(req); @@ -288,7 +291,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int *resid) { return __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, - timeout, retries, flags, 0, resid); + NULL, timeout, retries, flags, 0, resid); } EXPORT_SYMBOL(scsi_execute); @@ -297,21 +300,9 @@ int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid, u64 flags, req_flags_t rq_flags) { - char *sense = NULL; - int result; - - if (sshdr) { - sense = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_NOIO); - if (!sense) - return DRIVER_ERROR << 24; - } - result = __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, - sense, timeout, retries, flags, rq_flags, resid); - if (sshdr) - scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr); - - kfree(sense); - return result; + return __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, + NULL, sshdr, timeout, retries, flags, rq_flags, + resid); } EXPORT_SYMBOL(scsi_execute_req_flags); -- GitLab From dc5e1d628a7bb631450ea661553a8bb496891f47 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Mon, 20 Feb 2017 21:49:11 +0800 Subject: [PATCH 0464/1084] drm: zte: fix static checker warning on variable 'fmt' Commit 4e986d3705df ("drm: zte: add overlay plane support") introduces the following static checker warning: drivers/gpu/drm/zte/zx_plane.c:170 zx_vl_rsz_setup() warn: always true condition '(fmt >= 0) => (0-u32max >= 0)' Fix it by change 'fmt' type to integer. 
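For illustration, a small stand-alone example of why the unsigned type defeats the check (hypothetical lookup helper, not the zx_plane code): a negative error code stored in a u32 wraps to a huge positive value, so a sign test on it can never fire, which is exactly what the static checker flags.

#include <stdint.h>
#include <stdio.h>

/* stand-in for a format lookup that returns a negative errno on failure */
static int lookup_fmt(int known)
{
	return known ? 7 : -22;			/* -EINVAL */
}

int main(void)
{
	uint32_t fmt_u32 = lookup_fmt(0);	/* -22 becomes 4294967274 */
	int fmt_int = lookup_fmt(0);

	if (fmt_u32 < 0)			/* always false for an unsigned */
		printf("u32: error detected\n");
	else
		printf("u32: error missed, fmt=%u\n", fmt_u32);

	if (fmt_int < 0)			/* behaves as intended */
		printf("int: error detected (%d)\n", fmt_int);
	return 0;
}

With the variable declared as int, the negative return value survives and the error path is taken.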
Reported-by: Dan Carpenter Fixes: 4e986d3705df ("drm: zte: add overlay plane support") Signed-off-by: Shawn Guo Reviewed-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1487598551-28310-1-git-send-email-shawnguo@kernel.org --- drivers/gpu/drm/zte/zx_plane.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/zte/zx_plane.c b/drivers/gpu/drm/zte/zx_plane.c index 1d08ba381098..d646ac931663 100644 --- a/drivers/gpu/drm/zte/zx_plane.c +++ b/drivers/gpu/drm/zte/zx_plane.c @@ -159,7 +159,7 @@ static void zx_vl_rsz_setup(struct zx_plane *zplane, uint32_t format, void __iomem *rsz = zplane->rsz; u32 src_chroma_w = src_w; u32 src_chroma_h = src_h; - u32 fmt; + int fmt; /* Set up source and destination resolution */ zx_writel(rsz + RSZ_SRC_CFG, RSZ_VER(src_h - 1) | RSZ_HOR(src_w - 1)); @@ -203,7 +203,7 @@ static void zx_vl_plane_atomic_update(struct drm_plane *plane, u32 src_x, src_y, src_w, src_h; u32 dst_x, dst_y, dst_w, dst_h; uint32_t format; - u32 fmt; + int fmt; int num_planes; int i; -- GitLab From 25290fa5591d81767713db304e0d567bf991786f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 29 Dec 2016 22:06:15 -0800 Subject: [PATCH 0465/1084] f2fs: return fs_trim if there is no candidate If there is no candidate to submit discard command during f2fs_trim_fs, let's return without checkpoint. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 33 ++++++++++++++++++++++++++++----- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 917b5c5053ae..ccea40763d9d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1249,6 +1249,11 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* this is the case of multiple fstrims without any changes */ if (cpc->reason == CP_DISCARD) { + if (!exist_trim_candidates(sbi, cpc)) { + unblock_operations(sbi); + goto out; + } + if (NM_I(sbi)->dirty_nat_cnt == 0 && SIT_I(sbi)->dirty_sentries == 0 && prefree_segments(sbi) == 0) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e0db895fd84c..a4e8e6278a17 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2119,6 +2119,7 @@ void release_discard_addrs(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *, bool); void allocate_new_segments(struct f2fs_sb_info *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); +bool exist_trim_candidates(struct f2fs_sb_info *, struct cp_control *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); void update_meta_page(struct f2fs_sb_info *, void *, block_t); void write_meta_page(struct f2fs_sb_info *, struct page *); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 44d69f90be2a..8b54b1fafa70 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -834,7 +834,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, SM_I(sbi)->nr_discards += end - start; } -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, + bool check_only) { int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); int max_blocks = sbi->blocks_per_seg; @@ -848,12 +849,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) int i; if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) - return; + return false; if (!force) { if (!test_opt(sbi, DISCARD) || !se->valid_blocks || 
SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) - return; + return false; } /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ @@ -871,8 +872,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) && (end - start) < cpc->trim_minlen) continue; + if (check_only) + return true; + __add_discard_entry(sbi, cpc, se, start, end); } + return false; } void release_discard_addrs(struct f2fs_sb_info *sbi) @@ -1453,6 +1458,24 @@ static const struct segment_allocation default_salloc_ops = { .allocate_segment = allocate_segment_by_default, }; +bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ + __u64 trim_start = cpc->trim_start; + bool has_candidate = false; + + mutex_lock(&SIT_I(sbi)->sentry_lock); + for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { + if (add_discard_addrs(sbi, cpc, true)) { + has_candidate = true; + break; + } + } + mutex_unlock(&SIT_I(sbi)->sentry_lock); + + cpc->trim_start = trim_start; + return has_candidate; +} + int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); @@ -2249,7 +2272,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* add discard candidates */ if (cpc->reason != CP_DISCARD) { cpc->trim_start = segno; - add_discard_addrs(sbi, cpc); + add_discard_addrs(sbi, cpc, false); } if (to_journal) { @@ -2287,7 +2310,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u64 trim_start = cpc->trim_start; for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) - add_discard_addrs(sbi, cpc); + add_discard_addrs(sbi, cpc, false); cpc->trim_start = trim_start; } -- GitLab From 939afa943c5290a3b92f01612a792af17bc98115 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Jan 2017 18:49:42 +0800 Subject: [PATCH 0466/1084] f2fs: clean up with list_{first, last}_entry Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 4 ++-- fs/f2fs/node.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ccea40763d9d..fb8cdfcaece6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -891,7 +891,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); return 0; } - fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); + fi = list_first_entry(head, struct f2fs_inode_info, dirty_list); inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->inode_lock[type]); if (inode) { @@ -924,7 +924,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) spin_unlock(&sbi->inode_lock[DIRTY_META]); return 0; } - fi = list_entry(head->next, struct f2fs_inode_info, + fi = list_first_entry(head, struct f2fs_inode_info, gdirty_list); inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->inode_lock[DIRTY_META]); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 12d235f2c771..ab2008dea921 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1143,7 +1143,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, prefetchw(&page->flags); if (pages) { - page = list_entry(pages->prev, struct page, lru); + page = list_last_entry(pages, struct page, lru); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, @@ -1262,7 +1262,7 @@ static int f2fs_read_data_pages(struct file *file, struct list_head *pages, unsigned nr_pages) { struct inode *inode = file->f_mapping->host; - struct page *page = 
list_entry(pages->prev, struct page, lru); + struct page *page = list_last_entry(pages, struct page, lru); trace_f2fs_readpages(inode, page, nr_pages); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index e7997e240366..9278b21ee073 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -174,7 +174,7 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) spin_unlock(&nm_i->nid_list_lock); return; } - fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next, + fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], struct free_nid, list); *nid = fnid->nid; spin_unlock(&nm_i->nid_list_lock); -- GitLab From 5fe457430e554a2f5188f13c1a2e36ad845640c5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Jan 2017 18:50:26 +0800 Subject: [PATCH 0467/1084] f2fs: introduce FI_ATOMIC_COMMIT This patch introduces a new flag to indicate inode status of doing atomic write committing, so that, we can keep atomic write status for inode during atomic committing, then we can skip GCing pages of atomic write inode, that avoids random GCed datas being mixed with current transaction, so isolation of transaction can be kept. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 6 ++++++ fs/f2fs/file.c | 11 ++++++----- fs/f2fs/gc.c | 6 ++++++ fs/f2fs/segment.c | 10 +++++++--- 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ab2008dea921..848d110dc1ca 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1977,7 +1977,7 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); - if (f2fs_is_atomic_file(inode)) { + if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { register_inmem_page(inode, page); return 1; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a4e8e6278a17..94250a69762a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1639,6 +1639,7 @@ enum { FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ + FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ FI_VOLATILE_FILE, /* indicate volatile file */ FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ @@ -1828,6 +1829,11 @@ static inline bool f2fs_is_atomic_file(struct inode *inode) return is_inode_flag_set(inode, FI_ATOMIC_FILE); } +static inline bool f2fs_is_commit_atomic_write(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_ATOMIC_COMMIT); +} + static inline bool f2fs_is_volatile_file(struct inode *inode) { return is_inode_flag_set(inode, FI_VOLATILE_FILE); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6c335180b9d8..e45522115b1c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1569,14 +1569,15 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) goto err_out; if (f2fs_is_atomic_file(inode)) { - clear_inode_flag(inode, FI_ATOMIC_FILE); ret = commit_inmem_pages(inode); - if (ret) { - set_inode_flag(inode, FI_ATOMIC_FILE); + if (ret) goto err_out; - } + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); - stat_dec_atomic_write(inode); + if (!ret) { + clear_inode_flag(inode, FI_ATOMIC_FILE); + stat_dec_atomic_write(inode); + } } else { ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 88bfc3dff496..88e5e7b10ab6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -569,6 +569,9 @@ static void move_encrypted_block(struct 
inode *inode, block_t bidx, if (!check_valid_map(F2FS_I_SB(inode), segno, off)) goto out; + if (f2fs_is_atomic_file(inode)) + goto out; + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); if (err) @@ -661,6 +664,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, if (!check_valid_map(F2FS_I_SB(inode), segno, off)) goto out; + if (f2fs_is_atomic_file(inode)) + goto out; + if (gc_type == BG_GC) { if (PageWriteback(page)) goto out; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8b54b1fafa70..02a8d4ee65eb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -242,12 +242,12 @@ void drop_inmem_pages(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - clear_inode_flag(inode, FI_ATOMIC_FILE); - stat_dec_atomic_write(inode); - mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); mutex_unlock(&fi->inmem_lock); + + clear_inode_flag(inode, FI_ATOMIC_FILE); + stat_dec_atomic_write(inode); } static int __commit_inmem_pages(struct inode *inode, @@ -316,6 +316,8 @@ int commit_inmem_pages(struct inode *inode) f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); + set_inode_flag(inode, FI_ATOMIC_COMMIT); + mutex_lock(&fi->inmem_lock); err = __commit_inmem_pages(inode, &revoke_list); if (err) { @@ -337,6 +339,8 @@ int commit_inmem_pages(struct inode *inode) } mutex_unlock(&fi->inmem_lock); + clear_inode_flag(inode, FI_ATOMIC_COMMIT); + f2fs_unlock_op(sbi); return err; } -- GitLab From 355e78913c0d57492076d545b6f44b94fec2bf6b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Jan 2017 18:51:01 +0800 Subject: [PATCH 0468/1084] f2fs: check in-memory block bitmap This patch adds a mirror for valid block bitmap, and use it to detect in-memory bitmap corruption which may be caused by bit-transition of cache or memory overflow. 
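For illustration, a minimal user-space sketch of the mirrored-bitmap idea (simplified helpers, not the f2fs code): every update is applied to both the working bitmap and its mirror, and a disagreement between the two copies on the next update is treated as in-memory corruption rather than a logic bug.

#include <stdbool.h>
#include <stdio.h>

#define MAP_BYTES 64

static unsigned char valid_map[MAP_BYTES];
static unsigned char valid_map_mir[MAP_BYTES];

static bool test_and_set(unsigned char *map, unsigned int bit)
{
	unsigned char mask = 1 << (bit & 7);
	bool old = map[bit >> 3] & mask;

	map[bit >> 3] |= mask;
	return old;
}

/* returns false and reports corruption if the two copies disagree */
static bool set_valid_block(unsigned int bit)
{
	bool old = test_and_set(valid_map, bit);
	bool old_mir = test_and_set(valid_map_mir, bit);

	if (old != old_mir) {
		fprintf(stderr, "bitmap corruption at bit %u\n", bit);
		return false;
	}
	return true;
}

int main(void)
{
	set_valid_block(10);
	valid_map[1] ^= 0x04;		/* simulate a single-bit flip of bit 10 */
	if (!set_valid_block(10))
		printf("mismatch detected on the next update\n");
	return 0;
}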
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 32 ++++++++++++++++++++++++++++++-- fs/f2fs/segment.h | 6 ++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 02a8d4ee65eb..56a097d22150 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1021,14 +1021,32 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) + if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) { +#ifdef CONFIG_F2FS_CHECK_FS + if (f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir)) + f2fs_bug_on(sbi, 1); + else + WARN_ON(1); +#else f2fs_bug_on(sbi, 1); +#endif + } if (f2fs_discard_en(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; } else { - if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) + if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { +#ifdef CONFIG_F2FS_CHECK_FS + if (!f2fs_test_and_clear_bit(offset, + se->cur_valid_map_mir)) + f2fs_bug_on(sbi, 1); + else + WARN_ON(1); +#else f2fs_bug_on(sbi, 1); +#endif + } if (f2fs_discard_en(sbi) && f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; @@ -2357,6 +2375,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi) !sit_i->sentries[start].ckpt_valid_map) return -ENOMEM; +#ifdef CONFIG_F2FS_CHECK_FS + sit_i->sentries[start].cur_valid_map_mir + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map_mir) + return -ENOMEM; +#endif + if (f2fs_discard_en(sbi)) { sit_i->sentries[start].discard_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); @@ -2786,6 +2811,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) if (sit_i->sentries) { for (start = 0; start < MAIN_SEGS(sbi); start++) { kfree(sit_i->sentries[start].cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + kfree(sit_i->sentries[start].cur_valid_map_mir); +#endif kfree(sit_i->sentries[start].ckpt_valid_map); kfree(sit_i->sentries[start].discard_map); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 08f1455c812c..9af95194db06 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -164,6 +164,9 @@ struct seg_entry { unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */ unsigned int padding:6; /* padding */ unsigned char *cur_valid_map; /* validity bitmap of blocks */ +#ifdef CONFIG_F2FS_CHECK_FS + unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */ +#endif /* * # of valid blocks and the validity bitmap stored in the the last * checkpoint pack. This information is used by the SSR mode. @@ -320,6 +323,9 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se, se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); +#ifdef CONFIG_F2FS_CHECK_FS + memcpy(se->cur_valid_map_mir, rs->valid_map, SIT_VBLOCK_MAP_SIZE); +#endif se->type = GET_SIT_TYPE(rs); se->mtime = le64_to_cpu(rs->mtime); } -- GitLab From 599a09b2c1ac222e6aad0c22515d1ccde7c3b702 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Jan 2017 18:52:01 +0800 Subject: [PATCH 0469/1084] f2fs: check in-memory nat version bitmap This patch adds a mirror for nat version bitmap, and use it to detect in-memory bitmap corruption which may be caused by bit-transition of cache or memory overflow. 
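Along the same lines, a hedged user-space sketch of the whole-bitmap consistency check performed before the bitmap is copied out (hypothetical names, not the f2fs helpers): the mirror is duplicated from the same source at init time, every flip touches both copies, and a memcmp() right before the copy-out catches any divergence between them.

#include <assert.h>
#include <stdlib.h>
#include <string.h>

struct nat_state {
	unsigned char *bitmap;
	unsigned char *bitmap_mir;
	size_t size;
};

static int nat_state_init(struct nat_state *s, const unsigned char *src, size_t size)
{
	s->bitmap = malloc(size);
	s->bitmap_mir = malloc(size);
	if (!s->bitmap || !s->bitmap_mir) {
		free(s->bitmap);
		free(s->bitmap_mir);
		return -1;
	}
	memcpy(s->bitmap, src, size);		/* like duplicating the version bitmap at init */
	memcpy(s->bitmap_mir, src, size);
	s->size = size;
	return 0;
}

static void nat_flip_bit(struct nat_state *s, size_t bit)
{
	s->bitmap[bit >> 3] ^= 1 << (bit & 7);	/* always change both copies */
	s->bitmap_mir[bit >> 3] ^= 1 << (bit & 7);
}

static void nat_copy_out(const struct nat_state *s, unsigned char *dst)
{
	/* a mismatch here means the working copy was corrupted in memory */
	assert(memcmp(s->bitmap, s->bitmap_mir, s->size) == 0);
	memcpy(dst, s->bitmap, s->size);
}

int main(void)
{
	unsigned char disk[8] = { 0 }, out[8];
	struct nat_state s;

	if (nat_state_init(&s, disk, sizeof(disk)))
		return 1;
	nat_flip_bit(&s, 3);
	nat_copy_out(&s, out);			/* passes: the copies still agree */
	free(s.bitmap);
	free(s.bitmap_mir);
	return 0;
}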
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/node.c | 11 +++++++++++ fs/f2fs/node.h | 15 +++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 94250a69762a..4a84b3fbbfd1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -540,6 +540,9 @@ struct f2fs_nm_info { /* for checkpoint */ char *nat_bitmap; /* NAT bitmap pointer */ +#ifdef CONFIG_F2FS_CHECK_FS + char *nat_bitmap_mir; /* NAT bitmap mirror */ +#endif int bitmap_size; /* bitmap size */ }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 03a1f9043558..69c38a0022e7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2363,6 +2363,14 @@ static int init_node_manager(struct f2fs_sb_info *sbi) GFP_KERNEL); if (!nm_i->nat_bitmap) return -ENOMEM; + +#ifdef CONFIG_F2FS_CHECK_FS + nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size, + GFP_KERNEL); + if (!nm_i->nat_bitmap_mir) + return -ENOMEM; +#endif + return 0; } @@ -2437,6 +2445,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); +#ifdef CONFIG_F2FS_CHECK_FS + kfree(nm_i->nat_bitmap_mir); +#endif sbi->nm_info = NULL; kfree(nm_i); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 9278b21ee073..29ff783eb9c3 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -186,6 +186,12 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) { struct f2fs_nm_info *nm_i = NM_I(sbi); + +#ifdef CONFIG_F2FS_CHECK_FS + if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir, + nm_i->bitmap_size)) + f2fs_bug_on(sbi, 1); +#endif memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); } @@ -203,6 +209,12 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) (seg_off << sbi->log_blocks_per_seg << 1) + (block_off & (sbi->blocks_per_seg - 1))); +#ifdef CONFIG_F2FS_CHECK_FS + if (f2fs_test_bit(block_off, nm_i->nat_bitmap) != + f2fs_test_bit(block_off, nm_i->nat_bitmap_mir)) + f2fs_bug_on(sbi, 1); +#endif + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) block_addr += sbi->blocks_per_seg; @@ -228,6 +240,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); f2fs_change_bit(block_off, nm_i->nat_bitmap); +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_change_bit(block_off, nm_i->nat_bitmap_mir); +#endif } static inline nid_t ino_of_node(struct page *node_page) -- GitLab From ae27d62e6befd3cac4ffa702e644cc52019642e8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Jan 2017 18:52:34 +0800 Subject: [PATCH 0470/1084] f2fs: check in-memory sit version bitmap This patch adds a mirror for sit version bitmap, and use it to detect in-memory bitmap corruption which may be caused by bit-transition of cache or memory overflow. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 ++++++++++++---- fs/f2fs/segment.h | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 56a097d22150..c8c825574fed 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2346,7 +2346,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct sit_info *sit_i; unsigned int sit_segs, start; - char *src_bitmap, *dst_bitmap; + char *src_bitmap; unsigned int bitmap_size; /* allocate memory for SIT information */ @@ -2408,17 +2408,22 @@ static int build_sit_info(struct f2fs_sb_info *sbi) bitmap_size = __bitmap_size(sbi, SIT_BITMAP); src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP); - dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); - if (!dst_bitmap) + sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + if (!sit_i->sit_bitmap) return -ENOMEM; +#ifdef CONFIG_F2FS_CHECK_FS + sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL); + if (!sit_i->sit_bitmap_mir) + return -ENOMEM; +#endif + /* init SIT information */ sit_i->s_ops = &default_salloc_ops; sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr); sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; sit_i->written_valid_blocks = 0; - sit_i->sit_bitmap = dst_bitmap; sit_i->bitmap_size = bitmap_size; sit_i->dirty_sentries = 0; sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; @@ -2826,6 +2831,9 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = NULL; kfree(sit_i->sit_bitmap); +#ifdef CONFIG_F2FS_CHECK_FS + kfree(sit_i->sit_bitmap_mir); +#endif kfree(sit_i); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 9af95194db06..5cb5755c75d9 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -209,6 +209,9 @@ struct sit_info { block_t sit_blocks; /* # of blocks used by SIT area */ block_t written_valid_blocks; /* # of valid blocks in main area */ char *sit_bitmap; /* SIT bitmap pointer */ +#ifdef CONFIG_F2FS_CHECK_FS + char *sit_bitmap_mir; /* SIT bitmap mirror */ +#endif unsigned int bitmap_size; /* SIT bitmap size */ unsigned long *tmp_map; /* bitmap for temporal use */ @@ -423,6 +426,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, void *dst_addr) { struct sit_info *sit_i = SIT_I(sbi); + +#ifdef CONFIG_F2FS_CHECK_FS + if (memcmp(sit_i->sit_bitmap, sit_i->sit_bitmap_mir, + sit_i->bitmap_size)) + f2fs_bug_on(sbi, 1); +#endif memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); } @@ -643,6 +652,12 @@ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, check_seg_range(sbi, start); +#ifdef CONFIG_F2FS_CHECK_FS + if (f2fs_test_bit(offset, sit_i->sit_bitmap) != + f2fs_test_bit(offset, sit_i->sit_bitmap_mir)) + f2fs_bug_on(sbi, 1); +#endif + /* calculate sit block address */ if (f2fs_test_bit(offset, sit_i->sit_bitmap)) blk_addr += sit_i->sit_blocks; @@ -668,6 +683,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) unsigned int block_off = SIT_BLOCK_OFFSET(start); f2fs_change_bit(block_off, sit_i->sit_bitmap); +#ifdef CONFIG_F2FS_CHECK_FS + f2fs_change_bit(block_off, sit_i->sit_bitmap_mir); +#endif } static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) -- GitLab From b01a92019cac30398ef75b560d2668b399f4e393 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Jan 2017 14:13:03 -0800 Subject: [PATCH 0471/1084] f2fs: clean up flush/discard command namings This patch simply 
cleans up the names for flush/discard commands. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 20 +++++----- fs/f2fs/segment.c | 98 +++++++++++++++++++++++------------------------ 3 files changed, 59 insertions(+), 61 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 29cdf0c1da1d..883f1ea9e0b6 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -194,7 +194,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->cache_mem += sizeof(struct f2fs_gc_kthread); /* build merge flush thread */ - if (SM_I(sbi)->cmd_control_info) + if (SM_I(sbi)->fcc_info) si->cache_mem += sizeof(struct flush_cmd_control); /* free nids */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4a84b3fbbfd1..548e75d18ec1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -181,13 +181,12 @@ struct discard_entry { int len; /* # of consecutive blocks of the discard */ }; -struct bio_entry { - struct list_head list; - block_t lstart; - block_t len; - struct bio *bio; - struct completion event; - int error; +struct discard_cmd { + struct list_head list; /* command list */ + struct completion wait; /* compleation */ + block_t lstart; /* logical start address */ + block_t len; /* length */ + struct bio *bio; /* bio */ }; /* for the list of fsync inodes, used only during recovery */ @@ -634,8 +633,8 @@ struct f2fs_sm_info { unsigned int rec_prefree_segments; /* for small discard management */ - struct list_head discard_list; /* 4KB discard list */ - struct list_head wait_list; /* linked with issued discard bio */ + struct list_head discard_entry_list; /* 4KB discard entry list */ + struct list_head discard_cmd_list; /* discard cmd list */ int nr_discards; /* # of discards in the list */ int max_discards; /* max. discards to be issued */ @@ -649,8 +648,7 @@ struct f2fs_sm_info { unsigned int min_fsync_blocks; /* threshold for fsync */ /* for flush command control */ - struct flush_cmd_control *cmd_control_info; - + struct flush_cmd_control *fcc_info; }; /* diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c8c825574fed..fa30c117b4cc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -26,7 +26,7 @@ #define __reverse_ffz(x) __reverse_ffs(~(x)) static struct kmem_cache *discard_entry_slab; -static struct kmem_cache *bio_entry_slab; +static struct kmem_cache *discard_cmd_slab; static struct kmem_cache *sit_entry_set_slab; static struct kmem_cache *inmem_entry_slab; @@ -439,7 +439,7 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi) static int issue_flush_thread(void *data) { struct f2fs_sb_info *sbi = data; - struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; wait_queue_head_t *q = &fcc->flush_wait_queue; repeat: if (kthread_should_stop()) @@ -468,7 +468,7 @@ static int issue_flush_thread(void *data) int f2fs_issue_flush(struct f2fs_sb_info *sbi) { - struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), @@ -511,8 +511,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) struct flush_cmd_control *fcc; int err = 0; - if (SM_I(sbi)->cmd_control_info) { - fcc = SM_I(sbi)->cmd_control_info; + if (SM_I(sbi)->fcc_info) { + fcc = SM_I(sbi)->fcc_info; goto init_thread; } @@ -522,14 +522,14 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&fcc->submit_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); 
init_llist_head(&fcc->issue_list); - SM_I(sbi)->cmd_control_info = fcc; + SM_I(sbi)->fcc_info = fcc; init_thread: fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { err = PTR_ERR(fcc->f2fs_issue_flush); kfree(fcc); - SM_I(sbi)->cmd_control_info = NULL; + SM_I(sbi)->fcc_info = NULL; return err; } @@ -538,7 +538,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) { - struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; if (fcc && fcc->f2fs_issue_flush) { struct task_struct *flush_thread = fcc->f2fs_issue_flush; @@ -548,7 +548,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) } if (free) { kfree(fcc); - SM_I(sbi)->cmd_control_info = NULL; + SM_I(sbi)->fcc_info = NULL; } } @@ -628,42 +628,43 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi, +static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, struct bio *bio, block_t lstart, block_t len) { - struct list_head *wait_list = &(SM_I(sbi)->wait_list); - struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); + struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list); + struct discard_cmd *dc; - INIT_LIST_HEAD(&be->list); - be->bio = bio; - be->lstart = lstart; - be->len = len; - init_completion(&be->event); - list_add_tail(&be->list, wait_list); + dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); + INIT_LIST_HEAD(&dc->list); + dc->bio = bio; + dc->lstart = lstart; + dc->len = len; + init_completion(&dc->wait); + list_add_tail(&dc->list, wait_list); - return be; + return dc; } /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { - struct list_head *wait_list = &(SM_I(sbi)->wait_list); - struct bio_entry *be, *tmp; + struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list); + struct discard_cmd *dc, *tmp; - list_for_each_entry_safe(be, tmp, wait_list, list) { - struct bio *bio = be->bio; + list_for_each_entry_safe(dc, tmp, wait_list, list) { + struct bio *bio = dc->bio; int err; - if (!completion_done(&be->event)) { - if ((be->lstart <= blkaddr && - blkaddr < be->lstart + be->len) || + if (!completion_done(&dc->wait)) { + if ((dc->lstart <= blkaddr && + blkaddr < dc->lstart + dc->len) || blkaddr == NULL_ADDR) - wait_for_completion_io(&be->event); + wait_for_completion_io(&dc->wait); else continue; } - err = be->error; + err = bio->bi_error; if (err == -EOPNOTSUPP) err = 0; @@ -672,17 +673,16 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) "Issue discard failed, ret: %d", err); bio_put(bio); - list_del(&be->list); - kmem_cache_free(bio_entry_slab, be); + list_del(&dc->list); + kmem_cache_free(discard_cmd_slab, dc); } } -static void f2fs_submit_bio_wait_endio(struct bio *bio) +static void f2fs_submit_discard_endio(struct bio *bio) { - struct bio_entry *be = (struct bio_entry *)bio->bi_private; + struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; - be->error = bio->bi_error; - complete(&be->event); + complete(&dc->wait); } /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ @@ -705,11 +705,11 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, 
SECTOR_FROM_BLOCK(blklen), GFP_NOFS, 0, &bio); if (!err && bio) { - struct bio_entry *be = __add_bio_entry(sbi, bio, + struct discard_cmd *dc = __add_discard_cmd(sbi, bio, lblkstart, blklen); - bio->bi_private = be; - bio->bi_end_io = f2fs_submit_bio_wait_endio; + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; bio->bi_opf |= REQ_SYNC; submit_bio(bio); } @@ -817,7 +817,7 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct seg_entry *se, unsigned int start, unsigned int end) { - struct list_head *head = &SM_I(sbi)->discard_list; + struct list_head *head = &SM_I(sbi)->discard_entry_list; struct discard_entry *new, *last; if (!list_empty(head)) { @@ -886,7 +886,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, void release_discard_addrs(struct f2fs_sb_info *sbi) { - struct list_head *head = &(SM_I(sbi)->discard_list); + struct list_head *head = &(SM_I(sbi)->discard_entry_list); struct discard_entry *entry, *this; /* drop caches */ @@ -912,7 +912,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->discard_list); + struct list_head *head = &(SM_I(sbi)->discard_entry_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct blk_plug plug; @@ -2708,8 +2708,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; - INIT_LIST_HEAD(&sm_info->discard_list); - INIT_LIST_HEAD(&sm_info->wait_list); + INIT_LIST_HEAD(&sm_info->discard_entry_list); + INIT_LIST_HEAD(&sm_info->discard_cmd_list); sm_info->nr_discards = 0; sm_info->max_discards = 0; @@ -2859,15 +2859,15 @@ int __init create_segment_manager_caches(void) if (!discard_entry_slab) goto fail; - bio_entry_slab = f2fs_kmem_cache_create("bio_entry", - sizeof(struct bio_entry)); - if (!bio_entry_slab) + discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd", + sizeof(struct discard_cmd)); + if (!discard_cmd_slab) goto destroy_discard_entry; sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) - goto destroy_bio_entry; + goto destroy_discard_cmd; inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", sizeof(struct inmem_pages)); @@ -2877,8 +2877,8 @@ int __init create_segment_manager_caches(void) destroy_sit_entry_set: kmem_cache_destroy(sit_entry_set_slab); -destroy_bio_entry: - kmem_cache_destroy(bio_entry_slab); +destroy_discard_cmd: + kmem_cache_destroy(discard_cmd_slab); destroy_discard_entry: kmem_cache_destroy(discard_entry_slab); fail: @@ -2888,7 +2888,7 @@ int __init create_segment_manager_caches(void) void destroy_segment_manager_caches(void) { kmem_cache_destroy(sit_entry_set_slab); - kmem_cache_destroy(bio_entry_slab); + kmem_cache_destroy(discard_cmd_slab); kmem_cache_destroy(discard_entry_slab); kmem_cache_destroy(inmem_entry_slab); } -- GitLab From d4adb30f25f5f2aa9b205891e395251d2a9098be Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Jan 2017 10:21:15 -0800 Subject: [PATCH 0472/1084] f2fs: reorganize stat information This patch modifies stat information more clearly. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 883f1ea9e0b6..cd338ca24941 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -258,8 +258,6 @@ static int stat_show(struct seq_file *s, void *v) si->inline_dir); seq_printf(s, " - Orphan Inode: %u\n", si->orphans); - seq_printf(s, " - Atomic write count: %4d (Max. %4d)\n", - si->aw_cnt, si->max_aw_cnt); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); @@ -318,8 +316,10 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n", - si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data); + seq_printf(s, " - IO (CP: %4d, Data: %4d)\n", + si->nr_wb_cp_data, si->nr_wb_data); + seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n", + si->inmem_pages, si->aw_cnt, si->max_aw_cnt); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", -- GitLab From 0b54fb8458199dbed409abb06933c27439ea0911 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Jan 2017 14:40:24 -0800 Subject: [PATCH 0473/1084] f2fs: factor out discard command info into discard_cmd_control This patch adds discard_cmd_control with the existing discarding controls. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 2 ++ fs/f2fs/f2fs.h | 16 ++++++----- fs/f2fs/segment.c | 68 ++++++++++++++++++++++++++++++++++++----------- fs/f2fs/super.c | 5 +++- 4 files changed, 69 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index cd338ca24941..f9f6b0aeba02 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -196,6 +196,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build merge flush thread */ if (SM_I(sbi)->fcc_info) si->cache_mem += sizeof(struct flush_cmd_control); + if (SM_I(sbi)->dcc_info) + si->cache_mem += sizeof(struct discard_cmd_control); /* free nids */ si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 548e75d18ec1..90eb2b3645a3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -189,6 +189,13 @@ struct discard_cmd { struct bio *bio; /* bio */ }; +struct discard_cmd_control { + struct list_head discard_entry_list; /* 4KB discard entry list */ + int nr_discards; /* # of discards in the list */ + struct list_head discard_cmd_list; /* discard cmd list */ + int max_discards; /* max. discards to be issued */ +}; + /* for the list of fsync inodes, used only during recovery */ struct fsync_inode_entry { struct list_head list; /* list head */ @@ -632,12 +639,6 @@ struct f2fs_sm_info { /* a threshold to reclaim prefree segments */ unsigned int rec_prefree_segments; - /* for small discard management */ - struct list_head discard_entry_list; /* 4KB discard entry list */ - struct list_head discard_cmd_list; /* discard cmd list */ - int nr_discards; /* # of discards in the list */ - int max_discards; /* max. 
discards to be issued */ - /* for batched trimming */ unsigned int trim_sections; /* # of sections to trim */ @@ -649,6 +650,9 @@ struct f2fs_sm_info { /* for flush command control */ struct flush_cmd_control *fcc_info; + + /* for discard command control */ + struct discard_cmd_control *dcc_info; }; /* diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fa30c117b4cc..a009f8a70c3d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -631,7 +631,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, struct bio *bio, block_t lstart, block_t len) { - struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *cmd_list = &(dcc->discard_cmd_list); struct discard_cmd *dc; dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); @@ -640,7 +641,7 @@ static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, dc->lstart = lstart; dc->len = len; init_completion(&dc->wait); - list_add_tail(&dc->list, wait_list); + list_add_tail(&dc->list, cmd_list); return dc; } @@ -648,7 +649,8 @@ static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { - struct list_head *wait_list = &(SM_I(sbi)->discard_cmd_list); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = &(dcc->discard_cmd_list); struct discard_cmd *dc, *tmp; list_for_each_entry_safe(dc, tmp, wait_list, list) { @@ -817,7 +819,7 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct seg_entry *se, unsigned int start, unsigned int end) { - struct list_head *head = &SM_I(sbi)->discard_entry_list; + struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list; struct discard_entry *new, *last; if (!list_empty(head)) { @@ -835,7 +837,7 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, new->len = end - start; list_add_tail(&new->list, head); done: - SM_I(sbi)->nr_discards += end - start; + SM_I(sbi)->dcc_info->nr_discards += end - start; } static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, @@ -857,7 +859,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, if (!force) { if (!test_opt(sbi, DISCARD) || !se->valid_blocks || - SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) + SM_I(sbi)->dcc_info->nr_discards >= + SM_I(sbi)->dcc_info->max_discards) return false; } @@ -866,7 +869,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, dmap[i] = force ? 
~ckpt_map[i] & ~discard_map[i] : (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; - while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { + while (force || SM_I(sbi)->dcc_info->nr_discards <= + SM_I(sbi)->dcc_info->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); if (start >= max_blocks) break; @@ -886,7 +890,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, void release_discard_addrs(struct f2fs_sb_info *sbi) { - struct list_head *head = &(SM_I(sbi)->discard_entry_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); struct discard_entry *entry, *this; /* drop caches */ @@ -912,7 +916,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->discard_entry_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct blk_plug plug; @@ -972,13 +976,47 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) cpc->trimmed += entry->len; skip: list_del(&entry->list); - SM_I(sbi)->nr_discards -= entry->len; + SM_I(sbi)->dcc_info->nr_discards -= entry->len; kmem_cache_free(discard_entry_slab, entry); } blk_finish_plug(&plug); } +int create_discard_cmd_control(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc; + int err = 0; + + if (SM_I(sbi)->dcc_info) { + dcc = SM_I(sbi)->dcc_info; + goto init_thread; + } + + dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL); + if (!dcc) + return -ENOMEM; + + INIT_LIST_HEAD(&dcc->discard_entry_list); + INIT_LIST_HEAD(&dcc->discard_cmd_list); + dcc->nr_discards = 0; + dcc->max_discards = 0; + + SM_I(sbi)->dcc_info = dcc; +init_thread: + return err; +} + +void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + + if (free) { + kfree(dcc); + SM_I(sbi)->dcc_info = NULL; + } +} + static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); @@ -2708,11 +2746,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; - INIT_LIST_HEAD(&sm_info->discard_entry_list); - INIT_LIST_HEAD(&sm_info->discard_cmd_list); - sm_info->nr_discards = 0; - sm_info->max_discards = 0; - sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; INIT_LIST_HEAD(&sm_info->sit_entry_set); @@ -2723,6 +2756,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi) return err; } + err = create_discard_cmd_control(sbi); + if (err) + return err; + err = build_sit_info(sbi); if (err) return err; @@ -2844,6 +2881,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) if (!sm_info) return; destroy_flush_cmd_control(sbi, true); + destroy_discard_cmd_control(sbi, true); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e68cec492f06..921228189acd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -145,6 +145,7 @@ static match_table_t f2fs_tokens = { enum { GC_THREAD, /* struct f2fs_gc_thread */ SM_INFO, /* struct f2fs_sm_info */ + DCC_INFO, /* struct discard_cmd_control */ NM_INFO, /* struct f2fs_nm_info */ F2FS_SBI, /* struct f2fs_sb_info */ #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -168,6 +169,8 @@ static unsigned char 
*__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) return (unsigned char *)sbi->gc_thread; else if (struct_type == SM_INFO) return (unsigned char *)SM_I(sbi); + else if (struct_type == DCC_INFO) + return (unsigned char *)SM_I(sbi)->dcc_info; else if (struct_type == NM_INFO) return (unsigned char *)NM_I(sbi); else if (struct_type == F2FS_SBI) @@ -283,7 +286,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); -- GitLab From 1546996348b33dc44dff829bc86fea8a8536164d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Jan 2017 20:32:07 -0800 Subject: [PATCH 0474/1084] f2fs: add a kernel thread to issue discard commands asynchronously This patch adds a kernel thread to issue discard commands. It proposes three states, D_PREP, D_SUBMIT, and D_DONE to identify current bio status. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 +++++ fs/f2fs/segment.c | 128 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 109 insertions(+), 31 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 90eb2b3645a3..3a57d97ed9b4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -133,6 +133,8 @@ enum { (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) + +#define DISCARD_ISSUE_RATE 8 #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ @@ -181,18 +183,28 @@ struct discard_entry { int len; /* # of consecutive blocks of the discard */ }; +enum { + D_PREP, + D_SUBMIT, + D_DONE, +}; + struct discard_cmd { struct list_head list; /* command list */ struct completion wait; /* compleation */ block_t lstart; /* logical start address */ block_t len; /* length */ struct bio *bio; /* bio */ + int state; /* state */ }; struct discard_cmd_control { + struct task_struct *f2fs_issue_discard; /* discard thread */ struct list_head discard_entry_list; /* 4KB discard entry list */ int nr_discards; /* # of discards in the list */ struct list_head discard_cmd_list; /* discard cmd list */ + wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ + struct mutex cmd_lock; int max_discards; /* max. 
discards to be issued */ }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a009f8a70c3d..ed38644907ba 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -628,7 +628,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, +static void __add_discard_cmd(struct f2fs_sb_info *sbi, struct bio *bio, block_t lstart, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; @@ -638,12 +638,30 @@ static struct discard_cmd *__add_discard_cmd(struct f2fs_sb_info *sbi, dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); INIT_LIST_HEAD(&dc->list); dc->bio = bio; + bio->bi_private = dc; dc->lstart = lstart; dc->len = len; + dc->state = D_PREP; init_completion(&dc->wait); + + mutex_lock(&dcc->cmd_lock); list_add_tail(&dc->list, cmd_list); + mutex_unlock(&dcc->cmd_lock); +} + +static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) +{ + int err = dc->bio->bi_error; - return dc; + if (err == -EOPNOTSUPP) + err = 0; + + if (err) + f2fs_msg(sbi->sb, KERN_INFO, + "Issue discard failed, ret: %d", err); + bio_put(dc->bio); + list_del(&dc->list); + kmem_cache_free(discard_cmd_slab, dc); } /* This should be covered by global mutex, &sit_i->sentry_lock */ @@ -653,31 +671,28 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) struct list_head *wait_list = &(dcc->discard_cmd_list); struct discard_cmd *dc, *tmp; + mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { - struct bio *bio = dc->bio; - int err; - if (!completion_done(&dc->wait)) { - if ((dc->lstart <= blkaddr && - blkaddr < dc->lstart + dc->len) || - blkaddr == NULL_ADDR) + if (blkaddr == NULL_ADDR) { + if (dc->state == D_PREP) { + dc->state = D_SUBMIT; + submit_bio(dc->bio); + } + wait_for_completion_io(&dc->wait); + + __remove_discard_cmd(sbi, dc); + continue; + } + + if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { + if (dc->state == D_SUBMIT) wait_for_completion_io(&dc->wait); else - continue; + __remove_discard_cmd(sbi, dc); } - - err = bio->bi_error; - if (err == -EOPNOTSUPP) - err = 0; - - if (err) - f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard failed, ret: %d", err); - - bio_put(bio); - list_del(&dc->list); - kmem_cache_free(discard_cmd_slab, dc); } + mutex_unlock(&dcc->cmd_lock); } static void f2fs_submit_discard_endio(struct bio *bio) @@ -685,8 +700,48 @@ static void f2fs_submit_discard_endio(struct bio *bio) struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; complete(&dc->wait); + dc->state = D_DONE; } +static int issue_discard_thread(void *data) +{ + struct f2fs_sb_info *sbi = data; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + wait_queue_head_t *q = &dcc->discard_wait_queue; + struct list_head *cmd_list = &dcc->discard_cmd_list; + struct discard_cmd *dc, *tmp; + struct blk_plug plug; + int iter = 0; +repeat: + if (kthread_should_stop()) + return 0; + + blk_start_plug(&plug); + + mutex_lock(&dcc->cmd_lock); + list_for_each_entry_safe(dc, tmp, cmd_list, list) { + if (dc->state == D_PREP) { + dc->state = D_SUBMIT; + submit_bio(dc->bio); + if (iter++ > DISCARD_ISSUE_RATE) + break; + } else if (dc->state == D_DONE) { + __remove_discard_cmd(sbi, dc); + } + } + mutex_unlock(&dcc->cmd_lock); + + blk_finish_plug(&plug); + + iter = 0; + congestion_wait(BLK_RW_SYNC, HZ/50); + + wait_event_interruptible(*q, + kthread_should_stop() || !list_empty(&dcc->discard_cmd_list)); 
+ goto repeat; +} + + /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) @@ -707,13 +762,11 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, SECTOR_FROM_BLOCK(blklen), GFP_NOFS, 0, &bio); if (!err && bio) { - struct discard_cmd *dc = __add_discard_cmd(sbi, bio, - lblkstart, blklen); - - bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; bio->bi_opf |= REQ_SYNC; - submit_bio(bio); + + __add_discard_cmd(sbi, bio, lblkstart, blklen); + wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); } return err; } @@ -919,14 +972,11 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - struct blk_plug plug; unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason == CP_DISCARD); - blk_start_plug(&plug); - mutex_lock(&dirty_i->seglist_lock); while (1) { @@ -979,12 +1029,11 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) SM_I(sbi)->dcc_info->nr_discards -= entry->len; kmem_cache_free(discard_entry_slab, entry); } - - blk_finish_plug(&plug); } int create_discard_cmd_control(struct f2fs_sb_info *sbi) { + dev_t dev = sbi->sb->s_bdev->bd_dev; struct discard_cmd_control *dcc; int err = 0; @@ -999,11 +1048,22 @@ int create_discard_cmd_control(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&dcc->discard_entry_list); INIT_LIST_HEAD(&dcc->discard_cmd_list); + mutex_init(&dcc->cmd_lock); dcc->nr_discards = 0; dcc->max_discards = 0; + init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; init_thread: + dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, + "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(dcc->f2fs_issue_discard)) { + err = PTR_ERR(dcc->f2fs_issue_discard); + kfree(dcc); + SM_I(sbi)->dcc_info = NULL; + return err; + } + return err; } @@ -1011,6 +1071,12 @@ void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + if (dcc && dcc->f2fs_issue_discard) { + struct task_struct *discard_thread = dcc->f2fs_issue_discard; + + dcc->f2fs_issue_discard = NULL; + kthread_stop(discard_thread); + } if (free) { kfree(dcc); SM_I(sbi)->dcc_info = NULL; -- GitLab From dcc9165dbf9961cf2848af728f8be31f28a3c790 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Jan 2017 10:20:04 -0800 Subject: [PATCH 0475/1084] f2fs: show # of on-going flush and discard bios This patch adds stat information for flush and discard commands. 
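For reference, after this change the "Balancing F2FS Async" section of the f2fs debugfs status file (typically /sys/kernel/debug/f2fs/status) carries the two new counters on its IO line; a sample line, with purely illustrative counts, would look like:

 - IO (CP:    0, Data:   12, Flush:    1, Discard:    3)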
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 11 +++++++++-- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/segment.c | 6 ++++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index f9f6b0aeba02..0ca977a94c13 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -54,6 +54,12 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); + if (SM_I(sbi) && SM_I(sbi)->fcc_info) + si->nr_flush = + atomic_read(&SM_I(sbi)->fcc_info->submit_flush); + if (SM_I(sbi) && SM_I(sbi)->dcc_info) + si->nr_discard = + atomic_read(&SM_I(sbi)->dcc_info->submit_discard); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); @@ -318,8 +324,9 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d)\n", - si->nr_wb_cp_data, si->nr_wb_data); + seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n", + si->nr_wb_cp_data, si->nr_wb_data, + si->nr_flush, si->nr_discard); seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n", si->inmem_pages, si->aw_cnt, si->max_aw_cnt); seq_printf(s, " - nodes: %4d in %4d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3a57d97ed9b4..5d9731036306 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -206,6 +206,7 @@ struct discard_cmd_control { wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ struct mutex cmd_lock; int max_discards; /* max. 
discards to be issued */ + atomic_t submit_discard; /* # of issued discard */ }; /* for the list of fsync inodes, used only during recovery */ @@ -2262,7 +2263,7 @@ struct f2fs_stat_info { unsigned int ndirty_dirs, ndirty_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids; int total_count, utilization; - int bg_gc, nr_wb_cp_data, nr_wb_data; + int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard; int inline_xattr, inline_inode, inline_dir, orphans; int aw_cnt, max_aw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ed38644907ba..52b133725b7f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -653,6 +653,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d { int err = dc->bio->bi_error; + if (dc->state == D_DONE) + atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard)); + if (err == -EOPNOTSUPP) err = 0; @@ -678,6 +681,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) if (dc->state == D_PREP) { dc->state = D_SUBMIT; submit_bio(dc->bio); + atomic_inc(&dcc->submit_discard); } wait_for_completion_io(&dc->wait); @@ -723,6 +727,7 @@ static int issue_discard_thread(void *data) if (dc->state == D_PREP) { dc->state = D_SUBMIT; submit_bio(dc->bio); + atomic_inc(&dcc->submit_discard); if (iter++ > DISCARD_ISSUE_RATE) break; } else if (dc->state == D_DONE) { @@ -1049,6 +1054,7 @@ int create_discard_cmd_control(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&dcc->discard_entry_list); INIT_LIST_HEAD(&dcc->discard_cmd_list); mutex_init(&dcc->cmd_lock); + atomic_set(&dcc->submit_discard, 0); dcc->nr_discards = 0; dcc->max_discards = 0; -- GitLab From dc91de78e5e1d44238b5dd2b57d2e8e67cbc00a1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 13 Jan 2017 13:12:29 -0800 Subject: [PATCH 0476/1084] f2fs: do not preallocate blocks which has wrong buffer Sheng Yong reports needless preallocation if write(small_buffer, large_size) is called. In that case, f2fs preallocates large_size, but vfs returns early due to small_buffer size. Let's detect it before preallocation phase in f2fs. Reported-by: Sheng Yong Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++++- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 8 +++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 848d110dc1ca..2ea80215f26c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -749,6 +749,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) struct f2fs_map_blocks map; int err = 0; + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) + return 0; + map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); if (map.m_len > map.m_lblk) @@ -1653,7 +1656,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, * we already allocated all the blocks, so we don't need to get * the block addresses when there is no need to fill the page. 
*/ - if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE) + if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE && + !is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; if (f2fs_has_inline_data(inode) || diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5d9731036306..0045ef9a9ad2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1665,6 +1665,7 @@ enum { FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_DO_DEFRAG, /* indicate defragment is running */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ + FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e45522115b1c..9c0f469cde13 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -2258,8 +2259,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) { - int err = f2fs_preallocate_blocks(iocb, from); + int err; + if (iov_iter_fault_in_readable(from, iov_iter_count(from))) + set_inode_flag(inode, FI_NO_PREALLOC); + + err = f2fs_preallocate_blocks(iocb, from); if (err) { inode_unlock(inode); return err; @@ -2267,6 +2272,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); blk_finish_plug(&plug); + clear_inode_flag(inode, FI_NO_PREALLOC); } inode_unlock(inode); -- GitLab From b86e33075ed1909d8002745b56ecf73b833db143 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Sun, 22 Jan 2017 12:21:02 +0800 Subject: [PATCH 0477/1084] f2fs: fix a dead loop in f2fs_fiemap() A dead loop can be triggered in f2fs_fiemap() using the test case as below: ... fd = open(); fallocate(fd, 0, 0, 4294967296); ioctl(fd, FS_IOC_FIEMAP, fiemap_buf); ... It's caused by an overflow in __get_data_block(): ... bh->b_size = map.m_len << inode->i_blkbits; ... map.m_len is an unsigned int, and bh->b_size is a size_t which is 64 bits on a 64-bit architecture; the left shift is evaluated on the 32-bit value before the conversion to size_t, so it overflows. In the above-mentioned case, bh->b_size will be zero, and f2fs_fiemap() will call get_data_block() at block 0 again and again. Fix this by adding a force conversion before left shift.
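To make the overflow concrete, the following stand-alone sketch reproduces the same arithmetic in user space (illustrative only, not part of the patch; it assumes a 4 KiB block size, i.e. i_blkbits == 12, and uses unsigned long long in place of the kernel's u64 on a 64-bit build):

#include <stdio.h>

int main(void)
{
	unsigned int m_len = 1048576;	/* 4 GiB file / 4 KiB blocks = 2^20 blocks */
	unsigned int blkbits = 12;	/* log2(4096) */
	size_t b_size;

	/* the shift is evaluated in 32 bits: 2^20 << 12 == 2^32, which truncates to 0 */
	b_size = m_len << blkbits;
	printf("without cast: %zu\n", b_size);	/* prints 0, so fiemap keeps mapping block 0 */

	/* widening before the shift keeps the full result, as the (u64) cast in the patch does */
	b_size = (unsigned long long)m_len << blkbits;
	printf("with cast:    %zu\n", b_size);	/* prints 4294967296 */
	return 0;
}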
Signed-off-by: Wei Fang Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2ea80215f26c..3b5f1d14cab3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -964,7 +964,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (!err) { map_bh(bh, inode->i_sb, map.m_pblk); bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; - bh->b_size = map.m_len << inode->i_blkbits; + bh->b_size = (u64)map.m_len << inode->i_blkbits; } return err; } -- GitLab From 04b9a5f0f51942f9fd20e97df4ce1508f6335c59 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 24 Jan 2017 12:47:55 +0900 Subject: [PATCH 0478/1084] Doc: f2fs: Fix typo in Documentation/filesystems/f2fs.txt This patch fix a typo in f2fs.txt Signed-off-by: Masanari Iida Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index d99faced79cb..0ab33d4c8406 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -131,7 +131,7 @@ inline_dentry Enable the inline dir feature: data in new created directory entries can be written into inode block. The space of inode block which is used to store inline dentries is limited to ~3.4k. -noinline_dentry Diable the inline dentry feature. +noinline_dentry Disable the inline dentry feature. flush_merge Merge concurrent cache_flush commands as much as possible to eliminate redundant command issues. If the underlying device handles the cache_flush command relatively slowly, -- GitLab From ba38c27eb93e2d36bf940ca65c145f6e2aaa6d5c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 24 Jan 2017 20:39:51 +0800 Subject: [PATCH 0479/1084] f2fs: enhance lookup xattr Previously, in getxattr we will load all entries both in inline xattr and xattr node block, and then do the lookup in all entries, but our lookup flow shows low efficiency, since if we can lookup and hit in inline xattr of inode page cache first, we don't need to load and lookup xattr node block, which can obviously save cpu time and IO latency. 
Signed-off-by: Chao Yu [Jaegeuk Kim: initialize NULL to avoid warning] Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 132 ++++++++++++++++++++++++++++++++++++++++++------ fs/f2fs/xattr.h | 7 +-- 2 files changed, 121 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index c47ce2f330a1..f7482635a57d 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -217,6 +217,112 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, return entry; } +static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr, + void **last_addr, int index, + size_t len, const char *name) +{ + struct f2fs_xattr_entry *entry; + unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2; + + list_for_each_xattr(entry, base_addr) { + if ((void *)entry + sizeof(__u32) > base_addr + inline_size || + (void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) > + base_addr + inline_size) { + *last_addr = entry; + return NULL; + } + if (entry->e_name_index != index) + continue; + if (entry->e_name_len != len) + continue; + if (!memcmp(entry->e_name, name, len)) + break; + } + return entry; +} + +static int lookup_all_xattrs(struct inode *inode, struct page *ipage, + unsigned int index, unsigned int len, + const char *name, struct f2fs_xattr_entry **xe, + void **base_addr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + void *cur_addr, *txattr_addr, *last_addr = NULL; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; + unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0; + unsigned int inline_size = 0; + int err = 0; + + inline_size = inline_xattr_size(inode); + + if (!size && !inline_size) + return -ENODATA; + + txattr_addr = kzalloc(inline_size + size + sizeof(__u32), + GFP_F2FS_ZERO); + if (!txattr_addr) + return -ENOMEM; + + /* read from inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + inline_addr = inline_xattr_addr(page); + } + memcpy(txattr_addr, inline_addr, inline_size); + f2fs_put_page(page, 1); + + *xe = __find_inline_xattr(txattr_addr, &last_addr, + index, len, name); + if (*xe) + goto check; + } + + /* read from xattr node block */ + if (xnid) { + struct page *xpage; + void *xattr_addr; + + /* The inode already has an extended attribute block. 
*/ + xpage = get_node_page(sbi, xnid); + if (IS_ERR(xpage)) { + err = PTR_ERR(xpage); + goto out; + } + + xattr_addr = page_address(xpage); + memcpy(txattr_addr + inline_size, xattr_addr, size); + f2fs_put_page(xpage, 1); + } + + if (last_addr) + cur_addr = XATTR_HDR(last_addr) - 1; + else + cur_addr = txattr_addr; + + *xe = __find_xattr(cur_addr, index, len, name); +check: + if (IS_XATTR_LAST_ENTRY(*xe)) { + err = -ENODATA; + goto out; + } + + *base_addr = txattr_addr; + return 0; +out: + kzfree(txattr_addr); + return err; +} + static int read_all_xattrs(struct inode *inode, struct page *ipage, void **base_addr) { @@ -348,8 +454,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } xattr_addr = page_address(xpage); - memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - - sizeof(struct node_footer)); + memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE); set_page_dirty(xpage); f2fs_put_page(xpage, 1); @@ -361,10 +466,11 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, int f2fs_getxattr(struct inode *inode, int index, const char *name, void *buffer, size_t buffer_size, struct page *ipage) { - struct f2fs_xattr_entry *entry; - void *base_addr; + struct f2fs_xattr_entry *entry = NULL; int error = 0; - size_t size, len; + unsigned int size, len; + char *pval; + void *base_addr = NULL; if (name == NULL) return -EINVAL; @@ -373,30 +479,26 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - error = read_all_xattrs(inode, ipage, &base_addr); + error = lookup_all_xattrs(inode, ipage, index, len, name, + &entry, &base_addr); if (error) return error; - entry = __find_xattr(base_addr, index, len, name); - if (IS_XATTR_LAST_ENTRY(entry)) { - error = -ENODATA; - goto cleanup; - } - size = le16_to_cpu(entry->e_value_size); if (buffer && size > buffer_size) { error = -ERANGE; - goto cleanup; + goto out; } + pval = entry->e_name + entry->e_name_len; + if (buffer) { char *pval = entry->e_name + entry->e_name_len; memcpy(buffer, pval, size); } error = size; - -cleanup: +out: kzfree(base_addr); return error; } diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index f990de20cdcd..d5a94928c116 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -72,9 +72,10 @@ struct f2fs_xattr_entry { for (entry = XATTR_FIRST_ENTRY(addr);\ !IS_XATTR_LAST_ENTRY(entry);\ entry = XATTR_NEXT_ENTRY(entry)) - -#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ - sizeof(struct node_footer) - sizeof(__u32)) +#define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) +#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - sizeof(__u32)) +#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ + VALID_XATTR_BLOCK_SIZE) #define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \ sizeof(struct f2fs_xattr_header) - \ -- GitLab From dba79f38bc60d98b605bdbbf5613aa3fb8f8ff7c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Jan 2017 10:52:39 +0800 Subject: [PATCH 0480/1084] f2fs: fix to avoid overflow when left shifting page offset We use following method to calculate size with current page index: size = index << PAGE_SHIFT If type of index has only 32-bits size, left shifting will incur overflow, which makes result incorrect. So let's cast index with 64-bits type to avoid such issue. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4fb4471a3206..e93316ea8d1b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -428,8 +428,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } if (!file_keep_isize(inode) && - (i_size_read(inode) <= (start << PAGE_SHIFT))) - f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT); + (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT))) + f2fs_i_size_write(inode, + (loff_t)(start + 1) << PAGE_SHIFT); /* * dest is reserved block, invalidate src block -- GitLab From 73545817c90edcb367a65720f669cddce633bc46 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 25 Jan 2017 10:52:40 +0800 Subject: [PATCH 0481/1084] f2fs: fix null pointer dereference when issuing flush in ->fsync We only allocate flush merge control structure sbi::sm_info::fcc_info when flush_merge option is on, but in f2fs_issue_flush we still try to access member of the control structure without that option, it incurs panic as show below, fix it. Call Trace: __remove_ino_entry+0xa9/0xc0 [f2fs] f2fs_do_sync_file.isra.27+0x214/0x6d0 [f2fs] f2fs_sync_file+0x18/0x20 [f2fs] vfs_fsync_range+0x3d/0xb0 __do_page_fault+0x261/0x4d0 do_fsync+0x3d/0x70 SyS_fsync+0x10/0x20 do_syscall_64+0x6e/0x180 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7f18ce260de0 RSP: 002b:00007ffdd4589258 EFLAGS: 00000246 ORIG_RAX: 000000000000004a RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f18ce260de0 RDX: 0000000000000006 RSI: 00000000016c0360 RDI: 0000000000000003 RBP: 00000000016c0360 R08: 000000000000ffff R09: 000000000000001f R10: 00007ffdd4589020 R11: 0000000000000246 R12: 00000000016c0100 R13: 0000000000000000 R14: 00000000016c1f00 R15: 00000000016c0100 Code: fb 81 e3 00 08 00 00 48 89 45 a0 0f 1f 44 00 00 31 c0 85 db 75 27 41 81 e7 00 04 00 00 74 0c 41 8b 45 20 85 c0 0f 85 81 00 00 00 41 ff 45 20 4c 89 e7 e8 f8 e9 ff ff f0 41 ff 4d 20 48 83 c4 RIP: f2fs_issue_flush+0x5b/0x170 [f2fs] RSP: ffffc90003b5fd78 CR2: 0000000000000020 ---[ end trace a09314c24f037648 ]--- Reported-by: Shuoran Liu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 52b133725b7f..4e515bbbeddc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -477,7 +477,10 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (test_opt(sbi, NOBARRIER)) return 0; - if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) { + if (!test_opt(sbi, FLUSH_MERGE)) + return submit_flush_wait(sbi); + + if (!atomic_read(&fcc->submit_flush)) { int ret; atomic_inc(&fcc->submit_flush); -- GitLab From 0cc0dec2b6f9bf0e69afbddc1c11e220a1cdf328 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Fri, 27 Jan 2017 09:35:37 +0800 Subject: [PATCH 0482/1084] f2fs: show the fault injection mount option This patch shows the fault injection mount option in f2fs_show_options(). 
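Once set, the option appears alongside the other f2fs mount options, e.g. in /proc/mounts; in the sample line below the device, mount point and surrounding options are illustrative and elided with "...":

/dev/sdb1 /mnt/f2fs f2fs rw,...,active_logs=6,fault_injection 0 0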
Signed-off-by: Kaixu Xia Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 921228189acd..f97e8089fb54 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -556,6 +556,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; #ifdef CONFIG_F2FS_FAULT_INJECTION f2fs_build_fault_attr(sbi, arg); + set_opt(sbi, FAULT_INJECTION); #else f2fs_msg(sb, KERN_INFO, "FAULT_INJECTION was not selected"); @@ -946,6 +947,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",active_logs=%u", sbi->active_logs); if (F2FS_IO_SIZE_BITS(sbi)) seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (test_opt(sbi, FAULT_INJECTION)) + seq_puts(seq, ",fault_injection"); +#endif return 0; } -- GitLab From 8ed5974552086363aec2ac96fe01e3f2970baeab Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 29 Jan 2017 14:27:02 +0900 Subject: [PATCH 0483/1084] f2fs: declare missing static function We missed two functions declared as static functions. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4e515bbbeddc..5c0b59fe9bef 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1039,7 +1039,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) } } -int create_discard_cmd_control(struct f2fs_sb_info *sbi) +static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct discard_cmd_control *dcc; @@ -1076,7 +1076,7 @@ int create_discard_cmd_control(struct f2fs_sb_info *sbi) return err; } -void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) +static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; -- GitLab From c64ab12e365adb505c1be0f4ddea430d4a774514 Mon Sep 17 00:00:00 2001 From: DongOh Shin Date: Mon, 30 Jan 2017 10:55:17 -0800 Subject: [PATCH 0484/1084] f2fs: fix 3 coding style errors in f2fs.h Two coding style errors below have been resolved: "Macros with complex values should be enclosed in parentheses" And a coding style error below has been resolved: "space prohibited before that ',' (ctx:WxW)" Signed-off-by: DongOh Shin Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0045ef9a9ad2..26bc8773ebcf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -108,9 +108,9 @@ struct f2fs_mount_info { #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) #define F2FS_SET_FEATURE(sb, mask) \ - F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask) + (F2FS_SB(sb)->raw_super->feature |= cpu_to_le32(mask)) #define F2FS_CLEAR_FEATURE(sb, mask) \ - F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask) + (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)) /* * For checkpoint manager @@ -2048,7 +2048,7 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, struct inode *, const struct qstr *); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, - const struct qstr *, f2fs_hash_t , unsigned int); + const struct qstr *, f2fs_hash_t, unsigned int); int f2fs_add_regular_entry(struct inode *, const struct qstr *, 
const struct qstr *, struct inode *, nid_t, umode_t); int __f2fs_do_add_link(struct inode *, struct fscrypt_name*, struct inode *, -- GitLab From cac5a3d8f517cfc7a882ffd4a0eba220f02deee8 Mon Sep 17 00:00:00 2001 From: DongOh Shin Date: Mon, 30 Jan 2017 10:55:18 -0800 Subject: [PATCH 0485/1084] f2fs: fix 446 coding style warnings in f2fs.h 1) Nine coding style warnings below have been resolved: "Missing a blank line after declarations" 2) 435 coding style warnings below have been resolved: "function definition argument 'x' should also have an identifier name" 3) Two coding style warnings below have been resolved: "macros should not use a trailing semicolon" Signed-off-by: DongOh Shin Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 498 +++++++++++++++++++++++++++---------------------- 1 file changed, 270 insertions(+), 228 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 26bc8773ebcf..8c2c12fa2b08 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -231,6 +231,7 @@ struct fsync_inode_entry { static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i) { int before = nats_in_cursum(journal); + journal->n_nats = cpu_to_le16(before + i); return before; } @@ -238,6 +239,7 @@ static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i) static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i) { int before = sits_in_cursum(journal); + journal->n_sits = cpu_to_le16(before + i); return before; } @@ -323,12 +325,14 @@ static inline void make_dentry_ptr(struct inode *inode, if (type == 1) { struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; + d->max = NR_DENTRY_IN_BLOCK; d->bitmap = &t->dentry_bitmap; d->dentry = t->dentry; d->filename = t->filename; } else { struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; + d->max = NR_INLINE_DENTRY; d->bitmap = &t->dentry_bitmap; d->dentry = t->dentry; @@ -517,7 +521,7 @@ static inline bool __is_front_mergeable(struct extent_info *cur, return __is_extent_mergeable(cur, front); } -extern void f2fs_mark_inode_dirty_sync(struct inode *, bool); +extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); static inline void __try_update_largest_extent(struct inode *inode, struct extent_tree *et, struct extent_node *en) { @@ -1478,6 +1482,7 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, { #ifdef CONFIG_F2FS_FAULT_INJECTION struct page *page = find_lock_page(mapping, index); + if (page) return page; @@ -1561,6 +1566,7 @@ static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, static inline bool IS_INODE(struct page *page) { struct f2fs_node *p = F2FS_NODE(page); + return RAW_IS_INODE(p); } @@ -1574,6 +1580,7 @@ static inline block_t datablock_addr(struct page *node_page, { struct f2fs_node *raw_node; __le32 *addr_array; + raw_node = F2FS_NODE(node_page); addr_array = blkaddr_in_node(raw_node); return le32_to_cpu(addr_array[offset]); @@ -1810,6 +1817,7 @@ static inline unsigned int addrs_per_inode(struct inode *inode) static inline void *inline_xattr_addr(struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); + return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS]); } @@ -1871,6 +1879,7 @@ static inline bool f2fs_is_drop_cache(struct inode *inode) static inline void *inline_data_addr(struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); + return (void *)&(ri->i_addr[1]); } @@ -1993,29 +2002,30 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) /* * file.c */ -int 
f2fs_sync_file(struct file *, loff_t, loff_t, int); -void truncate_data_blocks(struct dnode_of_data *); -int truncate_blocks(struct inode *, u64, bool); -int f2fs_truncate(struct inode *); -int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); -int f2fs_setattr(struct dentry *, struct iattr *); -int truncate_hole(struct inode *, pgoff_t, pgoff_t); -int truncate_data_blocks_range(struct dnode_of_data *, int); -long f2fs_ioctl(struct file *, unsigned int, unsigned long); -long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); +int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); +void truncate_data_blocks(struct dnode_of_data *dn); +int truncate_blocks(struct inode *inode, u64 from, bool lock); +int f2fs_truncate(struct inode *inode); +int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); +int f2fs_setattr(struct dentry *dentry, struct iattr *attr); +int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); +int truncate_data_blocks_range(struct dnode_of_data *dn, int count); +long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); +long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* * inode.c */ -void f2fs_set_inode_flags(struct inode *); -struct inode *f2fs_iget(struct super_block *, unsigned long); -struct inode *f2fs_iget_retry(struct super_block *, unsigned long); -int try_to_free_nats(struct f2fs_sb_info *, int); -int update_inode(struct inode *, struct page *); -int update_inode_page(struct inode *); -int f2fs_write_inode(struct inode *, struct writeback_control *); -void f2fs_evict_inode(struct inode *); -void handle_failed_inode(struct inode *); +void f2fs_set_inode_flags(struct inode *inode); +struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); +struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); +int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); +int update_inode(struct inode *inode, struct page *node_page); +int update_inode_page(struct inode *inode); +int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); +void f2fs_evict_inode(struct inode *inode); +void handle_failed_inode(struct inode *inode); /* * namei.c @@ -2025,40 +2035,47 @@ struct dentry *f2fs_get_parent(struct dentry *child); /* * dir.c */ -void set_de_type(struct f2fs_dir_entry *, umode_t); -unsigned char get_de_type(struct f2fs_dir_entry *); -struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *, - f2fs_hash_t, int *, struct f2fs_dentry_ptr *); -int f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, - unsigned int, struct fscrypt_str *); -void do_make_empty_dir(struct inode *, struct inode *, - struct f2fs_dentry_ptr *); -struct page *init_inode_metadata(struct inode *, struct inode *, - const struct qstr *, const struct qstr *, struct page *); -void update_parent_metadata(struct inode *, struct inode *, unsigned int); -int room_for_filename(const void *, int, int); -void f2fs_drop_nlink(struct inode *, struct inode *); -struct f2fs_dir_entry *__f2fs_find_entry(struct inode *, struct fscrypt_name *, - struct page **); -struct f2fs_dir_entry *f2fs_find_entry(struct inode *, const struct qstr *, - struct page **); -struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); -ino_t f2fs_inode_by_name(struct inode *, const struct qstr *, struct page **); -void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, - struct page *, struct inode *); -int 
update_dent_inode(struct inode *, struct inode *, const struct qstr *); -void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, - const struct qstr *, f2fs_hash_t, unsigned int); -int f2fs_add_regular_entry(struct inode *, const struct qstr *, - const struct qstr *, struct inode *, nid_t, umode_t); -int __f2fs_do_add_link(struct inode *, struct fscrypt_name*, struct inode *, - nid_t, umode_t); -int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, - umode_t); -void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, - struct inode *); -int f2fs_do_tmpfile(struct inode *, struct inode *); -bool f2fs_empty_dir(struct inode *); +void set_de_type(struct f2fs_dir_entry *de, umode_t mode); +unsigned char get_de_type(struct f2fs_dir_entry *de); +struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname, + f2fs_hash_t namehash, int *max_slots, + struct f2fs_dentry_ptr *d); +int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, + unsigned int start_pos, struct fscrypt_str *fstr); +void do_make_empty_dir(struct inode *inode, struct inode *parent, + struct f2fs_dentry_ptr *d); +struct page *init_inode_metadata(struct inode *inode, struct inode *dir, + const struct qstr *new_name, + const struct qstr *orig_name, struct page *dpage); +void update_parent_metadata(struct inode *dir, struct inode *inode, + unsigned int current_depth); +int room_for_filename(const void *bitmap, int slots, int max_slots); +void f2fs_drop_nlink(struct inode *dir, struct inode *inode); +struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, + struct fscrypt_name *fname, struct page **res_page); +struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, + const struct qstr *child, struct page **res_page); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p); +ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, + struct page **page); +void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, + struct page *page, struct inode *inode); +int update_dent_inode(struct inode *inode, struct inode *to, + const struct qstr *name); +void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, + const struct qstr *name, f2fs_hash_t name_hash, + unsigned int bit_pos); +int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, + const struct qstr *orig_name, + struct inode *inode, nid_t ino, umode_t mode); +int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname, + struct inode *inode, nid_t ino, umode_t mode); +int __f2fs_add_link(struct inode *dir, const struct qstr *name, + struct inode *inode, nid_t ino, umode_t mode); +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *dir, struct inode *inode); +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir); +bool f2fs_empty_dir(struct inode *dir); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) { @@ -2069,18 +2086,18 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) /* * super.c */ -int f2fs_inode_dirtied(struct inode *, bool); -void f2fs_inode_synced(struct inode *); -int f2fs_commit_super(struct f2fs_sb_info *, bool); -int f2fs_sync_fs(struct super_block *, int); +int f2fs_inode_dirtied(struct inode *inode, bool sync); +void f2fs_inode_synced(struct inode *inode); +int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); +int f2fs_sync_fs(struct super_block *sb, int sync); extern __printf(3, 
4) -void f2fs_msg(struct super_block *, const char *, const char *, ...); +void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); int sanity_check_ckpt(struct f2fs_sb_info *sbi); /* * hash.c */ -f2fs_hash_t f2fs_dentry_hash(const struct qstr *); +f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info); /* * node.c @@ -2088,164 +2105,183 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *); struct dnode_of_data; struct node_info; -bool available_free_memory(struct f2fs_sb_info *, int); -int need_dentry_mark(struct f2fs_sb_info *, nid_t); -bool is_checkpointed_node(struct f2fs_sb_info *, nid_t); -bool need_inode_block_update(struct f2fs_sb_info *, nid_t); -void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); -pgoff_t get_next_page_offset(struct dnode_of_data *, pgoff_t); -int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); -int truncate_inode_blocks(struct inode *, pgoff_t); -int truncate_xattr_node(struct inode *, struct page *); -int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); -int remove_inode_page(struct inode *); -struct page *new_inode_page(struct inode *); -struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); -void ra_node_page(struct f2fs_sb_info *, nid_t); -struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_node_page_ra(struct page *, int); -void move_node_page(struct page *, int); -int fsync_node_pages(struct f2fs_sb_info *, struct inode *, - struct writeback_control *, bool); -int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); -void build_free_nids(struct f2fs_sb_info *, bool); -bool alloc_nid(struct f2fs_sb_info *, nid_t *); -void alloc_nid_done(struct f2fs_sb_info *, nid_t); -void alloc_nid_failed(struct f2fs_sb_info *, nid_t); -int try_to_free_nids(struct f2fs_sb_info *, int); -void recover_inline_xattr(struct inode *, struct page *); -void recover_xattr_data(struct inode *, struct page *, block_t); -int recover_inode_page(struct f2fs_sb_info *, struct page *); -int restore_node_summary(struct f2fs_sb_info *, unsigned int, - struct f2fs_summary_block *); -void flush_nat_entries(struct f2fs_sb_info *); -int build_node_manager(struct f2fs_sb_info *); -void destroy_node_manager(struct f2fs_sb_info *); +bool available_free_memory(struct f2fs_sb_info *sbi, int type); +int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); +bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); +bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); +void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni); +pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); +int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); +int truncate_inode_blocks(struct inode *inode, pgoff_t from); +int truncate_xattr_node(struct inode *inode, struct page *page); +int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino); +int remove_inode_page(struct inode *inode); +struct page *new_inode_page(struct inode *inode); +struct page *new_node_page(struct dnode_of_data *dn, + unsigned int ofs, struct page *ipage); +void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); +struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); +struct page *get_node_page_ra(struct page *parent, int start); +void move_node_page(struct page *node_page, int gc_type); +int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, + struct writeback_control *wbc, bool atomic); +int 
sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc); +void build_free_nids(struct f2fs_sb_info *sbi, bool sync); +bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); +void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); +void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); +int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); +void recover_inline_xattr(struct inode *inode, struct page *page); +void recover_xattr_data(struct inode *inode, struct page *page, + block_t blkaddr); +int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); +int restore_node_summary(struct f2fs_sb_info *sbi, + unsigned int segno, struct f2fs_summary_block *sum); +void flush_nat_entries(struct f2fs_sb_info *sbi); +int build_node_manager(struct f2fs_sb_info *sbi); +void destroy_node_manager(struct f2fs_sb_info *sbi); int __init create_node_manager_caches(void); void destroy_node_manager_caches(void); /* * segment.c */ -void register_inmem_page(struct inode *, struct page *); -void drop_inmem_pages(struct inode *); -int commit_inmem_pages(struct inode *); -void f2fs_balance_fs(struct f2fs_sb_info *, bool); -void f2fs_balance_fs_bg(struct f2fs_sb_info *); -int f2fs_issue_flush(struct f2fs_sb_info *); -int create_flush_cmd_control(struct f2fs_sb_info *); -void destroy_flush_cmd_control(struct f2fs_sb_info *, bool); -void invalidate_blocks(struct f2fs_sb_info *, block_t); -bool is_checkpointed_data(struct f2fs_sb_info *, block_t); -void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); -void f2fs_wait_discard_bio(struct f2fs_sb_info *, block_t); -void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); -void release_discard_addrs(struct f2fs_sb_info *); -int npages_for_summary_flush(struct f2fs_sb_info *, bool); -void allocate_new_segments(struct f2fs_sb_info *); -int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); -bool exist_trim_candidates(struct f2fs_sb_info *, struct cp_control *); -struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); -void update_meta_page(struct f2fs_sb_info *, void *, block_t); -void write_meta_page(struct f2fs_sb_info *, struct page *); -void write_node_page(unsigned int, struct f2fs_io_info *); -void write_data_page(struct dnode_of_data *, struct f2fs_io_info *); -void rewrite_data_page(struct f2fs_io_info *); -void __f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *, - block_t, block_t, bool, bool); -void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *, - block_t, block_t, unsigned char, bool, bool); -void allocate_data_block(struct f2fs_sb_info *, struct page *, - block_t, block_t *, struct f2fs_summary *, int); -void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); -void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t); -void write_data_summaries(struct f2fs_sb_info *, block_t); -void write_node_summaries(struct f2fs_sb_info *, block_t); -int lookup_journal_in_cursum(struct f2fs_journal *, int, unsigned int, int); -void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *); -int build_segment_manager(struct f2fs_sb_info *); -void destroy_segment_manager(struct f2fs_sb_info *); +void register_inmem_page(struct inode *inode, struct page *page); +void drop_inmem_pages(struct inode *inode); +int commit_inmem_pages(struct inode *inode); +void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); +int f2fs_issue_flush(struct f2fs_sb_info *sbi); +int 
create_flush_cmd_control(struct f2fs_sb_info *sbi); +void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); +void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); +bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); +void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr); +void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); +void release_discard_addrs(struct f2fs_sb_info *sbi); +int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); +void allocate_new_segments(struct f2fs_sb_info *sbi); +int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); +bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); +struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); +void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page); +void write_node_page(unsigned int nid, struct f2fs_io_info *fio); +void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio); +void rewrite_data_page(struct f2fs_io_info *fio); +void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr, + bool recover_curseg, bool recover_newaddr); +void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, + block_t old_addr, block_t new_addr, + unsigned char version, bool recover_curseg, + bool recover_newaddr); +void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blkaddr, block_t *new_blkaddr, + struct f2fs_summary *sum, int type); +void f2fs_wait_on_page_writeback(struct page *page, + enum page_type type, bool ordered); +void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, + block_t blkaddr); +void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); +void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); +int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, + unsigned int val, int alloc); +void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int build_segment_manager(struct f2fs_sb_info *sbi); +void destroy_segment_manager(struct f2fs_sb_info *sbi); int __init create_segment_manager_caches(void); void destroy_segment_manager_caches(void); /* * checkpoint.c */ -void f2fs_stop_checkpoint(struct f2fs_sb_info *, bool); -struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t); -bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int); -int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool); -void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); -long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); -void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); -void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); -void release_ino_entry(struct f2fs_sb_info *, bool); -bool exist_written_data(struct f2fs_sb_info *, nid_t, int); -int f2fs_sync_inode_meta(struct f2fs_sb_info *); -int acquire_orphan_inode(struct f2fs_sb_info *); -void release_orphan_inode(struct f2fs_sb_info *); -void add_orphan_inode(struct inode *); -void remove_orphan_inode(struct f2fs_sb_info *, nid_t); -int recover_orphan_inodes(struct f2fs_sb_info *); -int get_valid_checkpoint(struct f2fs_sb_info *); 
-void update_dirty_page(struct inode *, struct page *); -void remove_dirty_inode(struct inode *); -int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); -int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); -void init_ino_entry_info(struct f2fs_sb_info *); +void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io); +struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); +bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); +int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, + int type, bool sync); +void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); +long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, + long nr_to_write); +void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); +void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); +void release_ino_entry(struct f2fs_sb_info *sbi, bool all); +bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); +int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi); +int acquire_orphan_inode(struct f2fs_sb_info *sbi); +void release_orphan_inode(struct f2fs_sb_info *sbi); +void add_orphan_inode(struct inode *inode); +void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino); +int recover_orphan_inodes(struct f2fs_sb_info *sbi); +int get_valid_checkpoint(struct f2fs_sb_info *sbi); +void update_dirty_page(struct inode *inode, struct page *page); +void remove_dirty_inode(struct inode *inode); +int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type); +int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc); +void init_ino_entry_info(struct f2fs_sb_info *sbi); int __init create_checkpoint_caches(void); void destroy_checkpoint_caches(void); /* * data.c */ -void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); -void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *, - struct page *, nid_t, enum page_type, int); -void f2fs_flush_merged_bios(struct f2fs_sb_info *); -int f2fs_submit_page_bio(struct f2fs_io_info *); -int f2fs_submit_page_mbio(struct f2fs_io_info *); -struct block_device *f2fs_target_device(struct f2fs_sb_info *, - block_t, struct bio *); -int f2fs_target_device_index(struct f2fs_sb_info *, block_t); -void set_data_blkaddr(struct dnode_of_data *); -void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t); -int reserve_new_blocks(struct dnode_of_data *, blkcnt_t); -int reserve_new_block(struct dnode_of_data *); -int f2fs_get_block(struct dnode_of_data *, pgoff_t); -int f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *); -int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); -struct page *get_read_data_page(struct inode *, pgoff_t, int, bool); -struct page *find_data_page(struct inode *, pgoff_t); -struct page *get_lock_data_page(struct inode *, pgoff_t, bool); -struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); -int do_write_data_page(struct f2fs_io_info *); -int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int); -int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); -void f2fs_set_page_dirty_nobuffers(struct page *); -void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); -int f2fs_release_page(struct page *, gfp_t); +void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, + 
int rw); +void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, + struct inode *inode, struct page *page, + nid_t ino, enum page_type type, int rw); +void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi); +int f2fs_submit_page_bio(struct f2fs_io_info *fio); +int f2fs_submit_page_mbio(struct f2fs_io_info *fio); +struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, + block_t blk_addr, struct bio *bio); +int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr); +void set_data_blkaddr(struct dnode_of_data *dn); +void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); +int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); +int reserve_new_block(struct dnode_of_data *dn); +int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); +int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); +int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); +struct page *get_read_data_page(struct inode *inode, pgoff_t index, + int op_flags, bool for_write); +struct page *find_data_page(struct inode *inode, pgoff_t index); +struct page *get_lock_data_page(struct inode *inode, pgoff_t index, + bool for_write); +struct page *get_new_data_page(struct inode *inode, + struct page *ipage, pgoff_t index, bool new_i_size); +int do_write_data_page(struct f2fs_io_info *fio); +int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, + int create, int flag); +int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); +void f2fs_set_page_dirty_nobuffers(struct page *page); +void f2fs_invalidate_page(struct page *page, unsigned int offset, + unsigned int length); +int f2fs_release_page(struct page *page, gfp_t wait); #ifdef CONFIG_MIGRATION -int f2fs_migrate_page(struct address_space *, struct page *, struct page *, - enum migrate_mode); +int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, + struct page *page, enum migrate_mode mode); #endif /* * gc.c */ -int start_gc_thread(struct f2fs_sb_info *); -void stop_gc_thread(struct f2fs_sb_info *); -block_t start_bidx_of_node(unsigned int, struct inode *); -int f2fs_gc(struct f2fs_sb_info *, bool, bool); -void build_gc_manager(struct f2fs_sb_info *); +int start_gc_thread(struct f2fs_sb_info *sbi); +void stop_gc_thread(struct f2fs_sb_info *sbi); +block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode); +int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background); +void build_gc_manager(struct f2fs_sb_info *sbi); /* * recovery.c */ -int recover_fsync_data(struct f2fs_sb_info *, bool); -bool space_for_roll_forward(struct f2fs_sb_info *); +int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only); +bool space_for_roll_forward(struct f2fs_sb_info *sbi); /* * debug.c @@ -2339,9 +2375,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_inplace_blocks(sbi) \ (atomic_inc(&(sbi)->inplace_count)) #define stat_inc_atomic_write(inode) \ - (atomic_inc(&F2FS_I_SB(inode)->aw_cnt)); + (atomic_inc(&F2FS_I_SB(inode)->aw_cnt)) #define stat_dec_atomic_write(inode) \ - (atomic_dec(&F2FS_I_SB(inode)->aw_cnt)); + (atomic_dec(&F2FS_I_SB(inode)->aw_cnt)) #define stat_update_max_atomic_write(inode) \ do { \ int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \ @@ -2381,8 +2417,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) si->bg_node_blks += (gc_type == BG_GC) ? 
(blks) : 0; \ } while (0) -int f2fs_build_stats(struct f2fs_sb_info *); -void f2fs_destroy_stats(struct f2fs_sb_info *); +int f2fs_build_stats(struct f2fs_sb_info *sbi); +void f2fs_destroy_stats(struct f2fs_sb_info *sbi); int __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else @@ -2434,49 +2470,55 @@ extern struct kmem_cache *inode_entry_slab; /* * inline.c */ -bool f2fs_may_inline_data(struct inode *); -bool f2fs_may_inline_dentry(struct inode *); -void read_inline_data(struct page *, struct page *); -bool truncate_inline_inode(struct page *, u64); -int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); -int f2fs_convert_inline_inode(struct inode *); -int f2fs_write_inline_data(struct inode *, struct page *); -bool recover_inline_data(struct inode *, struct page *); -struct f2fs_dir_entry *find_in_inline_dir(struct inode *, - struct fscrypt_name *, struct page **); -int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); -int f2fs_add_inline_entry(struct inode *, const struct qstr *, - const struct qstr *, struct inode *, nid_t, umode_t); -void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, - struct inode *, struct inode *); -bool f2fs_empty_inline_dir(struct inode *); -int f2fs_read_inline_dir(struct file *, struct dir_context *, - struct fscrypt_str *); -int f2fs_inline_data_fiemap(struct inode *, - struct fiemap_extent_info *, __u64, __u64); +bool f2fs_may_inline_data(struct inode *inode); +bool f2fs_may_inline_dentry(struct inode *inode); +void read_inline_data(struct page *page, struct page *ipage); +bool truncate_inline_inode(struct page *ipage, u64 from); +int f2fs_read_inline_data(struct inode *inode, struct page *page); +int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); +int f2fs_convert_inline_inode(struct inode *inode); +int f2fs_write_inline_data(struct inode *inode, struct page *page); +bool recover_inline_data(struct inode *inode, struct page *npage); +struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, + struct fscrypt_name *fname, struct page **res_page); +int make_empty_inline_dir(struct inode *inode, struct inode *parent, + struct page *ipage); +int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, + const struct qstr *orig_name, + struct inode *inode, nid_t ino, umode_t mode); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *dir, struct inode *inode); +bool f2fs_empty_inline_dir(struct inode *dir); +int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, + struct fscrypt_str *fstr); +int f2fs_inline_data_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len); /* * shrinker.c */ -unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *); -unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *); -void f2fs_join_shrinker(struct f2fs_sb_info *); -void f2fs_leave_shrinker(struct f2fs_sb_info *); +unsigned long f2fs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); +unsigned long f2fs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); +void f2fs_join_shrinker(struct f2fs_sb_info *sbi); +void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ -unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); -bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); -void 
f2fs_drop_extent_tree(struct inode *); -unsigned int f2fs_destroy_extent_node(struct inode *); -void f2fs_destroy_extent_tree(struct inode *); -bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *); -void f2fs_update_extent_cache(struct dnode_of_data *); +unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); +bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); +void f2fs_drop_extent_tree(struct inode *inode); +unsigned int f2fs_destroy_extent_node(struct inode *inode); +void f2fs_destroy_extent_tree(struct inode *inode); +bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, + struct extent_info *ei); +void f2fs_update_extent_cache(struct dnode_of_data *dn); void f2fs_update_extent_cache_range(struct dnode_of_data *dn, - pgoff_t, block_t, unsigned int); -void init_extent_cache_info(struct f2fs_sb_info *); + pgoff_t fofs, block_t blkaddr, unsigned int len); +void init_extent_cache_info(struct f2fs_sb_info *sbi); int __init create_extent_cache(void); void destroy_extent_cache(void); -- GitLab From a00861dbca9135b7ed56175646161f1d708b9efa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 1 Feb 2017 15:40:11 -0800 Subject: [PATCH 0486/1084] f2fs: show # of APPEND and UPDATE inodes This patch shows cached # of APPEND and UPDATE inode entries. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 6 ++++-- fs/f2fs/f2fs.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0ca977a94c13..de8da9fc5c99 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -70,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->inline_xattr = atomic_read(&sbi->inline_xattr); si->inline_inode = atomic_read(&sbi->inline_inode); si->inline_dir = atomic_read(&sbi->inline_dir); + si->append = sbi->im[APPEND_INO].ino_num; + si->update = sbi->im[UPDATE_INO].ino_num; si->orphans = sbi->im[ORPHAN_INO].ino_num; si->utilization = utilization(sbi); @@ -264,8 +266,8 @@ static int stat_show(struct seq_file *s, void *v) si->inline_inode); seq_printf(s, " - Inline_dentry Inode: %u\n", si->inline_dir); - seq_printf(s, " - Orphan Inode: %u\n", - si->orphans); + seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n", + si->orphans, si->append, si->update); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8c2c12fa2b08..59a32e4b02f4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2301,7 +2301,7 @@ struct f2fs_stat_info { int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard; - int inline_xattr, inline_inode, inline_dir, orphans; + int inline_xattr, inline_inode, inline_dir, append, update, orphans; int aw_cnt, max_aw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; unsigned int bimodal, avg_vblocks; -- GitLab From b6705157b2db521ec7ff3b3ac205230f231798a6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:31:51 +0100 Subject: [PATCH 0487/1084] drm/rockchip: add extcon dependency for DP The newly added DP driver links against the extcon core, which fails when extcon is a module and this driver is not: drivers/gpu/drm/rockchip/cdn-dp-core.o: In function `cdn_dp_get_port_lanes': cdn-dp-core.c:(.text.cdn_dp_get_port_lanes+0x24): undefined reference to `extcon_get_state' 
cdn-dp-core.c:(.text.cdn_dp_get_port_lanes+0x44): undefined reference to `extcon_get_property' Let's make Kconfig enforce correct behavior with a dependency. Fixes: 1a0f7ed3abe2 ("drm/rockchip: cdn-dp: add cdn DP support for rk3399") Signed-off-by: Arnd Bergmann Reviewed-by: Guenter Roeck Acked-by: Mark Yao Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/20170214213215.2888509-1-arnd@arndb.de --- drivers/gpu/drm/rockchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index ad31b3eb408f..0e4eb845cbb0 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -24,6 +24,7 @@ config ROCKCHIP_ANALOGIX_DP config ROCKCHIP_CDN_DP tristate "Rockchip cdn DP" depends on DRM_ROCKCHIP + depends on EXTCON select SND_SOC_HDMI_CODEC if SND_SOC help This selects support for Rockchip SoC specific extensions -- GitLab From efe0220fc2d2cc1cbd6c663dd3d652ac2b9afd1a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 20 Feb 2017 08:08:15 +0100 Subject: [PATCH 0488/1084] drm/rockchip: cdn-dp: Fix error handling It is likely that both 'clk_disable_unprepare()' should be called if 'pm_runtime_get_sync()' fails. Add a new label for that, because 'err_set_rate' is not meaningful in this case. Add a missing call to 'pm_runtime_put()'. Fixes: 1a0f7ed3abe2 ("drm/rockchip: cdn-dp: add cdn DP support for rk3399") Signed-off-by: Christophe JAILLET Acked-by: Mark Yao Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/20170220070815.23096-1-christophe.jaillet@wanadoo.fr --- drivers/gpu/drm/rockchip/cdn-dp-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index 9ab67a670885..fd79a70b8552 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -111,7 +111,7 @@ static int cdn_dp_clk_enable(struct cdn_dp_device *dp) ret = pm_runtime_get_sync(dp->dev); if (ret < 0) { DRM_DEV_ERROR(dp->dev, "cannot get pm runtime %d\n", ret); - goto err_pclk; + goto err_pm_runtime_get; } reset_control_assert(dp->core_rst); @@ -133,6 +133,8 @@ static int cdn_dp_clk_enable(struct cdn_dp_device *dp) return 0; err_set_rate: + pm_runtime_put(dp->dev); +err_pm_runtime_get: clk_disable_unprepare(dp->core_clk); err_core_clk: clk_disable_unprepare(dp->pclk); -- GitLab From c1b221078baf3d3275dc4309a716f6115696e2eb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Feb 2017 16:40:55 -0800 Subject: [PATCH 0489/1084] f2fs: move flush tracepoint This patch moves the tracepoint location for flush command. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5c0b59fe9bef..2f6d7370904c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -426,6 +426,9 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi) int ret = __submit_flush_wait(sbi->sb->s_bdev); int i; + trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), + test_opt(sbi, FLUSH_MERGE)); + if (sbi->s_ndevs && !ret) { for (i = 1; i < sbi->s_ndevs; i++) { ret = __submit_flush_wait(FDEV(i).bdev); @@ -471,9 +474,6 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; - trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), - test_opt(sbi, FLUSH_MERGE)); - if (test_opt(sbi, NOBARRIER)) return 0; -- GitLab From faa24895acfd49cb61055b762e39ee6a5452a389 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Feb 2017 18:27:17 -0800 Subject: [PATCH 0490/1084] f2fs: move write_node_page above fsync_node_pages This patch just moves write_node_page and introduces an inner function. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 140 ++++++++++++++++++++++++++----------------------- 1 file changed, 73 insertions(+), 67 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 69c38a0022e7..0cb6e86bcf28 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1318,6 +1318,78 @@ static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) return last_page; } +static int __write_node_page(struct page *page, + struct writeback_control *wbc) +{ + struct f2fs_sb_info *sbi = F2FS_P_SB(page); + nid_t nid; + struct node_info ni; + struct f2fs_io_info fio = { + .sbi = sbi, + .type = NODE, + .op = REQ_OP_WRITE, + .op_flags = wbc_to_write_flags(wbc), + .page = page, + .encrypted_page = NULL, + }; + + trace_f2fs_writepage(page, NODE); + + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + goto redirty_out; + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; + + /* get old block addr of this node page */ + nid = nid_of_node(page); + f2fs_bug_on(sbi, page->index != nid); + + if (wbc->for_reclaim) { + if (!down_read_trylock(&sbi->node_write)) + goto redirty_out; + } else { + down_read(&sbi->node_write); + } + + get_node_info(sbi, nid, &ni); + + /* This page is already truncated */ + if (unlikely(ni.blk_addr == NULL_ADDR)) { + ClearPageUptodate(page); + dec_page_count(sbi, F2FS_DIRTY_NODES); + up_read(&sbi->node_write); + unlock_page(page); + return 0; + } + + set_page_writeback(page); + fio.old_blkaddr = ni.blk_addr; + write_node_page(nid, &fio); + set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); + dec_page_count(sbi, F2FS_DIRTY_NODES); + up_read(&sbi->node_write); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE); + + unlock_page(page); + + if (unlikely(f2fs_cp_error(sbi))) + f2fs_submit_merged_bio(sbi, NODE, WRITE); + + return 0; + +redirty_out: + redirty_page_for_writepage(wbc, page); + return AOP_WRITEPAGE_ACTIVATE; +} + +static int f2fs_write_node_page(struct page *page, + struct writeback_control *wbc) +{ + return __write_node_page(page, wbc); +} + int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic) { @@ -1397,7 +1469,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = 
NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); + ret = __write_node_page(page, wbc); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); @@ -1577,72 +1649,6 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) return ret; } -static int f2fs_write_node_page(struct page *page, - struct writeback_control *wbc) -{ - struct f2fs_sb_info *sbi = F2FS_P_SB(page); - nid_t nid; - struct node_info ni; - struct f2fs_io_info fio = { - .sbi = sbi, - .type = NODE, - .op = REQ_OP_WRITE, - .op_flags = wbc_to_write_flags(wbc), - .page = page, - .encrypted_page = NULL, - }; - - trace_f2fs_writepage(page, NODE); - - if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) - goto redirty_out; - if (unlikely(f2fs_cp_error(sbi))) - goto redirty_out; - - /* get old block addr of this node page */ - nid = nid_of_node(page); - f2fs_bug_on(sbi, page->index != nid); - - if (wbc->for_reclaim) { - if (!down_read_trylock(&sbi->node_write)) - goto redirty_out; - } else { - down_read(&sbi->node_write); - } - - get_node_info(sbi, nid, &ni); - - /* This page is already truncated */ - if (unlikely(ni.blk_addr == NULL_ADDR)) { - ClearPageUptodate(page); - dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); - unlock_page(page); - return 0; - } - - set_page_writeback(page); - fio.old_blkaddr = ni.blk_addr; - write_node_page(nid, &fio); - set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); - dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); - - if (wbc->for_reclaim) - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE); - - unlock_page(page); - - if (unlikely(f2fs_cp_error(sbi))) - f2fs_submit_merged_bio(sbi, NODE, WRITE); - - return 0; - -redirty_out: - redirty_page_for_writepage(wbc, page); - return AOP_WRITEPAGE_ACTIVATE; -} - static int f2fs_write_node_pages(struct address_space *mapping, struct writeback_control *wbc) { -- GitLab From e7c75ab099c8c8d4616c2ac10517e86a88b368d1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Feb 2017 18:18:06 -0800 Subject: [PATCH 0491/1084] f2fs: avoid out-of-order execution of atomic writes We need to flush data writes before flushing last node block writes by using FUA with PREFLUSH. We don't need to guarantee precedent node writes since if those are not written, we can't reach to the last node block when scanning node block chain during roll-forward recovery. Afterwards f2fs_wait_on_page_writeback guarantees all the IO submission to disk, which builds a valid node block chain. 
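The ordering rule described here can be illustrated outside the kernel as well: everything written earlier is made durable first, and only then is the single terminating node record written durably. The sketch below is a userspace analogue with made-up names (commit_last_node(), the two descriptors), not kernel code; inside f2fs the same effect comes from tagging the last node bio with REQ_PREFLUSH | REQ_FUA, as the hunk further below does.

/*
 * Illustrative userspace analogue of the write ordering above; not kernel
 * code. data_fd holds the already-written data, node_fd gets the last node.
 */
#include <unistd.h>

static int commit_last_node(int data_fd, int node_fd,
                            const void *node_rec, size_t node_len)
{
        /* 1. preceding writes must reach the media first (PREFLUSH role) */
        if (fdatasync(data_fd))
                return -1;

        /* 2. then the terminating node record is written durably (FUA role) */
        if (write(node_fd, node_rec, node_len) != (ssize_t)node_len)
                return -1;
        return fdatasync(node_fd);
}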
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 ++- fs/f2fs/node.c | 10 +++++++--- include/trace/events/f2fs.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9c0f469cde13..a3808c49e326 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -278,7 +278,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, flush_out: remove_ino_entry(sbi, ino, UPDATE_INO); clear_inode_flag(inode, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(sbi); + if (!atomic) + ret = f2fs_issue_flush(sbi); f2fs_update_time(sbi, REQ_TIME); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0cb6e86bcf28..6a86f398aeac 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1318,7 +1318,7 @@ static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) return last_page; } -static int __write_node_page(struct page *page, +static int __write_node_page(struct page *page, bool atomic, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); @@ -1362,6 +1362,9 @@ static int __write_node_page(struct page *page, return 0; } + if (atomic && !test_opt(sbi, NOBARRIER)) + fio.op_flags |= REQ_PREFLUSH | REQ_FUA; + set_page_writeback(page); fio.old_blkaddr = ni.blk_addr; write_node_page(nid, &fio); @@ -1387,7 +1390,7 @@ static int __write_node_page(struct page *page, static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { - return __write_node_page(page, wbc); + return __write_node_page(page, false, wbc); } int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, @@ -1469,7 +1472,8 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_node_page(page, wbc); + ret = __write_node_page(page, atomic && + page == last_page, wbc); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 04c527410ecc..82236792b50c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -81,6 +81,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { REQ_SYNC | REQ_PRIO, "(SP)" }, \ { REQ_META, "(M)" }, \ { REQ_META | REQ_PRIO, "(MP)" }, \ + { REQ_SYNC | REQ_PREFLUSH , "(SF)" }, \ { REQ_SYNC | REQ_META | REQ_PRIO, "(SMP)" }, \ { REQ_PREFLUSH | REQ_META | REQ_PRIO, "(FMP)" }, \ { 0, " \b" }) -- GitLab From f566bae8462c4a48b55bbf4e58d5b1e0a8563819 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 3 Feb 2017 17:18:00 -0800 Subject: [PATCH 0492/1084] f2fs: call internal __write_data_page directly This patch introduces __write_data_page to call it by f2fs_write_cache_pages directly.. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3b5f1d14cab3..e78286ee3cc7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1346,7 +1346,7 @@ int do_write_data_page(struct f2fs_io_info *fio) return err; } -static int f2fs_write_data_page(struct page *page, +static int __write_data_page(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; @@ -1448,6 +1448,12 @@ static int f2fs_write_data_page(struct page *page, return err; } +static int f2fs_write_data_page(struct page *page, + struct writeback_control *wbc) +{ + return __write_data_page(page, wbc); +} + /* * This function was copied from write_cche_pages from mm/page-writeback.c. 
* The major change is making write step of cold data page separately from @@ -1537,7 +1543,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = mapping->a_ops->writepage(page, wbc); + ret = __write_data_page(page, wbc); if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to -- GitLab From d68f735b3bc934a7523a047aa952a577cf6ca171 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 3 Feb 2017 17:44:04 -0800 Subject: [PATCH 0493/1084] f2fs: check io submission more precisely This patch check IO submission more precisely than previous rough check. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 23 +++++++++++++++++------ fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 27 +++++++++++++++++++-------- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e78286ee3cc7..e0b74b9ea388 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -379,6 +379,9 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio) bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; + /* set submitted = 1 as a return value */ + fio->submitted = 1; + if (!is_read) inc_page_count(sbi, WB_DATA_TYPE(bio_page)); @@ -1346,8 +1349,8 @@ int do_write_data_page(struct f2fs_io_info *fio) return err; } -static int __write_data_page(struct page *page, - struct writeback_control *wbc) +static int __write_data_page(struct page *page, bool *submitted, + struct writeback_control *wbc) { struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1365,6 +1368,7 @@ static int __write_data_page(struct page *page, .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, + .submitted = false, }; trace_f2fs_writepage(page, DATA); @@ -1430,13 +1434,19 @@ static int __write_data_page(struct page *page, if (wbc->for_reclaim) { f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE); remove_dirty_inode(inode); + submitted = NULL; } unlock_page(page); f2fs_balance_fs(sbi, need_balance_fs); - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, DATA, WRITE); + submitted = NULL; + } + + if (submitted) + *submitted = fio.submitted; return 0; @@ -1451,7 +1461,7 @@ static int __write_data_page(struct page *page, static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, wbc); + return __write_data_page(page, NULL, wbc); } /* @@ -1510,6 +1520,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + bool submitted = false; if (page->index > end) { done = 1; @@ -1543,7 +1554,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, wbc); + ret = __write_data_page(page, &submitted, wbc); if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to @@ -1557,7 +1568,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, done_index = page->index + 1; done = 1; break; - } else { + } else if (submitted) { nwritten++; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 59a32e4b02f4..77ae2a8f15ce 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -728,6 +728,7 @@ struct f2fs_io_info { block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ + bool submitted; /* indicate IO 
submission */ }; #define is_read_io(rw) (rw == READ) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6a86f398aeac..5bd05e552d19 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1318,7 +1318,7 @@ static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) return last_page; } -static int __write_node_page(struct page *page, bool atomic, +static int __write_node_page(struct page *page, bool atomic, bool *submitted, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); @@ -1331,6 +1331,7 @@ static int __write_node_page(struct page *page, bool atomic, .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, + .submitted = false, }; trace_f2fs_writepage(page, NODE); @@ -1372,13 +1373,19 @@ static int __write_node_page(struct page *page, bool atomic, dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); - if (wbc->for_reclaim) + if (wbc->for_reclaim) { f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE); + submitted = NULL; + } unlock_page(page); - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, NODE, WRITE); + submitted = NULL; + } + if (submitted) + *submitted = fio.submitted; return 0; @@ -1390,7 +1397,7 @@ static int __write_node_page(struct page *page, bool atomic, static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { - return __write_node_page(page, false, wbc); + return __write_node_page(page, false, NULL, wbc); } int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, @@ -1424,6 +1431,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + bool submitted = false; if (unlikely(f2fs_cp_error(sbi))) { f2fs_put_page(last_page, 0); @@ -1473,12 +1481,13 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, goto continue_unlock; ret = __write_node_page(page, atomic && - page == last_page, wbc); + page == last_page, + &submitted, wbc); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); break; - } else { + } else if (submitted) { nwritten++; } @@ -1534,6 +1543,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + bool submitted = false; if (unlikely(f2fs_cp_error(sbi))) { pagevec_release(&pvec); @@ -1587,9 +1597,10 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) set_fsync_mark(page, 0); set_dentry_mark(page, 0); - if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) + ret = __write_node_page(page, false, &submitted, wbc); + if (ret) unlock_page(page); - else + else if (submitted) nwritten++; if (--wbc->nr_to_write == 0) -- GitLab From 942fd3192f83cef54bc0d485937fd5382ac5acd0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 1 Feb 2017 16:51:22 -0800 Subject: [PATCH 0494/1084] f2fs: check last page index in cached bio to decide submission If the cached bio has the last page's index, then we need to submit it. Otherwise, we don't need to submit it and can wait for further IO merges. 
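A small standalone model of that submission rule is shown below; the structure and function names are invented for illustration and do not correspond to the f2fs bio-merging code itself.

/*
 * Toy model: keep queueing page indexes into one batch and submit only
 * once the batch contains the last index that has to be written (or the
 * batch is simply full). All names here are illustrative.
 */
#include <stdio.h>

#define BATCH_MAX 8

struct cached_batch {
        unsigned long pages[BATCH_MAX];
        int count;
};

static void submit_batch(struct cached_batch *b)
{
        printf("submitting %d merged pages\n", b->count);
        b->count = 0;
}

static void queue_page(struct cached_batch *b, unsigned long idx,
                       unsigned long last_idx)
{
        b->pages[b->count++] = idx;
        if (idx == last_idx || b->count == BATCH_MAX)
                submit_batch(b);
}

int main(void)
{
        struct cached_batch b = { .count = 0 };
        unsigned long last_idx = 5;
        unsigned long i;

        for (i = 0; i <= last_idx; i++)
                queue_page(&b, i, last_idx);
        return 0;
}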
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 ++- fs/f2fs/data.c | 43 ++++++++++++++++++++++--------------------- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/node.c | 12 +++++++----- fs/f2fs/segment.c | 13 +++++++------ 5 files changed, 40 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index fb8cdfcaece6..e6e42a4b1344 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -249,7 +249,8 @@ static int f2fs_write_meta_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE); + f2fs_submit_merged_bio_cond(sbi, page->mapping->host, + 0, page->index, META, WRITE); unlock_page(page); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e0b74b9ea388..d14cc8be14f1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -243,8 +243,8 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, - struct page *page, nid_t ino) +static bool __has_merged_page(struct f2fs_bio_info *io, + struct inode *inode, nid_t ino, pgoff_t idx) { struct bio_vec *bvec; struct page *target; @@ -253,7 +253,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, if (!io->bio) return false; - if (!inode && !page && !ino) + if (!inode && !ino) return true; bio_for_each_segment_all(bvec, io->bio, i) { @@ -263,10 +263,11 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, else target = fscrypt_control_page(bvec->bv_page); + if (idx != target->index) + continue; + if (inode && inode == target->mapping->host) return true; - if (page && page == target) - return true; if (ino && ino == ino_of_node(target)) return true; } @@ -275,22 +276,21 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, } static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page, nid_t ino, - enum page_type type) + nid_t ino, pgoff_t idx, enum page_type type) { enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = &sbi->write_io[btype]; bool ret; down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, page, ino); + ret = __has_merged_page(io, inode, ino, idx); up_read(&io->io_rwsem); return ret; } static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, - nid_t ino, enum page_type type, int rw) + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type, int rw) { enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io; @@ -299,7 +299,7 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, down_write(&io->io_rwsem); - if (!__has_merged_page(io, inode, page, ino)) + if (!__has_merged_page(io, inode, ino, idx)) goto out; /* change META to META_FLUSH in the checkpoint procedure */ @@ -318,15 +318,15 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, int rw) { - __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw); + __f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw); } void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, - nid_t ino, enum page_type type, int rw) + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type, int rw) { - if (has_merged_page(sbi, inode, page, ino, type)) - __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw); 
+ if (has_merged_page(sbi, inode, ino, idx, type)) + __f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw); } void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi) @@ -1432,7 +1432,8 @@ static int __write_data_page(struct page *page, bool *submitted, ClearPageUptodate(page); if (wbc->for_reclaim) { - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE); + f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index, + DATA, WRITE); remove_dirty_inode(inode); submitted = NULL; } @@ -1480,10 +1481,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; + pgoff_t last_idx = ULONG_MAX; int cycled; int range_whole = 0; int tag; - int nwritten = 0; pagevec_init(&pvec, 0); @@ -1569,7 +1570,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, done = 1; break; } else if (submitted) { - nwritten++; + last_idx = page->index; } if (--wbc->nr_to_write <= 0 && @@ -1591,9 +1592,9 @@ static int f2fs_write_cache_pages(struct address_space *mapping, if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; - if (nwritten) + if (last_idx != ULONG_MAX) f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host, - NULL, 0, DATA, WRITE); + 0, last_idx, DATA, WRITE); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 77ae2a8f15ce..b8070f9e8285 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2233,8 +2233,8 @@ void destroy_checkpoint_caches(void); void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, int rw); void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, - nid_t ino, enum page_type type, int rw); + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type, int rw); void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); int f2fs_submit_page_mbio(struct f2fs_io_info *fio); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5bd05e552d19..8203a2f8b350 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1374,7 +1374,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, up_read(&sbi->node_write); if (wbc->for_reclaim) { - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE); + f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0, + page->index, NODE, WRITE); submitted = NULL; } @@ -1404,12 +1405,12 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic) { pgoff_t index, end; + pgoff_t last_idx = ULONG_MAX; struct pagevec pvec; int ret = 0; struct page *last_page = NULL; bool marked = false; nid_t ino = inode->i_ino; - int nwritten = 0; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1488,7 +1489,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, f2fs_put_page(last_page, 0); break; } else if (submitted) { - nwritten++; + last_idx = page->index; } if (page == last_page) { @@ -1514,8 +1515,9 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, goto retry; } out: - if (nwritten) - f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE); + if (last_idx != ULONG_MAX) + f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx, + NODE, WRITE); return ret ? 
-EIO: 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2f6d7370904c..b2e0769a09d0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -263,7 +263,7 @@ static int __commit_inmem_pages(struct inode *inode, .op_flags = REQ_SYNC | REQ_PRIO, .encrypted_page = NULL, }; - bool submit_bio = false; + pgoff_t last_idx = ULONG_MAX; int err = 0; list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { @@ -289,15 +289,15 @@ static int __commit_inmem_pages(struct inode *inode, /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; - - submit_bio = true; + last_idx = page->index; } unlock_page(page); list_move_tail(&cur->list, revoke_list); } - if (submit_bio) - f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); + if (last_idx != ULONG_MAX) + f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx, + DATA, WRITE); if (!err) __revoke_inmem_pages(inode, revoke_list, false, false); @@ -1932,7 +1932,8 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE); + f2fs_submit_merged_bio_cond(sbi, page->mapping->host, + 0, page->index, type, WRITE); if (ordered) wait_on_page_writeback(page); else -- GitLab From 7f54f51f46f69f3ccde2b65e682a51b0aa6199c3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 6 Feb 2017 13:57:58 -0800 Subject: [PATCH 0495/1084] f2fs: remove preflush for nobarrier case This patch removes REQ_PREFLUSH in the nobarrier case. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d14cc8be14f1..8c61fa7fd27d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -306,9 +306,9 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->fio.op = REQ_OP_WRITE; - io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO; + io->fio.op_flags = REQ_META | REQ_PRIO; if (!test_opt(sbi, NOBARRIER)) - io->fio.op_flags |= REQ_FUA; + io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } __submit_merged_bio(io); out: -- GitLab From 6de3f12eb737dae30628296d75c67664e8b1506e Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 8 Feb 2017 05:08:01 +0800 Subject: [PATCH 0496/1084] f2fs: fix a typo in f2fs.txt There is a typo "f2f2" in f2fs.txt, this patch fixes it. Signed-off-by: Tiezhu Yang Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 0ab33d4c8406..3d7e12d0cb91 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -176,7 +176,7 @@ f2fs. Each file shows the whole f2fs information. SYSFS ENTRIES ================================================================================ -Information about mounted f2f2 file systems can be found in +Information about mounted f2fs file systems can be found in /sys/fs/f2fs. Each mounted filesystem will have a directory in /sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda). The files in each per-device directory are shown in table below. -- GitLab From 1200abb26f8287e723e79e134b66d0041c095b73 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Feb 2017 13:25:35 -0800 Subject: [PATCH 0497/1084] f2fs: show checkpoint version at mount time If we mounted f2fs successfully, let's show current checkpoint version. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f97e8089fb54..6eb543259e78 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2102,6 +2102,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->valid_super_block ? 1 : 2, err); } + f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", + cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); return 0; -- GitLab From 2ad0ef846b38288103d8373d1196c465df2c4d7d Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Sat, 11 Feb 2017 15:50:46 +0530 Subject: [PATCH 0498/1084] f2fs: super: constify fscrypt_operations structure Declare fscrypt_operations structure as const as it is only stored in the s_cop field of a super_block structure. This field is of type const, so fscrypt_operations structure having this property can be made const too. File size before: fs/f2fs/super.o text data bss dec hex filename 54131 31355 184 85670 14ea6 fs/f2fs/super.o File size after: fs/f2fs/super.o text data bss dec hex filename 54227 31259 184 85670 14ea6 fs/f2fs/super.o Signed-off-by: Bhumika Goyal Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6eb543259e78..bd716b12f835 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1209,7 +1209,7 @@ static unsigned f2fs_max_namelen(struct inode *inode) inode->i_sb->s_blocksize : F2FS_NAME_LEN; } -static struct fscrypt_operations f2fs_cryptops = { +static const struct fscrypt_operations f2fs_cryptops = { .get_context = f2fs_get_context, .key_prefix = f2fs_key_prefix, .set_context = f2fs_set_context, @@ -1218,7 +1218,7 @@ static struct fscrypt_operations f2fs_cryptops = { .max_namelen = f2fs_max_namelen, }; #else -static struct fscrypt_operations f2fs_cryptops = { +static const struct fscrypt_operations f2fs_cryptops = { .is_encrypted = f2fs_encrypted_inode, }; #endif -- GitLab From d260081ccf37f57b74396ec48f415f27d1b01b13 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Feb 2017 17:39:45 +0800 Subject: [PATCH 0499/1084] f2fs: change recovery policy of xattr node block Currently, if we call fsync after updating the xattr date belongs to the file, f2fs needs to trigger checkpoint to keep xattr data consistent. But, this policy cause low performance as checkpoint will block most foreground operations and cause unneeded and unrelated IOs around checkpoint. This patch will reuse regular file recovery policy for xattr node block, so, we change to write xattr node block tagged with fsync flag to warm area instead of cold area, and during recovery, we search warm node chain for fsynced xattr block, and do the recovery. 
So, for below application IO pattern, performance can be improved obviously: - touch file - create/update/delete xattr entry in file - fsync file Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/file.c | 3 --- fs/f2fs/node.c | 29 +++++++++++++++++------------ fs/f2fs/node.h | 2 +- fs/f2fs/recovery.c | 8 +++----- fs/f2fs/xattr.c | 2 -- 6 files changed, 22 insertions(+), 25 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b8070f9e8285..28e954080890 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -460,7 +460,6 @@ struct f2fs_inode_info { f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ - unsigned long long xattr_ver; /* cp version of xattr modification */ loff_t last_disk_size; /* lastly written file size */ struct list_head dirty_list; /* dirty list for dirs and files */ @@ -2133,7 +2132,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); void recover_inline_xattr(struct inode *inode, struct page *page); -void recover_xattr_data(struct inode *inode, struct page *page, +int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr); int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); int restore_node_summary(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a3808c49e326..120164815030 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -142,8 +142,6 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) need_cp = true; - else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) - need_cp = true; else if (test_opt(sbi, FASTBOOT)) need_cp = true; else if (sbi->active_logs == 2) @@ -169,7 +167,6 @@ static void try_to_fix_pino(struct inode *inode) nid_t pino; down_write(&fi->i_sem); - fi->xattr_ver = 0; if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { f2fs_i_pino_write(inode, pino); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8203a2f8b350..f8fb25a27b46 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -971,9 +971,6 @@ int truncate_xattr_node(struct inode *inode, struct page *page) f2fs_i_xnid_write(inode, 0); - /* need to do checkpoint during fsync */ - F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); - set_new_dnode(&dn, inode, page, npage, nid); if (page) @@ -2054,18 +2051,18 @@ void recover_inline_xattr(struct inode *inode, struct page *page) f2fs_put_page(ipage, 1); } -void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) +int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; nid_t new_xnid = nid_of_node(page); struct node_info ni; + struct page *xpage; - /* 1: invalidate the previous xattr nid */ if (!prev_xnid) goto recover_xnid; - /* Deallocate node address */ + /* 1: invalidate the previous xattr nid */ get_node_info(sbi, prev_xnid, &ni); f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); invalidate_blocks(sbi, ni.blk_addr); @@ -2073,19 +2070,27 @@ void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) set_node_addr(sbi, &ni, NULL_ADDR, false); recover_xnid: - /* 2: allocate new xattr nid */ + /* 2: update xattr nid in 
inode */ + remove_free_nid(sbi, new_xnid); + f2fs_i_xnid_write(inode, new_xnid); if (unlikely(!inc_valid_node_count(sbi, inode))) f2fs_bug_on(sbi, 1); + update_inode_page(inode); + + /* 3: update and set xattr node page dirty */ + xpage = grab_cache_page(NODE_MAPPING(sbi), new_xnid); + if (!xpage) + return -ENOMEM; + + memcpy(F2FS_NODE(xpage), F2FS_NODE(page), PAGE_SIZE); - remove_free_nid(sbi, new_xnid); get_node_info(sbi, new_xnid, &ni); ni.ino = inode->i_ino; set_node_addr(sbi, &ni, NEW_ADDR, false); - f2fs_i_xnid_write(inode, new_xnid); + set_page_dirty(xpage); + f2fs_put_page(xpage, 1); - /* 3: update xattr blkaddr */ - refresh_sit_entry(sbi, NEW_ADDR, blkaddr); - set_node_addr(sbi, &ni, blkaddr, false); + return 0; } int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 29ff783eb9c3..d3d289306469 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -358,7 +358,7 @@ static inline bool IS_DNODE(struct page *node_page) unsigned int ofs = ofs_of_node(node_page); if (f2fs_has_xattr_block(ofs)) - return false; + return true; if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || ofs == 5 + 2 * NIDS_PER_BLOCK) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e93316ea8d1b..d025aa83fb5b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -378,11 +378,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (IS_INODE(page)) { recover_inline_xattr(inode, page); } else if (f2fs_has_xattr_block(ofs_of_node(page))) { - /* - * Deprecated; xattr blocks should be found from cold log. - * But, we should remain this for backward compatibility. - */ - recover_xattr_data(inode, page, blkaddr); + err = recover_xattr_data(inode, page, blkaddr); + if (!err) + recovered++; goto out; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index f7482635a57d..b50f6b581a71 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -458,8 +458,6 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, set_page_dirty(xpage); f2fs_put_page(xpage, 1); - /* need to checkpoint during fsync */ - F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); return 0; } -- GitLab From 9b064f7d0c9ab179b9cb18e144ab65d7ea953443 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 11 Feb 2017 10:46:44 -0800 Subject: [PATCH 0500/1084] f2fs: remove build_free_nids() during checkpoint Let's avoid build_free_nids() in checkpoint path. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e6e42a4b1344..042f8d9afe44 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -999,8 +999,6 @@ static int block_operations(struct f2fs_sb_info *sbi) static void unblock_operations(struct f2fs_sb_info *sbi) { up_write(&sbi->node_write); - - build_free_nids(sbi, false); f2fs_unlock_all(sbi); } -- GitLab From 25cc5d3b9dde074bfa06024fd6486020963807da Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Feb 2017 17:02:44 -0800 Subject: [PATCH 0501/1084] f2fs: avoid reading NAT page by get_node_info We've not seen this buggy case for a long time, so it's time to avoid this unnecessary get_node_info() call which reading NAT page to cache nat entry. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f8fb25a27b46..8ebc4c78e6a4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1028,7 +1028,7 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct node_info old_ni, new_ni; + struct node_info new_ni; struct page *page; int err; @@ -1043,13 +1043,15 @@ struct page *new_node_page(struct dnode_of_data *dn, err = -ENOSPC; goto fail; } - - get_node_info(sbi, dn->nid, &old_ni); - - /* Reinitialize old_ni with new node page */ - f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR); - new_ni = old_ni; +#ifdef CONFIG_F2FS_CHECK_FS + get_node_info(sbi, dn->nid, &new_ni); + f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR); +#endif + new_ni.nid = dn->nid; new_ni.ino = dn->inode->i_ino; + new_ni.blk_addr = NULL_ADDR; + new_ni.flag = 0; + new_ni.version = 0; set_node_addr(sbi, &new_ni, NEW_ADDR, false); f2fs_wait_on_page_writeback(page, NODE, true); -- GitLab From 23cf7212a1cae2158dcf5f41d9733d59e320f8f6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 15 Feb 2017 10:34:45 +0800 Subject: [PATCH 0502/1084] f2fs: introduce noinline_xattr mount option This patch introduces new mount option 'noinline_xattr', so we can disable inline xattr functionality which is already set as a default mount option. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 1 + fs/f2fs/super.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 3d7e12d0cb91..4f6531a4701b 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -125,6 +125,7 @@ active_logs=%u Support configuring the number of active logs. In the disable_ext_identify Disable the extension list configured by mkfs, so f2fs does not aware of cold files such as media files. inline_xattr Enable the inline xattrs feature. +noinline_xattr Disable the inline xattrs feature. inline_data Enable the inline data feature: New created small(<~3.4k) files can be written into inode block. 
inline_dentry Enable the inline dir feature: data in new created diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bd716b12f835..1dd0ed17a790 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -89,6 +89,7 @@ enum { Opt_active_logs, Opt_disable_ext_identify, Opt_inline_xattr, + Opt_noinline_xattr, Opt_inline_data, Opt_inline_dentry, Opt_noinline_dentry, @@ -122,6 +123,7 @@ static match_table_t f2fs_tokens = { {Opt_active_logs, "active_logs=%u"}, {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, + {Opt_noinline_xattr, "noinline_xattr"}, {Opt_inline_data, "inline_data"}, {Opt_inline_dentry, "inline_dentry"}, {Opt_noinline_dentry, "noinline_dentry"}, @@ -444,6 +446,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_inline_xattr: set_opt(sbi, INLINE_XATTR); break; + case Opt_noinline_xattr: + clear_opt(sbi, INLINE_XATTR); + break; #else case Opt_user_xattr: f2fs_msg(sb, KERN_INFO, @@ -457,6 +462,10 @@ static int parse_options(struct super_block *sb, char *options) f2fs_msg(sb, KERN_INFO, "inline_xattr options not supported"); break; + case Opt_noinline_xattr: + f2fs_msg(sb, KERN_INFO, + "noinline_xattr options not supported"); + break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL case Opt_acl: @@ -909,6 +918,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",nouser_xattr"); if (test_opt(sbi, INLINE_XATTR)) seq_puts(seq, ",inline_xattr"); + else + seq_puts(seq, ",noinline_xattr"); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) -- GitLab From 39133a5015e2fa405a0387053d65580988a6a1bc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Feb 2017 17:39:44 +0800 Subject: [PATCH 0503/1084] f2fs: enable inline_xattr by default In android, since SElinux is enable, security policy will be appliedd for each file, it stores in inode as an xattr entry, so it will take one 4k size node block additionally for each file. Let's enable inline_xattr by default in order to save storage space. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1dd0ed17a790..751e1068db17 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1039,6 +1039,7 @@ static void default_options(struct f2fs_sb_info *sbi) sbi->active_logs = NR_CURSEG_TYPE; set_opt(sbi, BG_GC); + set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); set_opt(sbi, EXTENT_CACHE); -- GitLab From 5b6c6be2d878bd7ec4dc2cb4e2a2da2779fe52ab Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 14 Feb 2017 19:32:51 -0800 Subject: [PATCH 0504/1084] f2fs: use SSR for warm node as well We have had node chains, but haven't used it so far due to stale node blocks. Now, we have crc|cp_ver in node footer and give random cp_ver at format time, we can start to use it again. 
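The safety argument can be restated with a small check: a stale node block left over from an earlier checkpoint generation fails a version comparison against the live checkpoint, so walking the warm node chain during recovery cannot mistake it for a current block. The structure below is a simplified stand-in, not the on-disk f2fs node footer, which packs cp_ver together with a CRC.

/* Simplified stand-in for the "is this node block current?" test. */
#include <stdbool.h>
#include <stdint.h>

struct toy_node_footer {
        uint64_t cp_ver;        /* checkpoint version stamped at write time */
};

static bool node_block_is_current(const struct toy_node_footer *footer,
                                  uint64_t live_cp_ver)
{
        return footer->cp_ver == live_cp_ver;
}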
Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b2e0769a09d0..0b42b0cdd674 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1563,7 +1563,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, if (force) new_curseg(sbi, type, true); - else if (type == CURSEG_WARM_NODE) + else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && + type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); -- GitLab From d50aaeec905baebcef0c401768b987ed18458015 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Feb 2017 11:14:06 -0800 Subject: [PATCH 0505/1084] f2fs: show actual device info in tracepoints This patch shows actual device information in the tracepoints. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 10 ++++---- include/trace/events/f2fs.h | 49 ++++++++++++++++++++----------------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0b42b0cdd674..68db07f857f8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -426,11 +426,11 @@ static int submit_flush_wait(struct f2fs_sb_info *sbi) int ret = __submit_flush_wait(sbi->sb->s_bdev); int i; - trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), - test_opt(sbi, FLUSH_MERGE)); - if (sbi->s_ndevs && !ret) { for (i = 1; i < sbi->s_ndevs; i++) { + trace_f2fs_issue_flush(FDEV(i).bdev, + test_opt(sbi, NOBARRIER), + test_opt(sbi, FLUSH_MERGE)); ret = __submit_flush_wait(FDEV(i).bdev); if (ret) break; @@ -758,7 +758,7 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, block_t lblkstart = blkstart; int err; - trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); + trace_f2fs_issue_discard(bdev, blkstart, blklen); if (sbi->s_ndevs) { int devi = f2fs_target_device_index(sbi, blkstart); @@ -815,7 +815,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: - trace_f2fs_issue_reset_zone(sbi->sb, blkstart); + trace_f2fs_issue_reset_zone(bdev, blkstart); return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS); default: diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 82236792b50c..153572e256d6 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -6,8 +6,8 @@ #include -#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev) -#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino +#define show_dev(dev) MAJOR(dev), MINOR(dev) +#define show_dev_ino(entry) show_dev(entry->dev), (unsigned long)entry->ino TRACE_DEFINE_ENUM(NODE); TRACE_DEFINE_ENUM(DATA); @@ -245,7 +245,7 @@ TRACE_EVENT(f2fs_sync_fs, ), TP_printk("dev = (%d,%d), superblock is %s, wait = %d", - show_dev(__entry), + show_dev(__entry->dev), __entry->dirty ? 
"dirty" : "not dirty", __entry->wait) ); @@ -544,7 +544,7 @@ TRACE_EVENT(f2fs_background_gc, ), TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u", - show_dev(__entry), + show_dev(__entry->dev), __entry->wait_ms, __entry->prefree, __entry->free) @@ -586,7 +586,7 @@ TRACE_EVENT(f2fs_get_victim, TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u " "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u", - show_dev(__entry), + show_dev(__entry->dev), show_data_type(__entry->type), show_gc_type(__entry->gc_type), show_alloc_mode(__entry->alloc_mode), @@ -723,7 +723,7 @@ TRACE_EVENT(f2fs_reserve_new_blocks, ), TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu", - show_dev(__entry), + show_dev(__entry->dev), (unsigned int)__entry->nid, __entry->ofs_in_node, (unsigned long long)__entry->count) @@ -793,6 +793,7 @@ DECLARE_EVENT_CLASS(f2fs__bio, TP_STRUCT__entry( __field(dev_t, dev) + __field(dev_t, target) __field(int, op) __field(int, op_flags) __field(int, type) @@ -802,6 +803,7 @@ DECLARE_EVENT_CLASS(f2fs__bio, TP_fast_assign( __entry->dev = sb->s_dev; + __entry->target = bio->bi_bdev->bd_dev; __entry->op = bio_op(bio); __entry->op_flags = bio->bi_opf; __entry->type = type; @@ -809,8 +811,9 @@ DECLARE_EVENT_CLASS(f2fs__bio, __entry->size = bio->bi_iter.bi_size; ), - TP_printk("dev = (%d,%d), rw = %s%s, %s, sector = %lld, size = %u", - show_dev(__entry), + TP_printk("dev = (%d,%d)/(%d,%d), rw = %s%s, %s, sector = %lld, size = %u", + show_dev(__entry->target), + show_dev(__entry->dev), show_bio_type(__entry->op, __entry->op_flags), show_block_type(__entry->type), (unsigned long long)__entry->sector, @@ -1107,16 +1110,16 @@ TRACE_EVENT(f2fs_write_checkpoint, ), TP_printk("dev = (%d,%d), checkpoint for %s, state = %s", - show_dev(__entry), + show_dev(__entry->dev), show_cpreason(__entry->reason), __entry->msg) ); TRACE_EVENT(f2fs_issue_discard, - TP_PROTO(struct super_block *sb, block_t blkstart, block_t blklen), + TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), - TP_ARGS(sb, blkstart, blklen), + TP_ARGS(dev, blkstart, blklen), TP_STRUCT__entry( __field(dev_t, dev) @@ -1125,22 +1128,22 @@ TRACE_EVENT(f2fs_issue_discard, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev = dev->bd_dev; __entry->blkstart = blkstart; __entry->blklen = blklen; ), TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx", - show_dev(__entry), + show_dev(__entry->dev), (unsigned long long)__entry->blkstart, (unsigned long long)__entry->blklen) ); TRACE_EVENT(f2fs_issue_reset_zone, - TP_PROTO(struct super_block *sb, block_t blkstart), + TP_PROTO(struct block_device *dev, block_t blkstart), - TP_ARGS(sb, blkstart), + TP_ARGS(dev, blkstart), TP_STRUCT__entry( __field(dev_t, dev) @@ -1148,21 +1151,21 @@ TRACE_EVENT(f2fs_issue_reset_zone, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev = dev->bd_dev; __entry->blkstart = blkstart; ), TP_printk("dev = (%d,%d), reset zone at block = 0x%llx", - show_dev(__entry), + show_dev(__entry->dev), (unsigned long long)__entry->blkstart) ); TRACE_EVENT(f2fs_issue_flush, - TP_PROTO(struct super_block *sb, unsigned int nobarrier, + TP_PROTO(struct block_device *dev, unsigned int nobarrier, unsigned int flush_merge), - TP_ARGS(sb, nobarrier, flush_merge), + TP_ARGS(dev, nobarrier, flush_merge), TP_STRUCT__entry( __field(dev_t, dev) @@ -1171,13 +1174,13 @@ TRACE_EVENT(f2fs_issue_flush, ), TP_fast_assign( - __entry->dev = sb->s_dev; + __entry->dev = dev->bd_dev; __entry->nobarrier = 
nobarrier; __entry->flush_merge = flush_merge; ), TP_printk("dev = (%d,%d), %s %s", - show_dev(__entry), + show_dev(__entry->dev), __entry->nobarrier ? "skip (nobarrier)" : "issue", __entry->flush_merge ? " with flush_merge" : "") ); @@ -1292,7 +1295,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree, ), TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u", - show_dev(__entry), + show_dev(__entry->dev), __entry->node_cnt, __entry->tree_cnt) ); @@ -1339,7 +1342,7 @@ DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, ), TP_printk("dev = (%d,%d), %s, dirty count = %lld", - show_dev(__entry), + show_dev(__entry->dev), show_file_type(__entry->type), __entry->count) ); -- GitLab From 88c5c13a5027b36d914536fdba23f069d7067204 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 14 Feb 2017 09:54:37 -0800 Subject: [PATCH 0506/1084] f2fs: fix multiple f2fs_add_link() calls having same name It turns out a stakable filesystem like sdcardfs in AOSP can trigger multiple vfs_create() to lower filesystem. In that case, f2fs will add multiple dentries having same name which breaks filesystem consistency. Until upper layer fixes, let's work around by f2fs, which shows actually not much performance regression. Cc: Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 34 +++++++++++++++++++++++++++++----- fs/f2fs/f2fs.h | 1 + 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 827c5daef4fc..54aa30ee028f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -207,9 +207,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_put_page(dentry_page, 0); } - if (!de && room && F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; - F2FS_I(dir)->clevel = level; + /* This is to increase the speed of f2fs_create */ + if (!de && room) { + F2FS_I(dir)->task = current; + if (F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; + } } return de; @@ -643,14 +647,34 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { struct fscrypt_name fname; + struct page *page = NULL; + struct f2fs_dir_entry *de = NULL; int err; err = fscrypt_setup_filename(dir, name, 0, &fname); if (err) return err; - err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); - + /* + * An immature stakable filesystem shows a race condition between lookup + * and create. If we have same task when doing lookup and create, it's + * definitely fine as expected by VFS normally. Otherwise, let's just + * verify on-disk dentry one more time, which guarantees filesystem + * consistency more. 
+ */ + if (current != F2FS_I(dir)->task) { + de = __f2fs_find_entry(dir, &fname, &page); + F2FS_I(dir)->task = NULL; + } + if (de) { + f2fs_dentry_kunmap(dir, page); + f2fs_put_page(page, 0); + err = -EEXIST; + } else if (IS_ERR(page)) { + err = PTR_ERR(page); + } else { + err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); + } fscrypt_free_filename(&fname); return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 28e954080890..7b5e287ac127 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -459,6 +459,7 @@ struct f2fs_inode_info { atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ + struct task_struct *task; /* lookup and create consistency */ nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ -- GitLab From 5012de209b704ab460ddc5787a5f010cbd4af954 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Feb 2017 10:35:41 -0800 Subject: [PATCH 0507/1084] f2fs: trace victim's cost selectecd by f2fs_gc This patch adds min_cost of each victims. Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 153572e256d6..ff31ccfb8111 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -565,6 +565,7 @@ TRACE_EVENT(f2fs_get_victim, __field(int, alloc_mode) __field(int, gc_mode) __field(unsigned int, victim) + __field(unsigned int, cost) __field(unsigned int, ofs_unit) __field(unsigned int, pre_victim) __field(unsigned int, prefree) @@ -578,20 +579,23 @@ TRACE_EVENT(f2fs_get_victim, __entry->alloc_mode = p->alloc_mode; __entry->gc_mode = p->gc_mode; __entry->victim = p->min_segno; + __entry->cost = p->min_cost; __entry->ofs_unit = p->ofs_unit; __entry->pre_victim = pre_victim; __entry->prefree = prefree; __entry->free = free; ), - TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u " - "ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u", + TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), " + "victim = %u, cost = %u, ofs_unit = %u, " + "pre_victim_secno = %d, prefree = %u, free = %u", show_dev(__entry->dev), show_data_type(__entry->type), show_gc_type(__entry->gc_type), show_alloc_mode(__entry->alloc_mode), show_victim_policy(__entry->gc_mode), __entry->victim, + __entry->cost, __entry->ofs_unit, (int)__entry->pre_victim, __entry->prefree, -- GitLab From 05eeb118a0e9fdc675a1f9db05039693da3ea25e Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Fri, 17 Feb 2017 17:16:38 +0800 Subject: [PATCH 0508/1084] f2fs: replace __get_victim by dirty_segments in FG_GC In FG_GC process, it will search victim section twice. This will cause some dirty section with less valid blocks skip garbage collection. 
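A minimal stand-alone sketch of that control flow is shown here, ahead of the trace that demonstrates the problem. Every type and helper in the sketch is a simplified stand-in invented for illustration only; the ordering of the calls is the only part that mirrors the gc_more path touched by the diff further down.

/*
 * Toy model of the f2fs_gc() victim-selection flow before and after this
 * patch.  All helpers below are hypothetical stand-ins, not f2fs APIs.
 */
#include <stdbool.h>
#include <stdio.h>

#define NULL_SEGNO  (~0u)

static bool dirty_segments(void)            { return true; }
static bool prefree_segments(void)          { return false; }
static void write_checkpoint(void)          { puts("checkpoint"); }
static bool get_victim(unsigned int *segno) { *segno = 26425; return true; }
static void do_garbage_collect(unsigned int segno)
{
	printf("collect section %u\n", segno);
}

/*
 * Before: the victim found by the first search is thrown away, and the
 * search runs a second time after the checkpoint has changed the
 * dirty-segment state, so a section with very few valid blocks can be
 * selected repeatedly without ever being cleaned.
 */
static void fg_gc_before(void)
{
	unsigned int segno = NULL_SEGNO;

	if (get_victim(&segno) || prefree_segments()) {
		write_checkpoint();
		segno = NULL_SEGNO;	/* first result discarded */
	}
	if (segno == NULL_SEGNO && !get_victim(&segno))
		return;
	do_garbage_collect(segno);
}

/*
 * After: a cheap dirty_segments() test gates the checkpoint, and the
 * victim is searched exactly once.
 */
static void fg_gc_after(void)
{
	unsigned int segno;

	if (dirty_segments() || prefree_segments())
		write_checkpoint();
	if (!get_victim(&segno))
		return;
	do_garbage_collect(segno);
}

int main(void)
{
	fg_gc_before();
	fg_gc_after();
	return 0;
}

In the old flow the result of the first search is dropped before the checkpoint and the selection is repeated afterwards, which is how a section such as 26425 in the trace below can keep appearing as pre_victim with only 3 valid blocks yet never be freed; the new flow searches for a victim exactly once.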
section # 26425 : valid blocks # 3 142.037567: get_victim_by_default: victim 26425 : valid blocks # 3 142.037585: f2fs_get_victim: dev = (259,30), type = No TYPE, policy = (Foreground GC, LFS-mode, Greedy), victim = 26425 ofs_unit = 1, pre_victim_secno = 26425, prefree = 0, free = 244 142.039494: f2fs_get_victim: dev = (259,30), type = Hot DATA, policy = (Background GC, SSR-mode, Greedy), victim = 19022 ofs_unit = 1, pre_victim_secno = 26425, prefree = 0, free = 24 142.070247: new_curseg: Debug: alloc new segment 26746 142.244341: f2fs_get_victim: dev = (259,30), type = No TYPE, policy = (Foreground GC, LFS-mode, Greedy), victim = 26054 ofs_unit = 1, pre_victim_secno = 26054, prefree = 0, free = 243 142.254475: do_garbage_collect: Debug: FG_GC, seg_freed = 1 142.293131: f2fs_get_victim: dev = (259,30), type = Warm DATA, policy = (Background GC, SSR-mode, Greedy), victim = 23466 ofs_unit = 1, pre_victim_secno = -1, prefree = 0, free = 244 142.319001: f2fs_get_victim: dev = (259,30), type = Warm DATA, policy = (Background GC, SSR-mode, Greedy), victim = 23467 ofs_unit = 1, pre_victim_secno = -1, prefree = 0, free = 244 142.368879: get_victim_by_default: victim 26425 : valid blocks # 3 142.368894: f2fs_get_victim: dev = (259,30), type = No TYPE, policy = (Foreground GC, LFS-mode, Greedy), victim = 26425 ofs_unit = 1, pre_victim_secno = 26425, prefree = 0, free = 244 142.378127: f2fs_get_victim: dev = (259,30), type = Hot DATA, policy = (Background GC, SSR-mode, Greedy), victim = 19612 ofs_unit = 1, pre_victim_secno = 26425, prefree = 0, free = 24 142.416917: new_curseg: Debug: alloc new segment 26054 142.656794: f2fs_get_victim: dev = (259,30), type = No TYPE, policy = (Foreground GC, LFS-mode, Greedy), victim = 25404 ofs_unit = 1, pre_victim_secno = 25404, prefree = 0, free = 243 142.662139: do_garbage_collect: Debug: FG_GC, seg_freed = 1 142.684159: new_curseg: Debug: alloc new segment 25197 142.685059: get_victim_by_default: victim 26425 : valid blocks # 3 142.685079: f2fs_get_victim: dev = (259,30), type = No TYPE, policy = (Foreground GC, LFS-mode, Greedy), victim = 26425 ofs_unit = 1, pre_victim_secno = 26425, prefree = 0, free = 243 142.701427: f2fs_get_victim: dev = (259,30), type = No TYPE, policy = (Foreground GC, LFS-mode, Greedy), victim = 26238 ofs_unit = 1, pre_victim_secno = 26238, prefree = 0, free = 243 142.707105: do_garbage_collect: Debug: FG_GC, seg_freed = 1 142.802444: f2fs_get_victim: dev = (259,30), type = Warm DATA, policy = (Background GC, SSR-mode, Greedy), victim = 23473 ofs_unit = 1, pre_victim_secno = -1, prefree = 0, free = 244 142.804422: get_victim_by_default: victim 26425 : valid blocks # 3 142.804443: f2fs_get_victim: dev = (259,30), type = No TYPE, policy = (Foreground GC, LFS-mode, Greedy), victim = 26425 ofs_unit = 1, pre_victim_secno = 26425, prefree = 0, free = 244 142.851567: f2fs_get_victim: dev = (259,30), type = Hot DATA, policy = (Background GC, SSR-mode, Greedy), victim = 19092 ofs_unit = 1, pre_victim_secno = 26425, prefree = 0, free = 24 142.865014: new_curseg: Debug: alloc new segment 26238 143.082245: f2fs_get_victim: dev = (259,30), type = No TYPE, policy = (Foreground GC, LFS-mode, Greedy), victim = 26307 ofs_unit = 1, pre_victim_secno = 26307, prefree = 0, free = 244 143.088252: do_garbage_collect: Debug: FG_GC, seg_freed = 1 143.128307: new_curseg: Debug: alloc new segment 25404 143.181846: get_victim_by_default: victim 26425 : valid blocks # 3 143.181872: f2fs_get_victim: dev = (259,30), type = No TYPE, policy = (Foreground GC, LFS-mode, 
Greedy), victim = 26425 ofs_unit = 1, pre_victim_secno = 26425, prefree = 0, free = 244 Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 88e5e7b10ab6..5ee258ebf6ca 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -927,8 +927,6 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) cpc.reason = __get_cp_reason(sbi); gc_more: - segno = NULL_SEGNO; - if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; if (unlikely(f2fs_cp_error(sbi))) { @@ -943,12 +941,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) * enough free sections, we should flush dent/node blocks and do * garbage collections. */ - if (__get_victim(sbi, &segno, gc_type) || - prefree_segments(sbi)) { + if (dirty_segments(sbi) || prefree_segments(sbi)) { ret = write_checkpoint(sbi, &cpc); if (ret) goto stop; - segno = NULL_SEGNO; } else if (has_not_enough_free_secs(sbi, 0, 0)) { ret = write_checkpoint(sbi, &cpc); if (ret) @@ -959,7 +955,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) goto stop; } - if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type)) + if (!__get_victim(sbi, &segno, gc_type)) goto stop; ret = 0; -- GitLab From 86d54795c94532075d862aa0a79f0c981dab4bdd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Feb 2017 09:55:55 -0800 Subject: [PATCH 0509/1084] f2fs: do not wait for writeback in write_begin Otherwise we can get livelock like below. [79880.428136] dbench D 0 18405 18404 0x00000000 [79880.428139] Call Trace: [79880.428142] __schedule+0x219/0x6b0 [79880.428144] schedule+0x36/0x80 [79880.428147] schedule_timeout+0x243/0x2e0 [79880.428152] ? update_sd_lb_stats+0x16b/0x5f0 [79880.428155] ? ktime_get+0x3c/0xb0 [79880.428157] io_schedule_timeout+0xa6/0x110 [79880.428161] __lock_page+0xf7/0x130 [79880.428164] ? unlock_page+0x30/0x30 [79880.428167] pagecache_get_page+0x16b/0x250 [79880.428171] grab_cache_page_write_begin+0x20/0x40 [79880.428182] f2fs_write_begin+0xa2/0xdb0 [f2fs] [79880.428192] ? f2fs_mark_inode_dirty_sync+0x16/0x30 [f2fs] [79880.428197] ? kmem_cache_free+0x79/0x200 [79880.428203] ? __mark_inode_dirty+0x17f/0x360 [79880.428206] generic_perform_write+0xbb/0x190 [79880.428213] ? file_update_time+0xa4/0xf0 [79880.428217] __generic_file_write_iter+0x19b/0x1e0 [79880.428226] f2fs_file_write_iter+0x9c/0x180 [f2fs] [79880.428231] __vfs_write+0xc5/0x140 [79880.428235] vfs_write+0xb2/0x1b0 [79880.428238] SyS_write+0x46/0xa0 [79880.428242] entry_SYSCALL_64_fastpath+0x1e/0xad Fixes: cae96a5c8ab6 ("f2fs: check io submission more precisely") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8c61fa7fd27d..5f3bc98a387d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1759,7 +1759,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, goto fail; } repeat: - page = grab_cache_page_write_begin(mapping, index, flags); + /* + * Do not use grab_cache_page_write_begin() to avoid deadlock due to + * wait_for_stable_page. Will wait that below with our IO control. 
+ */ + page = pagecache_get_page(mapping, index, + FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); if (!page) { err = -ENOMEM; goto fail; -- GitLab From e93b9865251a0503d83fd570e7d5a7c8bc351715 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Thu, 16 Feb 2017 12:34:31 +0000 Subject: [PATCH 0510/1084] f2fs: add ovp valid_blocks check for bg gc victim to fg_gc For foreground gc, greedy algorithm should be adapted, which makes this formula work well: (2 * (100 / config.overprovision + 1) + 6) But currently, we fg_gc have a prior to select bg_gc victim segments to gc first, these victims are selected by cost-benefit algorithm, we can't guarantee such segments have the small valid blocks, which may destroy the f2fs rule, on the worstest case, would consume all the free segments. This patch fix this by add a filter in check_bg_victims, if segment's has # of valid blocks over overprovision ratio, skip such segments. Cc: Signed-off-by: Hou Pengyang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/gc.c | 22 ++++++++++++++++++++-- fs/f2fs/segment.h | 9 +++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7b5e287ac127..146cc71ff91c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -890,6 +890,9 @@ struct f2fs_sb_info { struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ + /* threshold for converting bg victims for fg */ + u64 fggc_threshold; + /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5ee258ebf6ca..e93aecb0138b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -166,7 +166,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->ofs_unit = sbi->segs_per_sec; } - if (p->max_search > sbi->max_victim_search) + /* we need to check every dirty segments in the FG_GC case */ + if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; p->offset = sbi->last_victim[p->gc_mode]; @@ -199,6 +200,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) { if (sec_usage_check(sbi, secno)) continue; + + if (no_fggc_candidate(sbi, secno)) + continue; + clear_bit(secno, dirty_i->victim_secmap); return secno * sbi->segs_per_sec; } @@ -322,13 +327,15 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, nsearched++; } - secno = GET_SECNO(sbi, segno); if (sec_usage_check(sbi, secno)) goto next; if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; + if (gc_type == FG_GC && p.alloc_mode == LFS && + no_fggc_candidate(sbi, secno)) + goto next; cost = get_gc_cost(sbi, segno, &p); @@ -985,5 +992,16 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) void build_gc_manager(struct f2fs_sb_info *sbi) { + u64 main_count, resv_count, ovp_count, blocks_per_sec; + DIRTY_I(sbi)->v_ops = &default_v_ops; + + /* threshold of # of valid blocks in a section for victims of FG_GC */ + main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg; + resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg; + ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; + blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec; + + sbi->fggc_threshold = div_u64((main_count - ovp_count) * blocks_per_sec, + (main_count - resv_count)); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 
5cb5755c75d9..f4020f141d83 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -716,6 +716,15 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) - (base + 1) + type; } +static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, + unsigned int secno) +{ + if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >= + sbi->fggc_threshold) + return true; + return false; +} + static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) { if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno)) -- GitLab From 98ba6af728de99953e25e550dbeca588c258ef03 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 16 Feb 2017 15:21:15 +0100 Subject: [PATCH 0511/1084] crush: do is_out test only if we do not collide The is_out() test may require an additional hashing operation, so we should skip it whenever possible. Reflects ceph.git commit db107cc7f15cf2481894add325dc93e33479f529. Signed-off-by: Ilya Dryomov --- net/ceph/crush/mapper.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 2e31217ccae3..84d2de047865 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -552,14 +552,12 @@ static int crush_choose_firstn(const struct crush_map *map, } } - if (!reject) { + if (!reject && !collide) { /* out? */ if (itemtype == 0) reject = is_out(map, weight, weight_max, item, x); - else - reject = 0; } reject: -- GitLab From 7ba0487cca6199cc19bffd34c00df4ea70ac032a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 16 Feb 2017 15:38:05 +0100 Subject: [PATCH 0512/1084] crush: fix dprintk compilation The syntax error was not noticed because dprintk is a macro and the code is discarded by default. Reflects ceph.git commit f29b840c64a933b2cb13e3da6f3d785effd73a57. Signed-off-by: Ilya Dryomov --- net/ceph/crush/mapper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 84d2de047865..b5cd8c21bfdf 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -105,7 +105,7 @@ static int bucket_perm_choose(const struct crush_bucket *bucket, /* calculate permutation up to pr */ for (i = 0; i < work->perm_n; i++) - dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]); + dprintk(" perm_choose have %d: %d\n", i, work->perm[i]); while (work->perm_n <= pr) { unsigned int p = work->perm_n; /* no point in swapping the final entry */ @@ -122,7 +122,7 @@ static int bucket_perm_choose(const struct crush_bucket *bucket, work->perm_n++; } for (i = 0; i < bucket->size; i++) - dprintk(" perm_choose %d: %d\n", i, bucket->perm[i]); + dprintk(" perm_choose %d: %d\n", i, work->perm[i]); s = work->perm[pr]; out: -- GitLab From f107548039807eb890e65ce5cd29d6ac52562f09 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Feb 2017 13:39:59 +0300 Subject: [PATCH 0513/1084] ceph: tidy some white space in get_nonsnap_parent() The white space here seems slightly messed up. Signed-off-by: Dan Carpenter Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fdbc3544e41c..c681762d76e6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -679,8 +679,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc, * working with them. Once we hit a candidate dentry, we attempt to take a * reference to it, and return that as the result. 
*/ -static struct inode *get_nonsnap_parent(struct dentry *dentry) { struct inode - *inode = NULL; +static struct inode *get_nonsnap_parent(struct dentry *dentry) +{ + struct inode *inode = NULL; while (dentry && !IS_ROOT(dentry)) { inode = d_inode_rcu(dentry); -- GitLab From 1f8728b7adc4c2b351cda886a6916087595c9125 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 17 Feb 2017 10:55:23 +0100 Subject: [PATCH 0514/1084] PM / Domains: Move genpd_power_off() above genpd_power_on() Following changes in genpd_power_on() makes it invoke genpd_power_off(). To enable these changes and avoiding to declare genpd_power_off(), let's move its implementation above genpd_power_on(). In this way, following changes should become easier to review. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 162 ++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3a75fb1b4126..3dc44f216067 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -273,6 +273,87 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) queue_work(pm_wq, &genpd->power_off_work); } +/** + * genpd_power_off - Remove power from a given PM domain. + * @genpd: PM domain to power down. + * @is_async: PM domain is powered down from a scheduled work + * + * If all of the @genpd's devices have been suspended and all of its subdomains + * have been powered down, remove power from @genpd. + */ +static int genpd_power_off(struct generic_pm_domain *genpd, bool is_async) +{ + struct pm_domain_data *pdd; + struct gpd_link *link; + unsigned int not_suspended = 0; + + /* + * Do not try to power off the domain in the following situations: + * (1) The domain is already in the "power off" state. + * (2) System suspend is in progress. + */ + if (genpd->status == GPD_STATE_POWER_OFF + || genpd->prepared_count > 0) + return 0; + + if (atomic_read(&genpd->sd_count) > 0) + return -EBUSY; + + list_for_each_entry(pdd, &genpd->dev_list, list_node) { + enum pm_qos_flags_status stat; + + stat = dev_pm_qos_flags(pdd->dev, + PM_QOS_FLAG_NO_POWER_OFF + | PM_QOS_FLAG_REMOTE_WAKEUP); + if (stat > PM_QOS_FLAGS_NONE) + return -EBUSY; + + /* + * Do not allow PM domain to be powered off, when an IRQ safe + * device is part of a non-IRQ safe domain. + */ + if (!pm_runtime_suspended(pdd->dev) || + irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd)) + not_suspended++; + } + + if (not_suspended > 1 || (not_suspended == 1 && is_async)) + return -EBUSY; + + if (genpd->gov && genpd->gov->power_down_ok) { + if (!genpd->gov->power_down_ok(&genpd->domain)) + return -EAGAIN; + } + + if (genpd->power_off) { + int ret; + + if (atomic_read(&genpd->sd_count) > 0) + return -EBUSY; + + /* + * If sd_count > 0 at this point, one of the subdomains hasn't + * managed to call genpd_power_on() for the master yet after + * incrementing it. In that case genpd_power_on() will wait + * for us to drop the lock, so we can call .power_off() and let + * the genpd_power_on() restore power for us (this shouldn't + * happen very often). + */ + ret = _genpd_power_off(genpd, true); + if (ret) + return ret; + } + + genpd->status = GPD_STATE_POWER_OFF; + + list_for_each_entry(link, &genpd->slave_links, slave_node) { + genpd_sd_counter_dec(link->master); + genpd_queue_power_off_work(link->master); + } + + return 0; +} + /** * genpd_power_on - Restore power to a given PM domain and its masters. 
* @genpd: PM domain to power up. @@ -367,87 +448,6 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, return NOTIFY_DONE; } -/** - * genpd_power_off - Remove power from a given PM domain. - * @genpd: PM domain to power down. - * @is_async: PM domain is powered down from a scheduled work - * - * If all of the @genpd's devices have been suspended and all of its subdomains - * have been powered down, remove power from @genpd. - */ -static int genpd_power_off(struct generic_pm_domain *genpd, bool is_async) -{ - struct pm_domain_data *pdd; - struct gpd_link *link; - unsigned int not_suspended = 0; - - /* - * Do not try to power off the domain in the following situations: - * (1) The domain is already in the "power off" state. - * (2) System suspend is in progress. - */ - if (genpd->status == GPD_STATE_POWER_OFF - || genpd->prepared_count > 0) - return 0; - - if (atomic_read(&genpd->sd_count) > 0) - return -EBUSY; - - list_for_each_entry(pdd, &genpd->dev_list, list_node) { - enum pm_qos_flags_status stat; - - stat = dev_pm_qos_flags(pdd->dev, - PM_QOS_FLAG_NO_POWER_OFF - | PM_QOS_FLAG_REMOTE_WAKEUP); - if (stat > PM_QOS_FLAGS_NONE) - return -EBUSY; - - /* - * Do not allow PM domain to be powered off, when an IRQ safe - * device is part of a non-IRQ safe domain. - */ - if (!pm_runtime_suspended(pdd->dev) || - irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd)) - not_suspended++; - } - - if (not_suspended > 1 || (not_suspended == 1 && is_async)) - return -EBUSY; - - if (genpd->gov && genpd->gov->power_down_ok) { - if (!genpd->gov->power_down_ok(&genpd->domain)) - return -EAGAIN; - } - - if (genpd->power_off) { - int ret; - - if (atomic_read(&genpd->sd_count) > 0) - return -EBUSY; - - /* - * If sd_count > 0 at this point, one of the subdomains hasn't - * managed to call genpd_power_on() for the master yet after - * incrementing it. In that case genpd_power_on() will wait - * for us to drop the lock, so we can call .power_off() and let - * the genpd_power_on() restore power for us (this shouldn't - * happen very often). - */ - ret = _genpd_power_off(genpd, true); - if (ret) - return ret; - } - - genpd->status = GPD_STATE_POWER_OFF; - - list_for_each_entry(link, &genpd->slave_links, slave_node) { - genpd_sd_counter_dec(link->master); - genpd_queue_power_off_work(link->master); - } - - return 0; -} - /** * genpd_power_off_work_fn - Power off PM domain whose subdomain count is 0. * @work: Work structure used for scheduling the execution of this function. -- GitLab From 3c64649d1cf9f32fd16491e69ccc16385e0ef421 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 17 Feb 2017 10:55:24 +0100 Subject: [PATCH 0515/1084] PM / Domains: Rename is_async to one_dev_on for genpd_power_off() The parameter name is_async, for genpd_power_off() gives a poor description of its purpose. To clarify, let's rename it to one_dev_on and update the documentation of it in the function header. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3dc44f216067..179bb269a58f 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -276,12 +276,15 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) /** * genpd_power_off - Remove power from a given PM domain. * @genpd: PM domain to power down. 
- * @is_async: PM domain is powered down from a scheduled work + * @one_dev_on: If invoked from genpd's ->runtime_suspend|resume() callback, the + * RPM status of the releated device is in an intermediate state, not yet turned + * into RPM_SUSPENDED. This means genpd_power_off() must allow one device to not + * be RPM_SUSPENDED, while it tries to power off the PM domain. * * If all of the @genpd's devices have been suspended and all of its subdomains * have been powered down, remove power from @genpd. */ -static int genpd_power_off(struct generic_pm_domain *genpd, bool is_async) +static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on) { struct pm_domain_data *pdd; struct gpd_link *link; @@ -317,7 +320,7 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool is_async) not_suspended++; } - if (not_suspended > 1 || (not_suspended == 1 && is_async)) + if (not_suspended > 1 || (not_suspended == 1 && !one_dev_on)) return -EBUSY; if (genpd->gov && genpd->gov->power_down_ok) { @@ -459,7 +462,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) genpd = container_of(work, struct generic_pm_domain, power_off_work); genpd_lock(genpd); - genpd_power_off(genpd, true); + genpd_power_off(genpd, false); genpd_unlock(genpd); } @@ -578,7 +581,7 @@ static int genpd_runtime_suspend(struct device *dev) return 0; genpd_lock(genpd); - genpd_power_off(genpd, false); + genpd_power_off(genpd, true); genpd_unlock(genpd); return 0; @@ -658,7 +661,7 @@ static int genpd_runtime_resume(struct device *dev) if (!pm_runtime_is_irq_safe(dev) || (pm_runtime_is_irq_safe(dev) && genpd_is_irq_safe(genpd))) { genpd_lock(genpd); - genpd_power_off(genpd, 0); + genpd_power_off(genpd, true); genpd_unlock(genpd); } -- GitLab From 2da835452a0875866ca55d23126c77b9372f0015 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 17 Feb 2017 10:55:25 +0100 Subject: [PATCH 0516/1084] PM / Domains: Power off masters immediately in the power off sequence Once a subdomain is powered off, genpd queues a power off work for each of the subdomain's corresponding masters, thus postponing the masters to be powered off to a later point. When genpd used intermediate power off states, which was removed in commit ba2bbfbf6307 ("PM / Domains: Remove intermediate states from the power off sequence"), this behaviour made sense, but now it simply doesn't. Genpd can easily try to power off the masters in the same context as the subdomain, of course by acquiring/releasing the lock. Then, let's convert to this behaviour, as it avoids unnecessary works from being queued. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 179bb269a58f..e697dec9d25b 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -284,7 +284,8 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) * If all of the @genpd's devices have been suspended and all of its subdomains * have been powered down, remove power from @genpd. 
*/ -static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on) +static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, + unsigned int depth) { struct pm_domain_data *pdd; struct gpd_link *link; @@ -351,7 +352,9 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on) list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); - genpd_queue_power_off_work(link->master); + genpd_lock_nested(link->master, depth + 1); + genpd_power_off(link->master, false, depth + 1); + genpd_unlock(link->master); } return 0; @@ -405,7 +408,9 @@ static int genpd_power_on(struct generic_pm_domain *genpd, unsigned int depth) &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); - genpd_queue_power_off_work(link->master); + genpd_lock_nested(link->master, depth + 1); + genpd_power_off(link->master, false, depth + 1); + genpd_unlock(link->master); } return ret; @@ -462,7 +467,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) genpd = container_of(work, struct generic_pm_domain, power_off_work); genpd_lock(genpd); - genpd_power_off(genpd, false); + genpd_power_off(genpd, false, 0); genpd_unlock(genpd); } @@ -581,7 +586,7 @@ static int genpd_runtime_suspend(struct device *dev) return 0; genpd_lock(genpd); - genpd_power_off(genpd, true); + genpd_power_off(genpd, true, 0); genpd_unlock(genpd); return 0; @@ -661,7 +666,7 @@ static int genpd_runtime_resume(struct device *dev) if (!pm_runtime_is_irq_safe(dev) || (pm_runtime_is_irq_safe(dev) && genpd_is_irq_safe(genpd))) { genpd_lock(genpd); - genpd_power_off(genpd, true); + genpd_power_off(genpd, true, 0); genpd_unlock(genpd); } -- GitLab From 76aaf87b4cdc7f7115a32e4fda88310d42ce7fde Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Feb 2017 16:02:36 +0100 Subject: [PATCH 0517/1084] scsi: merge __scsi_execute into scsi_execute All but one caller want the decoded sense header, so offer the existing __scsi_execute helper as the public scsi_execute API to simply the callers. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 12 +++----- drivers/scsi/cxlflash/superpipe.c | 8 ++---- drivers/scsi/cxlflash/vlun.c | 4 +-- drivers/scsi/scsi_lib.c | 48 ++++++++++++++----------------- drivers/scsi/scsi_transport_spi.c | 24 +++++++--------- drivers/scsi/sr_ioctl.c | 19 ++---------- include/scsi/scsi_device.h | 5 ++-- 7 files changed, 46 insertions(+), 74 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 12d3a66600a3..1ac70744ae7b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -600,6 +600,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) u8 args[4], *argbuf = NULL, *sensebuf = NULL; int argsize = 0; enum dma_data_direction data_dir; + struct scsi_sense_hdr sshdr; int cmd_result; if (arg == NULL) @@ -648,7 +649,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) /* Good values for timeout and retries? Values below from scsi_ioctl_send_command() for default case... */ cmd_result = scsi_execute(scsidev, scsi_cmd, data_dir, argbuf, argsize, - sensebuf, (10*HZ), 5, 0, NULL); + sensebuf, &sshdr, (10*HZ), 5, 0, 0, NULL); if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */ u8 *desc = sensebuf + 8; @@ -657,9 +658,6 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) /* If we set cc then ATA pass-through will cause a * check condition even if no error. Filter that. 
*/ if (cmd_result & SAM_STAT_CHECK_CONDITION) { - struct scsi_sense_hdr sshdr; - scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, - &sshdr); if (sshdr.sense_key == RECOVERED_ERROR && sshdr.asc == 0 && sshdr.ascq == 0x1d) cmd_result &= ~SAM_STAT_CHECK_CONDITION; @@ -707,6 +705,7 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) int rc = 0; u8 scsi_cmd[MAX_COMMAND_SIZE]; u8 args[7], *sensebuf = NULL; + struct scsi_sense_hdr sshdr; int cmd_result; if (arg == NULL) @@ -734,7 +733,7 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) /* Good values for timeout and retries? Values below from scsi_ioctl_send_command() for default case... */ cmd_result = scsi_execute(scsidev, scsi_cmd, DMA_NONE, NULL, 0, - sensebuf, (10*HZ), 5, 0, NULL); + sensebuf, &sshdr, (10*HZ), 5, 0, 0, NULL); if (driver_byte(cmd_result) == DRIVER_SENSE) {/* sense data available */ u8 *desc = sensebuf + 8; @@ -743,9 +742,6 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) /* If we set cc then ATA pass-through will cause a * check condition even if no error. Filter that. */ if (cmd_result & SAM_STAT_CHECK_CONDITION) { - struct scsi_sense_hdr sshdr; - scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, - &sshdr); if (sshdr.sense_key == RECOVERED_ERROR && sshdr.asc == 0 && sshdr.ascq == 0x1d) cmd_result &= ~SAM_STAT_CHECK_CONDITION; diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c index 90869cee2b20..5b812ed87f22 100644 --- a/drivers/scsi/cxlflash/superpipe.c +++ b/drivers/scsi/cxlflash/superpipe.c @@ -305,6 +305,7 @@ static int read_cap16(struct scsi_device *sdev, struct llun_info *lli) struct cxlflash_cfg *cfg = shost_priv(sdev->host); struct device *dev = &cfg->dev->dev; struct glun_info *gli = lli->parent; + struct scsi_sense_hdr sshdr; u8 *cmd_buf = NULL; u8 *scsi_cmd = NULL; u8 *sense_buf = NULL; @@ -332,7 +333,8 @@ static int read_cap16(struct scsi_device *sdev, struct llun_info *lli) /* Drop the ioctl read semahpore across lengthy call */ up_read(&cfg->ioctl_rwsem); result = scsi_execute(sdev, scsi_cmd, DMA_FROM_DEVICE, cmd_buf, - CMD_BUFSIZE, sense_buf, to, CMD_RETRIES, 0, NULL); + CMD_BUFSIZE, sense_buf, &sshdr, to, CMD_RETRIES, + 0, 0, NULL); down_read(&cfg->ioctl_rwsem); rc = check_state(cfg); if (rc) { @@ -345,10 +347,6 @@ static int read_cap16(struct scsi_device *sdev, struct llun_info *lli) if (driver_byte(result) == DRIVER_SENSE) { result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */ if (result & SAM_STAT_CHECK_CONDITION) { - struct scsi_sense_hdr sshdr; - - scsi_normalize_sense(sense_buf, SCSI_SENSE_BUFFERSIZE, - &sshdr); switch (sshdr.sense_key) { case NO_SENSE: case RECOVERED_ERROR: diff --git a/drivers/scsi/cxlflash/vlun.c b/drivers/scsi/cxlflash/vlun.c index 8fcc804dbef9..7aa06ef229fd 100644 --- a/drivers/scsi/cxlflash/vlun.c +++ b/drivers/scsi/cxlflash/vlun.c @@ -453,8 +453,8 @@ static int write_same16(struct scsi_device *sdev, /* Drop the ioctl read semahpore across lengthy call */ up_read(&cfg->ioctl_rwsem); result = scsi_execute(sdev, scsi_cmd, DMA_TO_DEVICE, cmd_buf, - CMD_BUFSIZE, sense_buf, to, CMD_RETRIES, - 0, NULL); + CMD_BUFSIZE, sense_buf, NULL, to, + CMD_RETRIES, 0, 0, NULL); down_read(&cfg->ioctl_rwsem); rc = check_state(cfg); if (rc) { diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 5f661486cf6e..b0cb3e0713f3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -213,7 +213,26 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) 
__scsi_queue_insert(cmd, reason, 1); } -static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + +/** + * scsi_execute - insert request and wait for the result + * @sdev: scsi device + * @cmd: scsi command + * @data_direction: data direction + * @buffer: data buffer + * @bufflen: len of buffer + * @sense: optional sense buffer + * @sshdr: optional decoded sense header + * @timeout: request timeout in seconds + * @retries: number of times to retry request + * @flags: flags for ->cmd_flags + * @rq_flags: flags for ->rq_flags + * @resid: optional residual length + * + * returns the req->errors value which is the scsi_cmnd result + * field. + */ +int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, unsigned char *sense, struct scsi_sense_hdr *sshdr, int timeout, int retries, u64 flags, req_flags_t rq_flags, @@ -268,31 +287,6 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, return ret; } - -/** - * scsi_execute - insert request and wait for the result - * @sdev: scsi device - * @cmd: scsi command - * @data_direction: data direction - * @buffer: data buffer - * @bufflen: len of buffer - * @sense: optional sense buffer - * @timeout: request timeout in seconds - * @retries: number of times to retry request - * @flags: or into request flags; - * @resid: optional residual length - * - * returns the req->errors value which is the scsi_cmnd result - * field. - */ -int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, int timeout, int retries, u64 flags, - int *resid) -{ - return __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, - NULL, timeout, retries, flags, 0, resid); -} EXPORT_SYMBOL(scsi_execute); int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, @@ -300,7 +294,7 @@ int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid, u64 flags, req_flags_t rq_flags) { - return __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, + return scsi_execute(sdev, cmd, data_direction, buffer, bufflen, NULL, sshdr, timeout, retries, flags, rq_flags, resid); } diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index 319868f3f674..d0219e36080c 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -123,25 +123,21 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd, { int i, result; unsigned char sense[SCSI_SENSE_BUFFERSIZE]; + struct scsi_sense_hdr sshdr_tmp; + + if (!sshdr) + sshdr = &sshdr_tmp; for(i = 0; i < DV_RETRIES; i++) { - result = scsi_execute(sdev, cmd, dir, buffer, bufflen, - sense, DV_TIMEOUT, /* retries */ 1, + result = scsi_execute(sdev, cmd, dir, buffer, bufflen, sense, + sshdr, DV_TIMEOUT, /* retries */ 1, REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER, - NULL); - if (driver_byte(result) & DRIVER_SENSE) { - struct scsi_sense_hdr sshdr_tmp; - if (!sshdr) - sshdr = &sshdr_tmp; - - if (scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, - sshdr) - && sshdr->sense_key == UNIT_ATTENTION) - continue; - } - break; + 0, NULL); + if (!(driver_byte(result) & DRIVER_SENSE) || + sshdr->sense_key != UNIT_ATTENTION) + break; } return result; } diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index dfffdf63e44c..4610c8c5693f 100644 --- 
a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -187,30 +187,19 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) struct scsi_device *SDev; struct scsi_sense_hdr sshdr; int result, err = 0, retries = 0; - struct request_sense *sense = cgc->sense; SDev = cd->device; - if (!sense) { - sense = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL); - if (!sense) { - err = -ENOMEM; - goto out; - } - } - retry: if (!scsi_block_when_processing_errors(SDev)) { err = -ENODEV; goto out; } - memset(sense, 0, sizeof(*sense)); result = scsi_execute(SDev, cgc->cmd, cgc->data_direction, - cgc->buffer, cgc->buflen, (char *)sense, - cgc->timeout, IOCTL_RETRIES, 0, NULL); - - scsi_normalize_sense((char *)sense, sizeof(*sense), &sshdr); + cgc->buffer, cgc->buflen, + (unsigned char *)cgc->sense, &sshdr, + cgc->timeout, IOCTL_RETRIES, 0, 0, NULL); /* Minimal error checking. Ignore cases we know about, and report the rest. */ if (driver_byte(result) != 0) { @@ -261,8 +250,6 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc) /* Wake up a process waiting for device */ out: - if (!cgc->sense) - kfree(sense); cgc->stat = err; return err; } diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index be41c76ddd48..cb4c8c889da0 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -410,8 +410,9 @@ extern int scsi_is_target_device(const struct device *); extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, - unsigned char *sense, int timeout, int retries, - u64 flags, int *resid); + unsigned char *sense, struct scsi_sense_hdr *sshdr, + int timeout, int retries, u64 flags, + req_flags_t rq_flags, int *resid); extern int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, -- GitLab From fcbfffe2c5cbec0c1721b2261c316b961ad50208 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Feb 2017 16:02:37 +0100 Subject: [PATCH 0518/1084] scsi: remove scsi_execute_req_flags And switch all callers to use scsi_execute instead. Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. 
Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 16 ++++++---------- drivers/scsi/device_handler/scsi_dh_emc.c | 7 +++---- drivers/scsi/device_handler/scsi_dh_hp_sw.c | 10 ++++------ drivers/scsi/device_handler/scsi_dh_rdac.c | 7 +++---- drivers/scsi/scsi_lib.c | 11 ----------- drivers/scsi/sd.c | 9 ++++----- drivers/scsi/ufs/ufshcd.c | 10 +++++----- include/scsi/scsi_device.h | 8 ++------ 8 files changed, 27 insertions(+), 51 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index d704752b6332..48e200102221 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -151,11 +151,9 @@ static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, cdb[1] = MI_REPORT_TARGET_PGS; put_unaligned_be32(bufflen, &cdb[6]); - return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE, - buff, bufflen, sshdr, - ALUA_FAILOVER_TIMEOUT * HZ, - ALUA_FAILOVER_RETRIES, NULL, - req_flags, 0); + return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL, + sshdr, ALUA_FAILOVER_TIMEOUT * HZ, + ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); } /* @@ -185,11 +183,9 @@ static int submit_stpg(struct scsi_device *sdev, int group_id, cdb[1] = MO_SET_TARGET_PGS; put_unaligned_be32(stpg_len, &cdb[6]); - return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, - stpg_data, stpg_len, - sshdr, ALUA_FAILOVER_TIMEOUT * HZ, - ALUA_FAILOVER_RETRIES, NULL, - req_flags, 0); + return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL, + sshdr, ALUA_FAILOVER_TIMEOUT * HZ, + ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); } static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index f1578832ec7a..8654e940e1a8 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -276,10 +276,9 @@ static int send_trespass_cmd(struct scsi_device *sdev, BUG_ON((len > CLARIION_BUFFER_SIZE)); memcpy(csdev->buffer, page22, len); - err = scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, - csdev->buffer, len, &sshdr, - CLARIION_TIMEOUT * HZ, CLARIION_RETRIES, - NULL, req_flags, 0); + err = scsi_execute(sdev, cdb, DMA_TO_DEVICE, csdev->buffer, len, NULL, + &sshdr, CLARIION_TIMEOUT * HZ, CLARIION_RETRIES, + req_flags, 0, NULL); if (err) { if (scsi_sense_valid(&sshdr)) res = trespass_endio(sdev, &sshdr); diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index be43c940636d..62d314e07d11 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -100,9 +100,8 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) REQ_FAILFAST_DRIVER; retry: - res = scsi_execute_req_flags(sdev, cmd, DMA_NONE, NULL, 0, &sshdr, - HP_SW_TIMEOUT, HP_SW_RETRIES, - NULL, req_flags, 0); + res = scsi_execute(sdev, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + HP_SW_TIMEOUT, HP_SW_RETRIES, req_flags, 0, NULL); if (res) { if (scsi_sense_valid(&sshdr)) ret = tur_done(sdev, h, &sshdr); @@ -139,9 +138,8 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h) REQ_FAILFAST_DRIVER; retry: - res = scsi_execute_req_flags(sdev, cmd, DMA_NONE, NULL, 0, &sshdr, - HP_SW_TIMEOUT, HP_SW_RETRIES, - NULL, req_flags, 0); + res = scsi_execute(sdev, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + HP_SW_TIMEOUT, HP_SW_RETRIES, req_flags, 0, NULL); if (res) { if (!scsi_sense_valid(&sshdr)) 
{ sdev_printk(KERN_WARNING, sdev, diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index b64eaae8533d..3cbab8710e58 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -555,10 +555,9 @@ static void send_mode_select(struct work_struct *work) (char *) h->ctlr->array_name, h->ctlr->index, (retry_cnt == RDAC_RETRY_COUNT) ? "queueing" : "retrying"); - if (scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, - &h->ctlr->mode_select, data_size, &sshdr, - RDAC_TIMEOUT * HZ, - RDAC_RETRIES, NULL, req_flags, 0)) { + if (scsi_execute(sdev, cdb, DMA_TO_DEVICE, &h->ctlr->mode_select, + data_size, NULL, &sshdr, RDAC_TIMEOUT * HZ, + RDAC_RETRIES, req_flags, 0, NULL)) { err = mode_select_handle_sense(sdev, &sshdr); if (err == SCSI_DH_RETRY && retry_cnt--) goto retry; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b0cb3e0713f3..f5e45a252485 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -289,17 +289,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, } EXPORT_SYMBOL(scsi_execute); -int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, - int data_direction, void *buffer, unsigned bufflen, - struct scsi_sense_hdr *sshdr, int timeout, int retries, - int *resid, u64 flags, req_flags_t rq_flags) -{ - return scsi_execute(sdev, cmd, data_direction, buffer, bufflen, - NULL, sshdr, timeout, retries, flags, rq_flags, - resid); -} -EXPORT_SYMBOL(scsi_execute_req_flags); - /* * Function: scsi_init_cmd_errh() * diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index be535d4215c4..c7839f6c35cc 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1508,9 +1508,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp) * Leave the rest of the command zero to indicate * flush everything. */ - res = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, - &sshdr, timeout, SD_MAX_RETRIES, - NULL, 0, RQF_PM); + res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + timeout, SD_MAX_RETRIES, 0, RQF_PM, NULL); if (res == 0) break; } @@ -3296,8 +3295,8 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start) if (!scsi_device_online(sdp)) return -ENODEV; - res = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, - SD_TIMEOUT, SD_MAX_RETRIES, NULL, 0, RQF_PM); + res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + SD_TIMEOUT, SD_MAX_RETRIES, 0, RQF_PM, NULL); if (res) { sd_print_result(sdkp, "Start/Stop Unit failed", res); if (driver_byte(res) & DRIVER_SENSE) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 8b721f431dd0..dc6efbd1be8e 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6915,9 +6915,9 @@ ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp) goto out; } - ret = scsi_execute_req_flags(sdp, cmd, DMA_FROM_DEVICE, buffer, - UFSHCD_REQ_SENSE_SIZE, NULL, - msecs_to_jiffies(1000), 3, NULL, 0, RQF_PM); + ret = scsi_execute(sdp, cmd, DMA_FROM_DEVICE, buffer, + UFSHCD_REQ_SENSE_SIZE, NULL, NULL, + msecs_to_jiffies(1000), 3, 0, RQF_PM, NULL); if (ret) pr_err("%s: failed with err %d\n", __func__, ret); @@ -6982,8 +6982,8 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * callbacks hence set the RQF_PM flag so that it doesn't resume the * already suspended childs. 
*/ - ret = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, - START_STOP_TIMEOUT, 0, NULL, 0, RQF_PM); + ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + START_STOP_TIMEOUT, 0, 0, RQF_PM, NULL); if (ret) { sdev_printk(KERN_WARNING, sdp, "START_STOP failed for power mode: %d, result %x\n", diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index cb4c8c889da0..6f22b39f1b0c 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -413,17 +413,13 @@ extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, unsigned char *sense, struct scsi_sense_hdr *sshdr, int timeout, int retries, u64 flags, req_flags_t rq_flags, int *resid); -extern int scsi_execute_req_flags(struct scsi_device *sdev, - const unsigned char *cmd, int data_direction, void *buffer, - unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, - int retries, int *resid, u64 flags, req_flags_t rq_flags); static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid) { - return scsi_execute_req_flags(sdev, cmd, data_direction, buffer, - bufflen, sshdr, timeout, retries, resid, 0, 0); + return scsi_execute(sdev, cmd, data_direction, buffer, + bufflen, NULL, sshdr, timeout, retries, 0, 0, resid); } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); -- GitLab From 1afca6b5f31616d330fb64a0c87060640a75ff6a Mon Sep 17 00:00:00 2001 From: "Dupuis, Chad" Date: Thu, 23 Feb 2017 07:01:03 -0800 Subject: [PATCH 0519/1084] scsi: qedf: fixup compilation warning about atomic_t usage Based on an original patch by Hannes Reinecke. The driver didn't follow the atomic_t vs refcount_t change, and anyway one should be using kref_read() instead of accessing the counter inside an kref. Fixes: 61d8658b4a435e ("scsi: qedf: Add QLogic FastLinQ offload FCoE driver framework.) Cc: Hannes Reinecke Cc: Nilesh Javali Signed-off-by: Dupuis, Chad Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qedf/qedf_els.c | 6 +++--- drivers/scsi/qedf/qedf_io.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c index 78f1c252b649..59f3e5c73a13 100644 --- a/drivers/scsi/qedf/qedf_els.c +++ b/drivers/scsi/qedf/qedf_els.c @@ -183,7 +183,7 @@ static void qedf_rrq_compl(struct qedf_els_cb_arg *cb_arg) rrq_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) cancel_delayed_work_sync(&orig_io_req->timeout_work); - refcount = atomic_read(&orig_io_req->refcount.refcount); + refcount = kref_read(&orig_io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "rrq_compl: orig io = %p," " orig xid = 0x%x, rrq_xid = 0x%x, refcount=%d\n", orig_io_req, orig_io_req->xid, rrq_req->xid, refcount); @@ -474,7 +474,7 @@ static void qedf_srr_compl(struct qedf_els_cb_arg *cb_arg) srr_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) cancel_delayed_work_sync(&orig_io_req->timeout_work); - refcount = atomic_read(&orig_io_req->refcount.refcount); + refcount = kref_read(&orig_io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered: orig_io=%p," " orig_io_xid=0x%x, rec_xid=0x%x, refcount=%d\n", orig_io_req, orig_io_req->xid, srr_req->xid, refcount); @@ -758,7 +758,7 @@ static void qedf_rec_compl(struct qedf_els_cb_arg *cb_arg) rec_req->event != QEDF_IOREQ_EV_ELS_ERR_DETECT) cancel_delayed_work_sync(&orig_io_req->timeout_work); - refcount = atomic_read(&orig_io_req->refcount.refcount); + refcount = kref_read(&orig_io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "Entered: orig_io=%p," " orig_io_xid=0x%x, rec_xid=0x%x, refcount=%d\n", orig_io_req, orig_io_req->xid, rec_req->xid, refcount); diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 486c045ac8bb..ee0dcf9d3aba 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -998,7 +998,7 @@ static void qedf_trace_io(struct qedf_rport *fcport, struct qedf_ioreq *io_req, io_log->sg_count = scsi_sg_count(sc_cmd); io_log->result = sc_cmd->result; io_log->jiffies = jiffies; - io_log->refcount = atomic_read(&io_req->refcount.refcount); + io_log->refcount = kref_read(&io_req->refcount); if (direction == QEDF_IO_TRACE_REQ) { /* For requests we only care abot the submission CPU */ @@ -1340,7 +1340,7 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, /* Good I/O completion */ sc_cmd->result = DID_OK << 16; } else { - refcount = atomic_read(&io_req->refcount.refcount); + refcount = kref_read(&io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d xid=0x%0x op=0x%02x " "lba=%02x%02x%02x%02x cdb_status=%d " @@ -1425,7 +1425,7 @@ void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, qedf_unmap_sg_list(qedf, io_req); sc_cmd->result = result << 16; - refcount = atomic_read(&io_req->refcount.refcount); + refcount = kref_read(&io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing " "sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, " "allowed=%d retries=%d refcount=%d.\n", @@ -1556,7 +1556,7 @@ static void qedf_flush_els_req(struct qedf_ctx *qedf, { QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "Flushing ELS request xid=0x%x refcount=%d.\n", els_req->xid, - atomic_read(&els_req->refcount.refcount)); + kref_read(&els_req->refcount)); /* * Need to distinguish this from a timeout when calling the -- GitLab From 8cc311167c22f9365304b2b20225df2d881c8843 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 20 Feb 2017 19:57:57 +0100 
Subject: [PATCH 0520/1084] PM / OPP: fix off-by-one bug in dev_pm_opp_get_max_volt_latency loop Reading array at given index before checking if index is valid results in illegal memory access. The bug was detected using KASAN framework. Signed-off-by: Andrzej Hajda Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 91ec3232d630..dae61720b314 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -231,7 +231,8 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device *dev) * The caller needs to ensure that opp_table (and hence the regulator) * isn't freed, while we are executing this routine. */ - for (i = 0; reg = regulators[i], i < count; i++) { + for (i = 0; i < count; i++) { + reg = regulators[i]; ret = regulator_set_voltage_time(reg, uV[i].min, uV[i].max); if (ret > 0) latency_ns += ret * 1000; -- GitLab From 1d55abc0e98a0bf35f3af80665aac564e3b30572 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 22 Feb 2017 11:31:41 +0100 Subject: [PATCH 0521/1084] scsi: mpt3sas: switch to pci_alloc_irq_vectors Cleanup the MSI-X handling allowing us to use the PCI-layer provided vector allocation. Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Acked-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 105 +++++++++++++--------------- drivers/scsi/mpt3sas/mpt3sas_base.h | 2 - 2 files changed, 48 insertions(+), 59 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index a3fe1fb55c17..5b7aec5d575a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1148,7 +1148,7 @@ mpt3sas_base_sync_reply_irqs(struct MPT3SAS_ADAPTER *ioc) /* TMs are on msix_index == 0 */ if (reply_q->msix_index == 0) continue; - synchronize_irq(reply_q->vector); + synchronize_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index)); } } @@ -1837,11 +1837,8 @@ _base_free_irq(struct MPT3SAS_ADAPTER *ioc) list_for_each_entry_safe(reply_q, next, &ioc->reply_queue_list, list) { list_del(&reply_q->list); - if (smp_affinity_enable) { - irq_set_affinity_hint(reply_q->vector, NULL); - free_cpumask_var(reply_q->affinity_hint); - } - free_irq(reply_q->vector, reply_q); + free_irq(pci_irq_vector(ioc->pdev, reply_q->msix_index), + reply_q); kfree(reply_q); } } @@ -1850,13 +1847,13 @@ _base_free_irq(struct MPT3SAS_ADAPTER *ioc) * _base_request_irq - request irq * @ioc: per adapter object * @index: msix index into vector table - * @vector: irq vector * * Inserting respective reply_queue into the list. 
*/ static int -_base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index, u32 vector) +_base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index) { + struct pci_dev *pdev = ioc->pdev; struct adapter_reply_queue *reply_q; int r; @@ -1868,14 +1865,6 @@ _base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index, u32 vector) } reply_q->ioc = ioc; reply_q->msix_index = index; - reply_q->vector = vector; - - if (smp_affinity_enable) { - if (!zalloc_cpumask_var(&reply_q->affinity_hint, GFP_KERNEL)) { - kfree(reply_q); - return -ENOMEM; - } - } atomic_set(&reply_q->busy, 0); if (ioc->msix_enable) @@ -1884,12 +1873,11 @@ _base_request_irq(struct MPT3SAS_ADAPTER *ioc, u8 index, u32 vector) else snprintf(reply_q->name, MPT_NAME_LENGTH, "%s%d", ioc->driver_name, ioc->id); - r = request_irq(vector, _base_interrupt, IRQF_SHARED, reply_q->name, - reply_q); + r = request_irq(pci_irq_vector(pdev, index), _base_interrupt, + IRQF_SHARED, reply_q->name, reply_q); if (r) { pr_err(MPT3SAS_FMT "unable to allocate interrupt %d!\n", - reply_q->name, vector); - free_cpumask_var(reply_q->affinity_hint); + reply_q->name, pci_irq_vector(pdev, index)); kfree(reply_q); return -EBUSY; } @@ -1925,6 +1913,21 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc) if (!nr_msix) return; + if (smp_affinity_enable) { + list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { + const cpumask_t *mask = pci_irq_get_affinity(ioc->pdev, + reply_q->msix_index); + if (!mask) { + pr_warn(MPT3SAS_FMT "no affinity for msi %x\n", + ioc->name, reply_q->msix_index); + continue; + } + + for_each_cpu(cpu, mask) + ioc->cpu_msix_table[cpu] = reply_q->msix_index; + } + return; + } cpu = cpumask_first(cpu_online_mask); list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { @@ -1938,18 +1941,9 @@ _base_assign_reply_queues(struct MPT3SAS_ADAPTER *ioc) group++; for (i = 0 ; i < group ; i++) { - ioc->cpu_msix_table[cpu] = index; - if (smp_affinity_enable) - cpumask_or(reply_q->affinity_hint, - reply_q->affinity_hint, get_cpu_mask(cpu)); + ioc->cpu_msix_table[cpu] = reply_q->msix_index; cpu = cpumask_next(cpu, cpu_online_mask); } - if (smp_affinity_enable) - if (irq_set_affinity_hint(reply_q->vector, - reply_q->affinity_hint)) - dinitprintk(ioc, pr_info(MPT3SAS_FMT - "Err setting affinity hint to irq vector %d\n", - ioc->name, reply_q->vector)); index++; } } @@ -1976,10 +1970,10 @@ _base_disable_msix(struct MPT3SAS_ADAPTER *ioc) static int _base_enable_msix(struct MPT3SAS_ADAPTER *ioc) { - struct msix_entry *entries, *a; int r; int i, local_max_msix_vectors; u8 try_msix = 0; + unsigned int irq_flags = PCI_IRQ_MSIX; if (msix_disable == -1 || msix_disable == 0) try_msix = 1; @@ -1991,7 +1985,7 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc) goto try_ioapic; ioc->reply_queue_count = min_t(int, ioc->cpu_count, - ioc->msix_vector_count); + ioc->msix_vector_count); printk(MPT3SAS_FMT "MSI-X vectors supported: %d, no of cores" ": %d, max_msix_vectors: %d\n", ioc->name, ioc->msix_vector_count, @@ -2002,56 +1996,51 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc) else local_max_msix_vectors = max_msix_vectors; - if (local_max_msix_vectors > 0) { + if (local_max_msix_vectors > 0) ioc->reply_queue_count = min_t(int, local_max_msix_vectors, ioc->reply_queue_count); - ioc->msix_vector_count = ioc->reply_queue_count; - } else if (local_max_msix_vectors == 0) + else if (local_max_msix_vectors == 0) goto try_ioapic; if (ioc->msix_vector_count < ioc->cpu_count) smp_affinity_enable = 0; - entries = kcalloc(ioc->reply_queue_count, sizeof(struct msix_entry), - 
GFP_KERNEL); - if (!entries) { - dfailprintk(ioc, pr_info(MPT3SAS_FMT - "kcalloc failed @ at %s:%d/%s() !!!\n", - ioc->name, __FILE__, __LINE__, __func__)); - goto try_ioapic; - } + if (smp_affinity_enable) + irq_flags |= PCI_IRQ_AFFINITY; - for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++) - a->entry = i; - - r = pci_enable_msix_exact(ioc->pdev, entries, ioc->reply_queue_count); - if (r) { + r = pci_alloc_irq_vectors(ioc->pdev, 1, ioc->reply_queue_count, + irq_flags); + if (r < 0) { dfailprintk(ioc, pr_info(MPT3SAS_FMT - "pci_enable_msix_exact failed (r=%d) !!!\n", + "pci_alloc_irq_vectors failed (r=%d) !!!\n", ioc->name, r)); - kfree(entries); goto try_ioapic; } ioc->msix_enable = 1; - for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++) { - r = _base_request_irq(ioc, i, a->vector); + ioc->reply_queue_count = r; + for (i = 0; i < ioc->reply_queue_count; i++) { + r = _base_request_irq(ioc, i); if (r) { _base_free_irq(ioc); _base_disable_msix(ioc); - kfree(entries); goto try_ioapic; } } - kfree(entries); return 0; /* failback to io_apic interrupt routing */ try_ioapic: ioc->reply_queue_count = 1; - r = _base_request_irq(ioc, 0, ioc->pdev->irq); + r = pci_alloc_irq_vectors(ioc->pdev, 1, 1, PCI_IRQ_LEGACY); + if (r < 0) { + dfailprintk(ioc, pr_info(MPT3SAS_FMT + "pci_alloc_irq_vector(legacy) failed (r=%d) !!!\n", + ioc->name, r)); + } else + r = _base_request_irq(ioc, 0); return r; } @@ -2222,7 +2211,8 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc) list_for_each_entry(reply_q, &ioc->reply_queue_list, list) pr_info(MPT3SAS_FMT "%s: IRQ %d\n", reply_q->name, ((ioc->msix_enable) ? "PCI-MSI-X enabled" : - "IO-APIC enabled"), reply_q->vector); + "IO-APIC enabled"), + pci_irq_vector(ioc->pdev, reply_q->msix_index)); pr_info(MPT3SAS_FMT "iomem(0x%016llx), mapped(0x%p), size(%d)\n", ioc->name, (unsigned long long)chip_phys, ioc->chip, memap_sz); @@ -5357,7 +5347,8 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) sizeof(resource_size_t *), GFP_KERNEL); if (!ioc->reply_post_host_index) { dfailprintk(ioc, pr_info(MPT3SAS_FMT "allocation " - "for cpu_msix_table failed!!!\n", ioc->name)); + "for reply_post_host_index failed!!!\n", + ioc->name)); r = -ENOMEM; goto out_free_resources; } diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 4ab634fc27df..7fe7e6ed595b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -731,12 +731,10 @@ struct _event_ack_list { struct adapter_reply_queue { struct MPT3SAS_ADAPTER *ioc; u8 msix_index; - unsigned int vector; u32 reply_post_host_index; Mpi2ReplyDescriptorsUnion_t *reply_post_free; char name[MPT_NAME_LENGTH]; atomic_t busy; - cpumask_var_t affinity_hint; struct list_head list; }; -- GitLab From 17b4eaf475684bd12aeecf358a7ea6e3bec871a5 Mon Sep 17 00:00:00 2001 From: Tang Yuantian Date: Tue, 21 Feb 2017 14:51:24 +0800 Subject: [PATCH 0522/1084] cpufreq: qoriq: clean up unused code This snip code is not needed anymore since its user get_hard_smp_processor_id() has been removed. Signed-off-by: Tang Yuantian Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/qoriq-cpufreq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index a6fefac8afe4..bfec1bcd3835 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -23,10 +23,6 @@ #include #include -#if !defined(CONFIG_ARM) -#include /* for get_hard_smp_processor_id() in UP configs */ -#endif - /** * struct cpu_data * @pclk: the parent clock of cpu -- GitLab From a56e574067c20d01d8fc74863fa187dd66da7b94 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Wed, 22 Feb 2017 07:23:13 -0800 Subject: [PATCH 0523/1084] scsi: aacraid: Fixed expander hotplug for SMART family Current driver Hotplug processing code skips over Enclosure channel, therefore any addition/removal of expander enclosure is not processed. Additionally device addition code relies on older device type, which prevents the hotplug of adapter expanders. Fixed by removing code that skips over Enclosure channels and using the latest device type for addition or removal or enclosure expanders. Fixes: 6223a39fe6fbbeef (scsi: aacraid: Added support for hotplug) Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index a2ea70d8a13a..1994c7445b54 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -1908,9 +1908,6 @@ static void aac_resolve_luns(struct aac_dev *dev) for (bus = 0; bus < AAC_MAX_BUSES; bus++) { for (target = 0; target < AAC_MAX_TARGETS; target++) { - if (aac_phys_to_logical(bus) == ENCLOSURE_CHANNEL) - continue; - if (bus == CONTAINER_CHANNEL) channel = CONTAINER_CHANNEL; else @@ -1922,7 +1919,7 @@ static void aac_resolve_luns(struct aac_dev *dev) sdev = scsi_device_lookup(dev->scsi_host_ptr, channel, target, 0); - if (!sdev && devtype) + if (!sdev && new_devtype) scsi_add_device(dev->scsi_host_ptr, channel, target, 0); else if (sdev && new_devtype != devtype) -- GitLab From 35bfa99e81f45079728f6b2ac553dabb0bc62c7d Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Tue, 21 Feb 2017 21:41:53 +0100 Subject: [PATCH 0524/1084] PM / runtime: Fix some typos Signed-off-by: Christophe Jaillet Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 4870980e967e..64546eb9a16a 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -100,7 +100,7 @@ knows what to do to handle the device). * If the suspend callback returns an error code different from -EBUSY and -EAGAIN, the PM core regards this as a fatal error and will refuse to run the helper functions described in Section 4 for the device until its status - is directly set to either'active', or 'suspended' (the PM core provides + is directly set to either 'active', or 'suspended' (the PM core provides special helper functions for this purpose). In particular, if the driver requires remote wakeup capability (i.e. 
hardware @@ -217,7 +217,7 @@ defined in include/linux/pm.h: one to complete spinlock_t lock; - - lock used for synchronisation + - lock used for synchronization atomic_t usage_count; - the usage counter of the device @@ -565,7 +565,7 @@ appropriate to ensure that the device is not put back to sleep during the probe. This can happen with systems such as the network device layer. It may be desirable to suspend the device once ->probe() has finished. -Therefore the driver core uses the asyncronous pm_request_idle() to submit a +Therefore the driver core uses the asynchronous pm_request_idle() to submit a request to execute the subsystem-level idle callback for the device at that time. A driver that makes use of the runtime autosuspend feature, may want to update the last busy mark before returning from ->probe(). -- GitLab From d08d1b27fe2a7f6923952613f5fab56ae47a6f5b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 22 Feb 2017 13:58:52 +0530 Subject: [PATCH 0525/1084] PM / QoS: Remove global notifiers They were never used in the kernel, so get rid of them. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/power/pm_qos_interface.txt | 13 +----- drivers/base/power/qos.c | 50 ++---------------------- include/linux/pm_qos.h | 8 ---- 3 files changed, 5 insertions(+), 66 deletions(-) diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index 129f7c0e1483..21d2d48f87a2 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -163,8 +163,7 @@ of flags and remove sysfs attributes pm_qos_no_power_off and pm_qos_remote_wakeu under the device's power directory. Notification mechanisms: -The per-device PM QoS framework has 2 different and distinct notification trees: -a per-device notification tree and a global notification tree. +The per-device PM QoS framework has a per-device notification tree. int dev_pm_qos_add_notifier(device, notifier): Adds a notification callback function for the device. @@ -174,16 +173,6 @@ is changed (for resume latency device PM QoS only). int dev_pm_qos_remove_notifier(device, notifier): Removes the notification callback function for the device. -int dev_pm_qos_add_global_notifier(notifier): -Adds a notification callback function in the global notification tree of the -framework. -The callback is called when the aggregated value for any device is changed -(for resume latency device PM QoS only). - -int dev_pm_qos_remove_global_notifier(notifier): -Removes the notification callback function from the global notification tree -of the framework. - Active state latency tolerance diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index d888d9869b6a..271bec73185e 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -17,12 +17,9 @@ * * This QoS design is best effort based. Dependents register their QoS needs. * Watchers register to keep track of the current QoS needs of the system. - * Watchers can register different types of notification callbacks: - * . a per-device notification callback using the dev_pm_qos_*_notifier API. - * The notification chain data is stored in the per-device constraint - * data struct. - * . a system-wide notification callback using the dev_pm_qos_*_global_notifier - * API. The notification chain data is stored in a static variable. + * Watchers can register a per-device notification callback using the + * dev_pm_qos_*_notifier API. 
The notification chain data is stored in the + * per-device constraint data struct. * * Note about the per-device constraint data struct allocation: * . The per-device constraints data struct ptr is tored into the device @@ -49,8 +46,6 @@ static DEFINE_MUTEX(dev_pm_qos_mtx); static DEFINE_MUTEX(dev_pm_qos_sysfs_mtx); -static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers); - /** * __dev_pm_qos_flags - Check PM QoS flags for a given device. * @dev: Device to check the PM QoS flags for. @@ -135,8 +130,7 @@ s32 dev_pm_qos_read_value(struct device *dev) * @value: Value to assign to the QoS request. * * Internal function to update the constraints list using the PM QoS core - * code and if needed call the per-device and the global notification - * callbacks + * code and if needed call the per-device callbacks. */ static int apply_constraint(struct dev_pm_qos_request *req, enum pm_qos_req_action action, s32 value) @@ -148,12 +142,6 @@ static int apply_constraint(struct dev_pm_qos_request *req, case DEV_PM_QOS_RESUME_LATENCY: ret = pm_qos_update_target(&qos->resume_latency, &req->data.pnode, action, value); - if (ret) { - value = pm_qos_read_value(&qos->resume_latency); - blocking_notifier_call_chain(&dev_pm_notifiers, - (unsigned long)value, - req); - } break; case DEV_PM_QOS_LATENCY_TOLERANCE: ret = pm_qos_update_target(&qos->latency_tolerance, @@ -535,36 +523,6 @@ int dev_pm_qos_remove_notifier(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_qos_remove_notifier); -/** - * dev_pm_qos_add_global_notifier - sets notification entry for changes to - * target value of the PM QoS constraints for any device - * - * @notifier: notifier block managed by caller. - * - * Will register the notifier into a notification chain that gets called - * upon changes to the target value for any device. - */ -int dev_pm_qos_add_global_notifier(struct notifier_block *notifier) -{ - return blocking_notifier_chain_register(&dev_pm_notifiers, notifier); -} -EXPORT_SYMBOL_GPL(dev_pm_qos_add_global_notifier); - -/** - * dev_pm_qos_remove_global_notifier - deletes notification for changes to - * target value of PM QoS constraints for any device - * - * @notifier: notifier block to be removed. - * - * Will remove the notifier from the notification chain that gets called - * upon changes to the target value for any device. - */ -int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier) -{ - return blocking_notifier_chain_unregister(&dev_pm_notifiers, notifier); -} -EXPORT_SYMBOL_GPL(dev_pm_qos_remove_global_notifier); - /** * dev_pm_qos_add_ancestor_request - Add PM QoS request for device's ancestor. * @dev: Device whose ancestor to add the request for. 
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index d4d34791e463..3e2547d6e207 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -146,8 +146,6 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier); int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier); -int dev_pm_qos_add_global_notifier(struct notifier_block *notifier); -int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); int dev_pm_qos_add_ancestor_request(struct device *dev, @@ -199,12 +197,6 @@ static inline int dev_pm_qos_add_notifier(struct device *dev, static inline int dev_pm_qos_remove_notifier(struct device *dev, struct notifier_block *notifier) { return 0; } -static inline int dev_pm_qos_add_global_notifier( - struct notifier_block *notifier) - { return 0; } -static inline int dev_pm_qos_remove_global_notifier( - struct notifier_block *notifier) - { return 0; } static inline void dev_pm_qos_constraints_init(struct device *dev) { dev->power.power_state = PMSG_ON; -- GitLab From 669a311b6ecc4eeb367a269bdd5f4e143caddb9c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Feb 2017 10:57:23 +0000 Subject: [PATCH 0526/1084] scsi: qla2xxx: fix spelling mistake: "seperator" -> "separator" Trivial fix to spelling mistake in pr_err message Signed-off-by: Colin Ian King Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 3084983c1287..fbbe5abd6ddb 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1738,7 +1738,7 @@ static struct se_wwn *tcm_qla2xxx_npiv_make_lport( p = strchr(tmp, '@'); if (!p) { - pr_err("Unable to locate NPIV '@' seperator\n"); + pr_err("Unable to locate NPIV '@' separator\n"); return ERR_PTR(-EINVAL); } *p++ = '\0'; -- GitLab From ebe73647fb3895b75544cecdb6766a517fbf03d6 Mon Sep 17 00:00:00 2001 From: Don Brace Date: Thu, 23 Feb 2017 08:57:20 -0600 Subject: [PATCH 0527/1084] scsi: cciss: correct check map error. Remove device driver failed to check map error messages Reported-by: Johnny Bieren Tested-by: Johnny Bieren Reviewed-by: Scott Teel Signed-off-by: Don Brace Signed-off-by: Martin K. 
Petersen --- drivers/block/cciss.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 27d613795653..8e1a4554951c 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -348,7 +348,7 @@ static void cciss_unmap_sg_chain_block(ctlr_info_t *h, CommandList_struct *c) pci_unmap_single(h->pdev, temp64.val, chain_sg->Len, PCI_DMA_TODEVICE); } -static void cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c, +static int cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c, SGDescriptor_struct *chain_block, int len) { SGDescriptor_struct *chain_sg; @@ -359,8 +359,16 @@ static void cciss_map_sg_chain_block(ctlr_info_t *h, CommandList_struct *c, chain_sg->Len = len; temp64.val = pci_map_single(h->pdev, chain_block, len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&h->pdev->dev, temp64.val)) { + dev_warn(&h->pdev->dev, + "%s: error mapping chain block for DMA\n", + __func__); + return -1; + } chain_sg->Addr.lower = temp64.val32.lower; chain_sg->Addr.upper = temp64.val32.upper; + + return 0; } #include "cciss_scsi.c" /* For SCSI tape support */ @@ -3369,15 +3377,31 @@ static void do_cciss_request(struct request_queue *q) temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]), tmp_sg[i].offset, tmp_sg[i].length, dir); + if (dma_mapping_error(&h->pdev->dev, temp64.val)) { + dev_warn(&h->pdev->dev, + "%s: error mapping page for DMA\n", __func__); + creq->errors = make_status_bytes(SAM_STAT_GOOD, + 0, DRIVER_OK, + DID_SOFT_ERROR); + cmd_free(h, c); + return; + } curr_sg[sg_index].Addr.lower = temp64.val32.lower; curr_sg[sg_index].Addr.upper = temp64.val32.upper; curr_sg[sg_index].Ext = 0; /* we are not chaining */ ++sg_index; } - if (chained) - cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex], + if (chained) { + if (cciss_map_sg_chain_block(h, c, h->cmd_sg_list[c->cmdindex], (seg - (h->max_cmd_sgentries - 1)) * - sizeof(SGDescriptor_struct)); + sizeof(SGDescriptor_struct))) { + creq->errors = make_status_bytes(SAM_STAT_GOOD, + 0, DRIVER_OK, + DID_SOFT_ERROR); + cmd_free(h, c); + return; + } + } /* track how many SG entries we are using */ if (seg > h->maxSG) -- GitLab From 6682c14bbe505a8b912c57faf544f866777ee48d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 23 Feb 2017 14:53:39 -0500 Subject: [PATCH 0528/1084] NFSv4: fix getacl head length estimation Bitmap and attrlen follow immediately after the op reply header. This was an oversight from commit bf118a342f. Consequences of this are just minor efficiency (extra calls to xdr_shrink_bufhead). Fixes: bf118a342f10 "NFSv4: include bitmap in nfsv4 get acl data" Reviewed-by: Kinglong Mee Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3268a2393512..f0369e362753 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2514,7 +2514,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + 1; + replen = hdr.replen + op_decode_hdr_maxsz; encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, -- GitLab From ed92d8c137b7794c2c2aa14479298b9885967607 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 23 Feb 2017 14:54:21 -0500 Subject: [PATCH 0529/1084] NFSv4: fix getacl ERANGE for some ACL buffer sizes We're not taking into account that the space needed for the (variable length) attr bitmap, with the result that we'd sometimes get a spurious ERANGE when the ACL data got close to the end of a page. Just add in an extra page to make sure. Signed-off-by: Weston Andros Adamson Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92b2805ffb82..1b183686c6d4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4954,7 +4954,7 @@ static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; + struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, }; struct nfs_getaclargs args = { .fh = NFS_FH(inode), .acl_pages = pages, @@ -4968,13 +4968,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); + unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; int ret = -ENOMEM, i; - /* As long as we're doing a round trip to the server anyway, - * let's be prepared for a page of acl data. */ - if (npages == 0) - npages = 1; if (npages > ARRAY_SIZE(pages)) return -ERANGE; -- GitLab From 2559a1ef688f933835912c731bed2254146a9b04 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 23 Feb 2017 09:08:02 +1100 Subject: [PATCH 0530/1084] scsi: mac_scsi: Fix MAC_SCSI=m option when SCSI=m The mac_scsi driver still gets disabled when SCSI=m. This should have been fixed back when I enabled the tristate but I didn't see the bug. Fixes: 6e9ae6d560e1 ("[PATCH] mac_scsi: Add module option to Kconfig") Signed-off-by: Finn Thain Signed-off-by: Martin K. 
Petersen --- drivers/scsi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 8aa9bd34123e..230043c1c90f 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1480,7 +1480,7 @@ config ATARI_SCSI config MAC_SCSI tristate "Macintosh NCR5380 SCSI" - depends on MAC && SCSI=y + depends on MAC && SCSI select SCSI_SPI_ATTRS help This is the NCR 5380 SCSI controller included on most of the 68030 -- GitLab From 4dd9920d991745c4a16f53a8f615f706fbe4b3f7 Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Thu, 5 Jan 2017 16:24:55 +0800 Subject: [PATCH 0531/1084] Btrfs: send, fix failure to rename top level inode due to name collision Under certain situations, an incremental send operation can fail due to a premature attempt to create a new top level inode (a direct child of the subvolume/snapshot root) whose name collides with another inode that was removed from the send snapshot. Consider the following example scenario. Parent snapshot: . (ino 256, gen 8) |---- a1/ (ino 257, gen 9) |---- a2/ (ino 258, gen 9) Send snapshot: . (ino 256, gen 3) |---- a2/ (ino 257, gen 7) In this scenario, when receiving the incremental send stream, the btrfs receive command fails like this (ran in verbose mode, -vv argument): rmdir a1 mkfile o257-7-0 rename o257-7-0 -> a2 ERROR: rename o257-7-0 -> a2 failed: Is a directory What happens when computing the incremental send stream is: 1) An operation to remove the directory with inode number 257 and generation 9 is issued. 2) An operation to create the inode with number 257 and generation 7 is issued. This creates the inode with an orphanized name of "o257-7-0". 3) An operation rename the new inode 257 to its final name, "a2", is issued. This is incorrect because inode 258, which has the same name and it's a child of the same parent (root inode 256), was not yet processed and therefore no rmdir operation for it was yet issued. The rename operation is issued because we fail to detect that the name of the new inode 257 collides with inode 258, because their parent, a subvolume/snapshot root (inode 256) has a different generation in both snapshots. So fix this by ignoring the generation value of a parent directory that matches a root inode (number 256) when we are checking if the name of the inode currently being processed collides with the name of some other inode that was not yet processed. We can achieve this scenario of different inodes with the same number but different generation values either by mounting a filesystem with the inode cache option (-o inode_cache) or by creating and sending snapshots across different filesystems, like in the following example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/a1 $ mkdir /mnt/a2 $ btrfs subvolume snapshot -r /mnt /mnt/snap1 $ btrfs send /mnt/snap1 -f /tmp/1.snap $ umount /mnt $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ touch /mnt/a2 $ btrfs subvolume snapshot -r /mnt /mnt/snap2 $ btrfs receive /mnt -f /tmp/1.snap # Take note that once the filesystem is created, its current # generation has value 7 so the inode from the second snapshot has # a generation value of 7. 
And after receiving the first snapshot # the filesystem is at a generation value of 10, because the call to # create the second snapshot bumps the generation to 8 (the snapshot # creation ioctl does a transaction commit), the receive command calls # the snapshot creation ioctl to create the first snapshot, which bumps # the filesystem's generation to 9, and finally when the receive # operation finishes it calls an ioctl to transition the first snapshot # (snap1) from RW mode to RO mode, which does another transaction commit # and bumps the filesystem's generation to 10. $ rm -f /tmp/1.snap $ btrfs send /mnt/snap1 -f /tmp/1.snap $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/2.snap $ umount /mnt $ mkfs.btrfs -f /dev/sdd $ mount /dev/sdd /mnt $ btrfs receive /mnt /tmp/1.snap # Receive of snapshot snap2 used to fail. $ btrfs receive /mnt /tmp/2.snap Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana [Rewrote changelog to be more precise and clear] Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index d145ce804620..2ae32c4f7dd7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1681,6 +1681,9 @@ static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen) { int ret; + if (ino == BTRFS_FIRST_FREE_OBJECTID) + return 1; + ret = get_cur_inode_state(sctx, ino, gen); if (ret < 0) goto out; @@ -1866,7 +1869,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, * not deleted and then re-created, if it was then we have no overwrite * and we can just unlink this entry. */ - if (sctx->parent_root) { + if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) { ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, NULL, NULL); if (ret < 0 && ret != -ENOENT) -- GitLab From fe9c798dbf4c78322549068255f611e4038a5b28 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 11 Jan 2017 02:15:39 +0000 Subject: [PATCH 0532/1084] Btrfs: incremental send, do not delay rename when parent inode is new When we are checking if we need to delay the rename operation for an inode we not checking if a parent inode that exists in the send and parent snapshots is really the same inode or not, that is, we are not comparing the generation number of the parent inode in the send and parent snapshots. Not only this results in unnecessarily delaying a rename operation but also can later on make us generate an incorrect name for a new inode in the send snapshot that has the same number as another inode in the parent snapshot but a different generation. Here follows an example where this happens. Parent snapshot: . (ino 256, gen 3) |--- dir258/ (ino 258, gen 7) | |--- dir257/ (ino 257, gen 7) | |--- dir259/ (ino 259, gen 7) Send snapshot: . (ino 256, gen 3) |--- file258 (ino 258, gen 10) | |--- new_dir259/ (ino 259, gen 10) |--- dir257/ (ino 257, gen 7) The following steps happen when computing the incremental send stream: 1) When processing inode 257, its new parent is created using its orphan name (o257-21-0), and the rename operation for inode 257 is delayed because its new parent (inode 259) was not yet processed - this decision to delay the rename operation does not make much sense because the inode 259 in the send snapshot is a new inode, it's not the same as inode 259 in the parent snapshot. 2) When processing inode 258 we end up delaying its rmdir operation, because inode 257 was not yet renamed (moved away from the directory inode 258 represents). 
We also create the new inode 258 using its orphan name "o258-10-0", then rename it to its final name of "file258" and then issue a truncate operation for it. However this truncate operation contains an incorrect name, which corresponds to the orphan name and not to the final name, which makes the receiver fail. This happens because when we attempt to compute the inode's current name we verify that there's another inode with the same number (258) that has its rmdir operation pending and because of that we generate an orphan name for the new inode 258 (we do this in the function get_cur_path()). Fix this by not delayed the rename operation of an inode if it has parents with the same number but different generations in both snapshots. The following steps reproduce this example scenario. $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/dir257 $ mkdir /mnt/dir258 $ mkdir /mnt/dir259 $ mv /mnt/dir257 /mnt/dir258/dir257 $ btrfs subvolume snapshot -r /mnt /mnt/snap1 $ mv /mnt/dir258/dir257 /mnt/dir257 $ rmdir /mnt/dir258 $ rmdir /mnt/dir259 # Remount the filesystem so that the next created inodes will have the # numbers 258 and 259. This is because when a filesystem is mounted, # btrfs sets the subvolume's inode counter to a value corresponding to # the highest inode number in the subvolume plus 1. This inode counter # is used to assign a unique number to each new inode and it's # incremented by 1 after very inode creation. # Note: we unmount and then mount instead of doing a mount with # "-o remount" because otherwise the inode counter remains at value 260. $ umount /mnt $ mount /dev/sdb /mnt $ touch /mnt/file258 $ mkdir /mnt/new_dir259 $ mv /mnt/dir257 /mnt/new_dir259/dir257 $ btrfs subvolume snapshot -r /mnt /mnt/snap2 $ btrfs send /mnt/snap1 -f /tmp/1.snap $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/2.snap $ umount /mnt $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ btrfs receive /mnt -f /tmo/1.snap $ btrfs receive /mnt -f /tmo/2.snap -vv receiving snapshot mysnap2 uuid=e059b6d1-7f55-f140-8d7c-9a3039d23c97, ctransid=10 parent_uuid=77e98cb6-8762-814f-9e05-e8ba877fc0b0, parent_ctransid=7 utimes mkdir o259-10-0 rename dir258 -> o258-7-0 utimes mkfile o258-10-0 rename o258-10-0 -> file258 utimes truncate o258-10-0 size=0 ERROR: truncate o258-10-0 failed: No such file or directory Reported-by: Robbie Ko Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2ae32c4f7dd7..2742324514e4 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3559,6 +3559,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, { int ret = 0; u64 ino = parent_ref->dir; + u64 ino_gen = parent_ref->dir_gen; u64 parent_ino_before, parent_ino_after; struct fs_path *path_before = NULL; struct fs_path *path_after = NULL; @@ -3579,6 +3580,8 @@ static int wait_for_parent_move(struct send_ctx *sctx, * at get_cur_path()). 
*/ while (ino > BTRFS_FIRST_FREE_OBJECTID) { + u64 parent_ino_after_gen; + if (is_waiting_for_move(sctx, ino)) { /* * If the current inode is an ancestor of ino in the @@ -3601,7 +3604,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, fs_path_reset(path_after); ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, - NULL, path_after); + &parent_ino_after_gen, path_after); if (ret < 0) goto out; ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, @@ -3618,10 +3621,20 @@ static int wait_for_parent_move(struct send_ctx *sctx, if (ino > sctx->cur_ino && (parent_ino_before != parent_ino_after || len1 != len2 || memcmp(path_before->start, path_after->start, len1))) { - ret = 1; - break; + u64 parent_ino_gen; + + ret = get_inode_info(sctx->parent_root, ino, NULL, + &parent_ino_gen, NULL, NULL, NULL, + NULL); + if (ret < 0) + goto out; + if (ino_gen == parent_ino_gen) { + ret = 1; + break; + } } ino = parent_ino_after; + ino_gen = parent_ino_after_gen; } out: -- GitLab From 0191410158840d6176de3267daa4604ad6a773fb Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Thu, 5 Jan 2017 16:24:58 +0800 Subject: [PATCH 0533/1084] Btrfs: incremental send, do not issue invalid rmdir operations When both the parent and send snapshots have a directory inode with the same number but different generations (therefore they are different inodes) and both have an entry with the same name, an incremental send stream will contain an invalid rmdir operation that refers to the orphanized name of the inode from the parent snapshot. The following example scenario shows how this happens. Parent snapshot: . |---- d259_old/ (ino 259, gen 9) | |---- d1/ (ino 258, gen 9) | |---- f (ino 257, gen 9) Send snapshot: . |---- d258/ (ino 258, gen 7) |---- d259/ (ino 259, gen 7) |---- d1/ (ino 257, gen 7) When the kernel is processing inode 258 it notices that in both snapshots there is an inode numbered 259 that is a parent of an inode 258. However it ignores the fact that the inodes numbered 259 have different generations in both snapshots, which means they are effectively different inodes. Then it checks that both inodes 259 have a dentry named "d1" and because of that it issues a rmdir operation with orphanized name of the inode 258 from the parent snapshot. This happens at send.c:process_record_refs(), which calls send.c:did_overwrite_first_ref() that returns true and because of that later on at process_recorded_refs() such rmdir operation is issued because the inode being currently processed (258) is a directory and it was deleted in the send snapshot (and replaced with another inode that has the same number and is a directory too). Fix this issue by comparing the generations of parent directory inodes that have the same number and make send.c:did_overwrite_first_ref() when the generations are different. The following steps reproduce the problem. $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ touch /mnt/f $ mkdir /mnt/d1 $ mkdir /mnt/d259_old $ mv /mnt/d1 /mnt/d259_old/d1 $ btrfs subvolume snapshot -r /mnt /mnt/snap1 $ btrfs send /mnt/snap1 -f /tmp/1.snap $ umount /mnt $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ mkdir /mnt/d1 $ mkdir /mnt/dir258 $ mkdir /mnt/dir259 $ mv /mnt/d1 /mnt/dir259/d1 $ btrfs subvolume snapshot -r /mnt /mnt/snap2 $ btrfs receive /mnt/ -f /tmp/1.snap # Take note that once the filesystem is created, its current # generation has value 7 so the inodes from the second snapshot all have # a generation value of 7. 
And after receiving the first snapshot # the filesystem is at a generation value of 10, because the call to # create the second snapshot bumps the generation to 8 (the snapshot # creation ioctl does a transaction commit), the receive command calls # the snapshot creation ioctl to create the first snapshot, which bumps # the filesystem's generation to 9, and finally when the receive # operation finishes it calls an ioctl to transition the first snapshot # (snap1) from RW mode to RO mode, which does another transaction commit # and bumps the filesystem's generation to 10. This means all the inodes # in the first snapshot (snap1) have a generation value of 9. $ rm -f /tmp/1.snap $ btrfs send /mnt/snap1 -f /tmp/1.snap $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/2.snap $ umount /mnt $ mkfs.btrfs -f /dev/sdd $ mount /dev/sdd /mnt $ btrfs receive /mnt -f /tmp/1.snap $ btrfs receive -vv /mnt -f /tmp/2.snap receiving snapshot mysnap2 uuid=9c03962f-f620-0047-9f98-32e5a87116d9, ctransid=7 parent_uuid=d17a6e3f-14e5-df4f-be39-a7951a5399aa, parent_ctransid=9 utimes unlink f mkdir o257-7-0 mkdir o259-7-0 rename o257-7-0 -> o259-7-0/d1 chown o259-7-0/d1 - uid=0, gid=0 chmod o259-7-0/d1 - mode=0755 utimes o259-7-0/d1 rmdir o258-9-0 ERROR: rmdir o258-9-0 failed: No such file or directory Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana [Rewrote changelog to be more precise and clear] Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2742324514e4..712922ea64d2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1937,6 +1937,19 @@ static int did_overwrite_ref(struct send_ctx *sctx, if (ret <= 0) goto out; + if (dir != BTRFS_FIRST_FREE_OBJECTID) { + ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, + NULL, NULL, NULL); + if (ret < 0 && ret != -ENOENT) + goto out; + if (ret) { + ret = 0; + goto out; + } + if (gen != dir_gen) + goto out; + } + /* check if the ref was overwritten by another ref */ ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len, &ow_inode, &other_type); -- GitLab From 6f546216e9f9e95d6783547ce6113eb13e2daa54 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 28 Jan 2017 01:47:56 +0000 Subject: [PATCH 0534/1084] Btrfs: bulk delete checksum items in the same leaf Very often we have the checksums for an extent spread in multiple items in the checksums tree, and currently the algorithm to delete them starts by looking for them one by one and then deleting them one by one, which is not optimal since each deletion involves shifting all the other items in the leaf and when the leaf reaches some low threshold, to move items off the leaf into its left and right neighbor leafs. Also, after each item deletion we release our search path and start a new search for other checksums items. So optimize this by deleting in bulk all the items in the same leaf that contain checksums for the extent being freed. 
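To illustrate the idea, here is a simplified sketch of the new deletion logic in btrfs_del_csums() (the complete change is in the diff below; error handling and the surrounding search loop are omitted): starting from the item found for the current offset, walk backwards over any preceding checksum items in the same leaf that also fall inside the range being freed, then remove the whole run with a single btrfs_del_items() call instead of one btrfs_del_item() call per item.

	int del_nr = 1;
	int slot = path->slots[0] - 1;

	/* Walk backwards over preceding csum items that cover our range. */
	while (slot >= 0) {
		struct btrfs_key pk;

		btrfs_item_key_to_cpu(leaf, &pk, slot);
		if (pk.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    pk.type != BTRFS_EXTENT_CSUM_KEY ||
		    pk.offset < bytenr)
			break;
		path->slots[0] = slot;
		del_nr++;
		slot--;
	}
	/* Delete the whole contiguous run of items with one call. */
	ret = btrfs_del_items(trans, root, path, path->slots[0], del_nr);
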
Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/file-item.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f7b9a92ad56d..e35df48b4383 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -643,7 +643,33 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, /* delete the entire item, it is inside our range */ if (key.offset >= bytenr && csum_end <= end_byte) { - ret = btrfs_del_item(trans, root, path); + int del_nr = 1; + + /* + * Check how many csum items preceding this one in this + * leaf correspond to our range and then delete them all + * at once. + */ + if (key.offset > bytenr && path->slots[0] > 0) { + int slot = path->slots[0] - 1; + + while (slot >= 0) { + struct btrfs_key pk; + + btrfs_item_key_to_cpu(leaf, &pk, slot); + if (pk.offset < bytenr || + pk.type != BTRFS_EXTENT_CSUM_KEY || + pk.objectid != + BTRFS_EXTENT_CSUM_OBJECTID) + break; + path->slots[0] = slot; + del_nr++; + key.offset = pk.offset; + slot--; + } + } + ret = btrfs_del_items(trans, root, path, + path->slots[0], del_nr); if (ret) goto out; if (key.offset == bytenr) -- GitLab From 91e1f56a8b3c94cb5ac9ce12b806134dc33c1eeb Mon Sep 17 00:00:00 2001 From: Robbie Ko Date: Fri, 7 Oct 2016 10:01:29 +0800 Subject: [PATCH 0535/1084] Btrfs: fix leak of subvolume writers counter When falling back from a nocow write to a regular cow write, we were leaking the subvolume writers counter in 2 situations, preventing snapshot creation from ever completing in the future, as it waits for that counter to go down to zero before the snapshot creation starts. Signed-off-by: Robbie Ko Reviewed-by: Filipe Manana [Improved changelog and subject] Signed-off-by: Filipe Manana --- fs/btrfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c38391e948d9..4efe9d82944c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1331,10 +1331,16 @@ static noinline int run_delalloc_nocow(struct inode *inode, * either valid or do not exist. */ if (csum_exist_in_range(fs_info, disk_bytenr, - num_bytes)) + num_bytes)) { + if (!nolock) + btrfs_end_write_no_snapshoting(root); goto out_check; - if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) + } + if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) { + if (!nolock) + btrfs_end_write_no_snapshoting(root); goto out_check; + } nocow = 1; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = found_key.offset + -- GitLab From 3168021cf9b4906f7bd9871770235f14c5a17715 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 1 Feb 2017 14:58:02 +0000 Subject: [PATCH 0536/1084] Btrfs: do not create explicit holes when replaying log tree if NO_HOLES enabled We log holes explicitly by using file extent items, however when replaying a log tree, if a logged file extent item corresponds to a hole and the NO_HOLES feature is enabled we do not need to copy the file extent item into the fs/subvolume tree, as the absence of such file extent items is the purpose of the NO_HOLES feature. So skip the copying of file extent items representing holes when the NO_HOLES feature is enabled. 
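For context, an explicit hole is recorded as a file extent item whose disk_bytenr is zero, so the fix (full diff below) amounts to a small check at the top of replay_one_extent(), roughly:

	/*
	 * A file extent item with a zero disk_bytenr describes a hole.
	 * With the NO_HOLES incompat feature enabled such items need not
	 * exist in the fs/subvolume tree, so skip copying the item and
	 * only update the inode.
	 */
	if (btrfs_file_extent_disk_bytenr(eb, item) == 0 &&
	    btrfs_fs_incompat(fs_info, NO_HOLES))
		goto update_inode;
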
Signed-off-by: Filipe Manana --- fs/btrfs/tree-log.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3806853cde08..3cc972330ab3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -673,6 +673,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, unsigned long dest_offset; struct btrfs_key ins; + if (btrfs_file_extent_disk_bytenr(eb, item) == 0 && + btrfs_fs_incompat(fs_info, NO_HOLES)) + goto update_inode; + ret = btrfs_insert_empty_item(trans, root, path, key, sizeof(*item)); if (ret) @@ -825,6 +829,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } inode_add_bytes(inode, nbytes); +update_inode: ret = btrfs_update_inode(trans, root, inode); out: if (inode) -- GitLab From 5cdd7db6c5c9d33dc66ecdd81aa8020276c7d140 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 1 Feb 2017 22:39:50 +0000 Subject: [PATCH 0537/1084] Btrfs: fix assertion failure when freeing block groups at close_ctree() At close_ctree() we free the block groups and then only after we wait for any running worker kthreads to finish and shutdown the workqueues. This behaviour is racy and it triggers an assertion failure when freeing block groups because while we are doing it we can have for example a block group caching kthread running, and in that case the block group's reference count can still be greater than 1 by the time we assert its reference count is 1, leading to an assertion failure: [19041.198004] assertion failed: atomic_read(&block_group->count) == 1, file: fs/btrfs/extent-tree.c, line: 9799 [19041.200584] ------------[ cut here ]------------ [19041.201692] kernel BUG at fs/btrfs/ctree.h:3418! [19041.202830] invalid opcode: 0000 [#1] PREEMPT SMP [19041.203929] Modules linked in: btrfs xor raid6_pq dm_flakey dm_mod crc32c_generic ppdev sg psmouse acpi_cpufreq pcspkr parport_pc evdev tpm_tis parport tpm_tis_core i2c_piix4 i2c_core tpm serio_raw processor button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix virtio_pci libata virtio_ring virtio e1000 scsi_mod floppy [last unloaded: btrfs] [19041.208082] CPU: 6 PID: 29051 Comm: umount Not tainted 4.9.0-rc7-btrfs-next-36+ #1 [19041.208082] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [19041.208082] task: ffff88015f028980 task.stack: ffffc9000ad34000 [19041.208082] RIP: 0010:[] [] assfail.constprop.41+0x1c/0x1e [btrfs] [19041.208082] RSP: 0018:ffffc9000ad37d60 EFLAGS: 00010286 [19041.208082] RAX: 0000000000000061 RBX: ffff88015ecb4000 RCX: 0000000000000001 [19041.208082] RDX: ffff88023f392fb8 RSI: ffffffff817ef7ba RDI: 00000000ffffffff [19041.208082] RBP: ffffc9000ad37d60 R08: 0000000000000001 R09: 0000000000000000 [19041.208082] R10: ffffc9000ad37cb0 R11: ffffffff82f2b66d R12: ffff88023431d170 [19041.208082] R13: ffff88015ecb40c0 R14: ffff88023431d000 R15: ffff88015ecb4100 [19041.208082] FS: 00007f44f3d42840(0000) GS:ffff88023f380000(0000) knlGS:0000000000000000 [19041.208082] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [19041.208082] CR2: 00007f65d623b000 CR3: 00000002166f2000 CR4: 00000000000006e0 [19041.208082] Stack: [19041.208082] ffffc9000ad37d98 ffffffffa035989f ffff88015ecb4000 ffff88015ecb5630 [19041.208082] ffff88014f6be000 0000000000000000 00007ffcf0ba6a10 ffffc9000ad37df8 [19041.208082] ffffffffa0368cd4 ffff88014e9658e0 ffffc9000ad37e08 ffffffff811a634d [19041.208082] Call Trace: [19041.208082] [] 
btrfs_free_block_groups+0x17f/0x392 [btrfs] [19041.208082] [] close_ctree+0x1c5/0x2e1 [btrfs] [19041.208082] [] ? evict_inodes+0x132/0x141 [19041.208082] [] btrfs_put_super+0x15/0x17 [btrfs] [19041.208082] [] generic_shutdown_super+0x6a/0xeb [19041.208082] [] kill_anon_super+0x12/0x1c [19041.208082] [] btrfs_kill_super+0x16/0x21 [btrfs] [19041.208082] [] deactivate_locked_super+0x3b/0x68 [19041.208082] [] deactivate_super+0x36/0x39 [19041.208082] [] cleanup_mnt+0x58/0x76 [19041.208082] [] __cleanup_mnt+0x12/0x14 [19041.208082] [] task_work_run+0x6f/0x95 [19041.208082] [] prepare_exit_to_usermode+0xa3/0xc1 [19041.208082] [] syscall_return_slowpath+0x16e/0x1d2 [19041.208082] [] entry_SYSCALL_64_fastpath+0xab/0xad [19041.208082] Code: c7 ae a0 3e a0 48 89 e5 e8 4e 74 d4 e0 0f 0b 55 89 f1 48 c7 c2 0b a4 3e a0 48 89 fe 48 c7 c7 a4 a6 3e a0 48 89 e5 e8 30 74 d4 e0 <0f> 0b 55 31 d2 48 89 e5 e8 d5 b9 f7 ff 5d c3 48 63 f6 55 31 c9 [19041.208082] RIP [] assfail.constprop.41+0x1c/0x1e [btrfs] [19041.208082] RSP [19041.279264] ---[ end trace 23330586f16f064d ]--- This started happening as of kernel 4.8, since commit f3bca8028bd9 ("Btrfs: add ASSERT for block group's memory leak") introduced these assertions. So fix this by freeing the block groups only after waiting for all worker kthreads to complete and shutdown the workqueues. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/disk-io.c | 6 +++--- fs/btrfs/extent-tree.c | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b06f557c176..d0e61d58b121 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3261,7 +3261,6 @@ int open_ctree(struct super_block *sb, fail_block_groups: btrfs_put_block_group_cache(fs_info); - btrfs_free_block_groups(fs_info); fail_tree_roots: free_root_pointers(fs_info, 1); @@ -3269,6 +3268,7 @@ int open_ctree(struct super_block *sb, fail_sb_buffer: btrfs_stop_all_workers(fs_info); + btrfs_free_block_groups(fs_info); fail_alloc: fail_iput: btrfs_mapping_tree_free(&fs_info->mapping_tree); @@ -3977,8 +3977,6 @@ void close_ctree(struct btrfs_fs_info *fs_info) btrfs_put_block_group_cache(fs_info); - btrfs_free_block_groups(fs_info); - /* * we must make sure there is not any read request to * submit after we stopping all workers. @@ -3986,6 +3984,8 @@ void close_ctree(struct btrfs_fs_info *fs_info) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); + btrfs_free_block_groups(fs_info); + clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, 1); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c35b96633554..438c7312de33 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9740,6 +9740,11 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) } } +/* + * Must be called only after stopping all workers, since we could have block + * group caching kthreads running, and therefore they could race with us if we + * freed the block groups before stopping them. + */ int btrfs_free_block_groups(struct btrfs_fs_info *info) { struct btrfs_block_group_cache *block_group; @@ -9779,9 +9784,6 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); - if (block_group->cached == BTRFS_CACHE_STARTED) - wait_block_group_cache_done(block_group); - /* * We haven't cached this block group, which means we could * possibly have excluded extents on this block group. 
@@ -9791,6 +9793,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) free_excluded_extents(info, block_group); btrfs_remove_free_space_cache(block_group); + ASSERT(block_group->cached != BTRFS_CACHE_STARTED); ASSERT(list_empty(&block_group->dirty_list)); ASSERT(list_empty(&block_group->io_list)); ASSERT(list_empty(&block_group->bg_list)); -- GitLab From a9b9477db2937934e469db800317ec3ef7e81b51 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 4 Feb 2017 17:12:00 +0000 Subject: [PATCH 0538/1084] Btrfs: fix use-after-free due to wrong order of destroying work queues Before we destroy all work queues (and wait for their tasks to complete) we were destroying the work queues used for metadata I/O operations, which can result in a use-after-free problem because most tasks from all work queues do metadata I/O operations. For example, the tasks from the caching workers work queue (fs_info->caching_workers), which is destroyed only after the work queue used for metadata reads (fs_info->endio_meta_workers) is destroyed, do metadata reads, which result in attempts to queue tasks into the later work queue, triggering a use-after-free with a trace like the following: [23114.613543] general protection fault: 0000 [#1] PREEMPT SMP [23114.614442] Modules linked in: dm_thin_pool dm_persistent_data dm_bio_prison dm_bufio libcrc32c btrfs xor raid6_pq dm_flakey dm_mod crc32c_generic acpi_cpufreq tpm_tis tpm_tis_core tpm ppdev parport_pc parport i2c_piix4 processor sg evdev i2c_core psmouse pcspkr serio_raw button loop autofs4 ext4 crc16 jbd2 mbcache sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix virtio_pci libata virtio_ring virtio e1000 scsi_mod floppy [last unloaded: scsi_debug] [23114.616932] CPU: 9 PID: 4537 Comm: kworker/u32:8 Not tainted 4.9.0-rc7-btrfs-next-36+ #1 [23114.616932] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [23114.616932] Workqueue: btrfs-cache btrfs_cache_helper [btrfs] [23114.616932] task: ffff880221d45780 task.stack: ffffc9000bc50000 [23114.616932] RIP: 0010:[] [] btrfs_queue_work+0x2c/0x190 [btrfs] [23114.616932] RSP: 0018:ffff88023f443d60 EFLAGS: 00010246 [23114.616932] RAX: 0000000000000000 RBX: 6b6b6b6b6b6b6b6b RCX: 0000000000000102 [23114.616932] RDX: ffffffffa0419000 RSI: ffff88011df534f0 RDI: ffff880101f01c00 [23114.616932] RBP: ffff88023f443d80 R08: 00000000000f7000 R09: 000000000000ffff [23114.616932] R10: ffff88023f443d48 R11: 0000000000001000 R12: ffff88011df534f0 [23114.616932] R13: ffff880135963868 R14: 0000000000001000 R15: 0000000000001000 [23114.616932] FS: 0000000000000000(0000) GS:ffff88023f440000(0000) knlGS:0000000000000000 [23114.616932] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [23114.616932] CR2: 00007f0fb9f8e520 CR3: 0000000001a0b000 CR4: 00000000000006e0 [23114.616932] Stack: [23114.616932] ffff880101f01c00 ffff88011df534f0 ffff880135963868 0000000000001000 [23114.616932] ffff88023f443da0 ffffffffa03470af ffff880149b37200 ffff880135963868 [23114.616932] ffff88023f443db8 ffffffff8125293c ffff880149b37200 ffff88023f443de0 [23114.616932] Call Trace: [23114.616932] [23114.616932] [] end_workqueue_bio+0xd5/0xda [btrfs] [23114.616932] [] bio_endio+0x54/0x57 [23114.616932] [] btrfs_end_bio+0xf7/0x106 [btrfs] [23114.616932] [] bio_endio+0x54/0x57 [23114.616932] [] blk_update_request+0x21a/0x30f [23114.616932] [] scsi_end_request+0x31/0x182 [scsi_mod] [23114.616932] [] scsi_io_completion+0x1ce/0x4c8 [scsi_mod] [23114.616932] [] scsi_finish_command+0x104/0x10d 
[scsi_mod] [23114.616932] [] scsi_softirq_done+0x101/0x10a [scsi_mod] [23114.616932] [] blk_done_softirq+0x82/0x8d [23114.616932] [] __do_softirq+0x1ab/0x412 [23114.616932] [] irq_exit+0x49/0x99 [23114.616932] [] smp_call_function_single_interrupt+0x24/0x26 [23114.616932] [] call_function_single_interrupt+0x89/0x90 [23114.616932] [23114.616932] [] ? scsi_request_fn+0x13a/0x2a1 [scsi_mod] [23114.616932] [] ? _raw_spin_unlock_irq+0x2c/0x4a [23114.616932] [] ? _raw_spin_unlock_irq+0x32/0x4a [23114.616932] [] ? _raw_spin_unlock_irq+0x2c/0x4a [23114.616932] [] scsi_request_fn+0x13a/0x2a1 [scsi_mod] [23114.616932] [] __blk_run_queue_uncond+0x22/0x2b [23114.616932] [] __blk_run_queue+0x19/0x1b [23114.616932] [] blk_queue_bio+0x268/0x282 [23114.616932] [] generic_make_request+0xbd/0x160 [23114.616932] [] submit_bio+0x100/0x11d [23114.616932] [] ? __this_cpu_preempt_check+0x13/0x15 [23114.616932] [] ? __percpu_counter_add+0x8e/0xa7 [23114.616932] [] btrfsic_submit_bio+0x1a/0x1d [btrfs] [23114.616932] [] btrfs_map_bio+0x1f4/0x26d [btrfs] [23114.616932] [] btree_submit_bio_hook+0x74/0xbf [btrfs] [23114.616932] [] ? btrfs_wq_submit_bio+0x160/0x160 [btrfs] [23114.616932] [] submit_one_bio+0x6b/0x89 [btrfs] [23114.616932] [] read_extent_buffer_pages+0x170/0x1ec [btrfs] [23114.616932] [] ? free_root_pointers+0x64/0x64 [btrfs] [23114.616932] [] readahead_tree_block+0x3f/0x4c [btrfs] [23114.616932] [] read_block_for_search.isra.20+0x1ce/0x23d [btrfs] [23114.616932] [] btrfs_search_slot+0x65f/0x774 [btrfs] [23114.616932] [] ? free_extent_buffer+0x73/0x7e [btrfs] [23114.616932] [] btrfs_next_old_leaf+0xa1/0x33c [btrfs] [23114.616932] [] btrfs_next_leaf+0x10/0x12 [btrfs] [23114.616932] [] caching_thread+0x22d/0x416 [btrfs] [23114.616932] [] btrfs_scrubparity_helper+0x187/0x3b6 [btrfs] [23114.616932] [] btrfs_cache_helper+0xe/0x10 [btrfs] [23114.616932] [] process_one_work+0x273/0x4e4 [23114.616932] [] worker_thread+0x1eb/0x2ca [23114.616932] [] ? rescuer_thread+0x2b6/0x2b6 [23114.616932] [] kthread+0xd5/0xdd [23114.616932] [] ? __kthread_unpark+0x5a/0x5a [23114.616932] [] ret_from_fork+0x27/0x40 [23114.616932] Code: 1f 44 00 00 55 48 89 e5 41 56 41 55 41 54 53 49 89 f4 48 8b 46 70 a8 04 74 09 48 8b 5f 08 48 85 db 75 03 48 8b 1f 49 89 5c 24 68 <83> 7b 64 ff 74 04 f0 ff 43 58 49 83 7c 24 08 00 74 2c 4c 8d 6b [23114.616932] RIP [] btrfs_queue_work+0x2c/0x190 [btrfs] [23114.616932] RSP [23114.689493] ---[ end trace 6e48b6bc707ca34b ]--- [23114.690166] Kernel panic - not syncing: Fatal exception in interrupt [23114.691283] Kernel Offset: disabled [23114.691918] ---[ end Kernel panic - not syncing: Fatal exception in interrupt The following diagram shows the sequence of operations that lead to the use-after-free problem from the above trace: CPU 1 CPU 2 CPU 3 caching_thread() close_ctree() btrfs_stop_all_workers() btrfs_destroy_workqueue( fs_info->endio_meta_workers) btrfs_search_slot() read_block_for_search() readahead_tree_block() read_extent_buffer_pages() submit_one_bio() btree_submit_bio_hook() btrfs_bio_wq_end_io() --> sets the bio's bi_end_io callback to end_workqueue_bio() --> bio is submitted bio completes and its bi_end_io callback is invoked --> end_workqueue_bio() --> attempts to queue a task on fs_info->endio_meta_workers btrfs_destroy_workqueue( fs_info->caching_workers) So fix this by destroying the queues used for metadata I/O tasks only after destroying all the other queues. 
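The resulting teardown order in btrfs_stop_all_workers() can be summarized as follows (a condensed view; the complete list of work queues is in the diff below):

	static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
	{
		/* First destroy every queue whose tasks may issue metadata I/O. */
		btrfs_destroy_workqueue(fs_info->caching_workers);
		btrfs_destroy_workqueue(fs_info->endio_workers);
		/* ... all the other work queues ... */

		/*
		 * Only now destroy the metadata I/O queues, since bio completions
		 * triggered by tasks from the queues above may still try to queue
		 * work on them.
		 */
		btrfs_destroy_workqueue(fs_info->endio_meta_workers);
		btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
	}
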
Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/disk-io.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0e61d58b121..32a9ec11888d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2205,11 +2205,9 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->delalloc_workers); btrfs_destroy_workqueue(fs_info->workers); btrfs_destroy_workqueue(fs_info->endio_workers); - btrfs_destroy_workqueue(fs_info->endio_meta_workers); btrfs_destroy_workqueue(fs_info->endio_raid56_workers); btrfs_destroy_workqueue(fs_info->endio_repair_workers); btrfs_destroy_workqueue(fs_info->rmw_workers); - btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); btrfs_destroy_workqueue(fs_info->endio_write_workers); btrfs_destroy_workqueue(fs_info->endio_freespace_worker); btrfs_destroy_workqueue(fs_info->submit_workers); @@ -2219,6 +2217,13 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); btrfs_destroy_workqueue(fs_info->extent_workers); + /* + * Now that all other work queues are destroyed, we can safely destroy + * the queues used for metadata I/O, since tasks from those other work + * queues can do metadata I/O operations. + */ + btrfs_destroy_workqueue(fs_info->endio_meta_workers); + btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); } static void free_root_extent_buffers(struct btrfs_root *root) -- GitLab From 82bfb2e7b645c8f228dc3b6d3b27b0b10125ca4f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 14 Feb 2017 17:56:32 +0000 Subject: [PATCH 0539/1084] Btrfs: incremental send, fix unnecessary hole writes for sparse files When using the NO_HOLES feature, during an incremental send we often issue write operations for holes when we should not, because that range is already a hole in the destination snapshot. While that does not change the contents of the file at the receiver, it avoids preservation of file holes, leading to wasted disk space and extra IO during send/receive. A couple examples where the holes are not preserved follows. $ mkfs.btrfs -O no-holes -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xaa 0 4K" /mnt/foo $ xfs_io -f -c "pwrite -S 0xaa 0 4K" -c "pwrite -S 0xbb 1028K 4K" /mnt/bar $ btrfs subvolume snapshot -r /mnt /mnt/snap1 # Now add one new extent to our first test file, increasing its size and # leaving a 1Mb hole between the first extent and this new extent. $ xfs_io -c "pwrite -S 0xbb 1028K 4K" /mnt/foo # Now overwrite the last extent of our second test file. $ xfs_io -c "pwrite -S 0xcc 1028K 4K" /mnt/bar $ btrfs subvolume snapshot -r /mnt /mnt/snap2 $ xfs_io -r -c "fiemap -v" /mnt/snap2/foo /mnt/snap2/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..7]: 25088..25095 8 0x2000 1: [8..2055]: hole 2048 2: [2056..2063]: 24576..24583 8 0x2001 $ xfs_io -r -c "fiemap -v" /mnt/snap2/bar /mnt/snap2/bar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..7]: 25096..25103 8 0x2000 1: [8..2055]: hole 2048 2: [2056..2063]: 24584..24591 8 0x2001 $ btrfs send /mnt/snap1 -f /tmp/1.snap $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/2.snap $ umount /mnt # It's not relevant to enable no-holes in the new filesystem. 
$ mkfs.btrfs -O no-holes -f /dev/sdc $ mount /dev/sdc /mnt $ btrfs receive /mnt -f /tmp/1.snap $ btrfs receive /mnt -f /tmp/2.snap $ xfs_io -r -c "fiemap -v" /mnt/snap2/foo /mnt/snap2/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..7]: 24576..24583 8 0x2000 1: [8..2063]: 25624..27679 2056 0x1 $ xfs_io -r -c "fiemap -v" /mnt/snap2/bar /mnt/snap2/bar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..7]: 24584..24591 8 0x2000 1: [8..2063]: 27680..29735 2056 0x1 The holes do not exist in the second filesystem and they were replaced with extents filled with the byte 0x00, making each file take 1032Kb of space instead of 8Kb. So fix this by not issuing the write operations consisting of buffers filled with the byte 0x00 when the destination snapshot already has a hole for the respective range. A test case for fstests will follow soon. Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 712922ea64d2..456c8901489b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5306,6 +5306,81 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset) return ret; } +static int range_is_hole_in_parent(struct send_ctx *sctx, + const u64 start, + const u64 end) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_root *root = sctx->parent_root; + u64 search_start = start; + int ret; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = sctx->cur_ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = search_start; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + if (ret > 0 && path->slots[0] > 0) + path->slots[0]--; + + while (search_start < end) { + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + struct btrfs_file_extent_item *fi; + u64 extent_end; + + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid < sctx->cur_ino || + key.type < BTRFS_EXTENT_DATA_KEY) + goto next; + if (key.objectid > sctx->cur_ino || + key.type > BTRFS_EXTENT_DATA_KEY || + key.offset >= end) + break; + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) { + u64 size = btrfs_file_extent_inline_len(leaf, slot, fi); + + extent_end = ALIGN(key.offset + size, + root->fs_info->sectorsize); + } else { + extent_end = key.offset + + btrfs_file_extent_num_bytes(leaf, fi); + } + if (extent_end <= start) + goto next; + if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) { + search_start = extent_end; + goto next; + } + ret = 0; + goto out; +next: + path->slots[0]++; + } + ret = 1; +out: + btrfs_free_path(path); + return ret; +} + static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key) { @@ -5350,8 +5425,17 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, return ret; } - if (sctx->cur_inode_last_extent < key->offset) - ret = send_hole(sctx, key->offset); + if (sctx->cur_inode_last_extent < key->offset) { + ret = range_is_hole_in_parent(sctx, + sctx->cur_inode_last_extent, + key->offset); + if (ret < 0) + return ret; + else if (ret == 0) + ret = send_hole(sctx, key->offset); + else + ret = 0; + } sctx->cur_inode_last_extent = extent_end; return ret; } -- GitLab From 
76b42abbf7488121c4f9f1ea5941123306e25d99 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 14 Feb 2017 16:56:01 +0000 Subject: [PATCH 0540/1084] Btrfs: fix data loss after truncate when using the no-holes feature If we have a file with an implicit hole (NO_HOLES feature enabled) that has an extent following the hole, delayed writes against regions of the file behind the hole happened before but were not yet flushed and then we truncate the file to a smaller size that lies inside the hole, we end up persisting a wrong disk_i_size value for our inode that leads to data loss after umounting and mounting again the filesystem or after the inode is evicted and loaded again. This happens because at inode.c:btrfs_truncate_inode_items() we end up setting last_size to the offset of the extent that we deleted and that followed the hole. We then pass that value to btrfs_ordered_update_i_size() which updates the inode's disk_i_size to a value smaller then the offset of the buffered (delayed) writes. Example reproducer: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0x01 0K 32K" /mnt/foo $ xfs_io -d -c "pwrite -S 0x02 -b 32K 64K 32K" /mnt/foo $ xfs_io -c "truncate 60K" /mnt/foo --> inode's disk_i_size updated to 0 $ md5sum /mnt/foo 3c5ca3c3ab42f4b04d7e7eb0b0d4d806 /mnt/foo $ umount /dev/sdb $ mount /dev/sdb /mnt $ md5sum /mnt/foo d41d8cd98f00b204e9800998ecf8427e /mnt/foo --> Empty file, all data lost! Cc: # 3.14+ Fixes: 16e7549f045d ("Btrfs: incompatible format change to remove hole extents") Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/inode.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4efe9d82944c..70df45192424 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4418,19 +4418,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (found_type > min_type) { del_item = 1; } else { - if (item_end < new_size) { - /* - * With NO_HOLES mode, for the following mapping - * - * [0-4k][hole][8k-12k] - * - * if truncating isize down to 6k, it ends up - * isize being 8k. - */ - if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) - last_size = new_size; + if (item_end < new_size) break; - } if (found_key.offset >= new_size) del_item = 1; else @@ -4613,8 +4602,12 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, ret); } error: - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + ASSERT(last_size >= new_size); + if (!err && last_size > new_size) + last_size = new_size; btrfs_ordered_update_i_size(inode, last_size, NULL); + } btrfs_free_path(path); -- GitLab From 263d3995c93c6020576f6c93506412a0b9d1e932 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 17 Feb 2017 18:43:57 +0000 Subject: [PATCH 0541/1084] Btrfs: try harder to migrate items to left sibling before splitting a leaf Before attempting to split a leaf we try to migrate items from the leaf to its right and left siblings. We start by trying to move items into the rigth sibling and, if the new item is meant to be inserted at the end of our leaf, we try to free from our leaf an amount of bytes equal to the number of bytes used by the new item, by setting the variable space_needed to the byte size of that new item. 
However if we fail to move enough items to the right sibling due to lack of space in that sibling, we then try to move items into the left sibling, and in that case we try to free an amount equal to the size of the new item from our leaf, when we need only to free an amount corresponding to the size of the new item minus the current free space of our leaf. So make sure that before we try to move items to the left sibling we do set the variable space_needed with a value corresponding to the new item's size minus the leaf's current free space. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo --- fs/btrfs/ctree.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1192bc7d2ee7..1fb60ee77b4a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4159,6 +4159,9 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans, /* try to push all the items before our slot into the next leaf */ slot = path->slots[0]; + space_needed = data_size; + if (slot > 0) + space_needed -= btrfs_leaf_free_space(fs_info, path->nodes[0]); ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot); if (ret < 0) return ret; @@ -4214,6 +4217,10 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, if (wret < 0) return wret; if (wret) { + space_needed = data_size; + if (slot > 0) + space_needed -= btrfs_leaf_free_space(fs_info, + l); wret = push_leaf_left(trans, root, path, space_needed, space_needed, 0, (u32)-1); if (wret < 0) -- GitLab From 279b5165ffadf57e2596e0ad438cb9b69b76f320 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Feb 2017 10:28:37 +0100 Subject: [PATCH 0542/1084] perf/core: Remove confusing comment and move put_ctx() Since commit: 321027c1fe77 ("perf/core: Fix concurrent sys_perf_event_open() vs. 'move_group' race") ... the code looks like (assuming move_group==1): gctx = __perf_event_ctx_lock_double(group_leader, ctx); perf_remove_from_context(group_leader, 0); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { perf_remove_from_context(sibling, 0); put_ctx(gctx); } /* ... */ /* misleading comment about how this is the last reference */ put_ctx(gctx); perf_event_ctx_unlock(group_leader, gctx); What that 'last' put_ctx() does is drop @group_leader's reference on gctx after having dropped all its potential sibling references. But the thing is that __perf_event_ctx_lock_double() returns with a reference _and_ a held lock, and perf_event_ctx_unlock() unlocks that lock and drops that reference. Therefore that put_ctx() cannot be the 'last' of anything, nor is there an unbalance in puts. To reduce confusion, remove the comment and place the put_ctx() next to the remove_from_context() call. Reported-by: Ben Hutchings Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 77a932b54a64..94d7b9aae925 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9955,6 +9955,7 @@ SYSCALL_DEFINE5(perf_event_open, * of swizzling perf_event::ctx. 
*/ perf_remove_from_context(group_leader, 0); + put_ctx(gctx); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { @@ -9993,13 +9994,6 @@ SYSCALL_DEFINE5(perf_event_open, perf_event__state_init(group_leader); perf_install_in_context(ctx, group_leader, group_leader->cpu); get_ctx(ctx); - - /* - * Now that all events are installed in @ctx, nothing - * references @gctx anymore, so drop the last reference we have - * on it. - */ - put_ctx(gctx); } /* -- GitLab From 7bbba0eb1af34694868d028b80475981f90e6bee Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 15 Feb 2017 16:12:20 +0100 Subject: [PATCH 0543/1084] perf/core: Fix perf_event_enable_on_exec() timekeeping (again) Where commit: 7fce250915ef ("perf: Fix scaling vs. perf_event_enable_on_exec()") disabled the ctx-time a-priory, such that all events get enabled and scheduled at the time point in time, there is one hole in that patch, when no events do get enabled nothing re-enables the ctx-time. Reported-by: Ravi Bangoria Reported-by: Anton Blanchard Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 7fce250915ef ("perf: Fix scaling vs. perf_event_enable_on_exec()") Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 94d7b9aae925..d4e3f8d8238b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3522,6 +3522,8 @@ static void perf_event_enable_on_exec(int ctxn) if (enabled) { clone_ctx = unclone_ctx(ctx); ctx_resched(cpuctx, ctx, event_type); + } else { + ctx_sched_in(ctx, cpuctx, EVENT_TIME, current); } perf_ctx_unlock(cpuctx, ctx); -- GitLab From 1572e45a924f254d9570093abde46430c3172e3d Mon Sep 17 00:00:00 2001 From: Tan Xiaojun Date: Thu, 23 Feb 2017 14:04:39 +0800 Subject: [PATCH 0544/1084] perf/core: Fix the perf_cpu_time_max_percent check Use "proc_dointvec_minmax" instead of "proc_dointvec" to check the input value from user-space. If not, we can set a big value and some vars will overflow like "sysctl_perf_event_sample_rate" which will cause a lot of unexpected problems. Signed-off-by: Tan Xiaojun Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1487829879-56237-1-git-send-email-tanxiaojun@huawei.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index d4e3f8d8238b..c1c1cdf0b811 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -455,7 +455,7 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret = proc_dointvec(table, write, buffer, lenp, ppos); + int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) return ret; -- GitLab From 8cb68b343a66cf19834472012590490d34d31703 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Feb 2017 16:55:06 +0100 Subject: [PATCH 0545/1084] sched/core: Fix update_rq_clock() splat on hotplug (and suspend/resume) The hotplug code still triggers the warning about using a stale rq->clock value. 
Fix things up to actually run update_rq_clock() in a place where we record the 'UPDATED' flag, and then modify the annotation to retain this flag over the rq->lock fiddling that happens as a result of actually migrating all the tasks elsewhere. Reported-by: Linus Torvalds Tested-by: Mike Galbraith Tested-by: Sachin Sant Tested-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Matt Fleming Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Ross Zwisler Cc: Thomas Gleixner Fixes: 4d25b35ea372 ("sched/fair: Restore previous rq_flags when migrating tasks in hotplug") Link: http://lkml.kernel.org/r/20170202155506.GX6515@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 34e2291a9a6c..2d6e828a0471 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5557,7 +5557,7 @@ static void migrate_tasks(struct rq *dead_rq) { struct rq *rq = dead_rq; struct task_struct *next, *stop = rq->stop; - struct rq_flags rf, old_rf; + struct rq_flags rf; int dest_cpu; /* @@ -5576,7 +5576,9 @@ static void migrate_tasks(struct rq *dead_rq) * class method both need to have an up-to-date * value of rq->clock[_task] */ + rq_pin_lock(rq, &rf); update_rq_clock(rq); + rq_unpin_lock(rq, &rf); for (;;) { /* @@ -5589,7 +5591,7 @@ static void migrate_tasks(struct rq *dead_rq) /* * pick_next_task() assumes pinned rq->lock: */ - rq_pin_lock(rq, &rf); + rq_repin_lock(rq, &rf); next = pick_next_task(rq, &fake_task, &rf); BUG_ON(!next); next->sched_class->put_prev_task(rq, next); @@ -5618,13 +5620,6 @@ static void migrate_tasks(struct rq *dead_rq) continue; } - /* - * __migrate_task() may return with a different - * rq->lock held and a new cookie in 'rf', but we need - * to preserve rf::clock_update_flags for 'dead_rq'. - */ - old_rf = rf; - /* Find suitable destination for @next, with force if needed. */ dest_cpu = select_fallback_rq(dead_rq->cpu, next); @@ -5633,7 +5628,6 @@ static void migrate_tasks(struct rq *dead_rq) raw_spin_unlock(&rq->lock); rq = dead_rq; raw_spin_lock(&rq->lock); - rf = old_rf; } raw_spin_unlock(&next->pi_lock); } -- GitLab From a499c3ead88ccf147fc50689e85a530ad923ce36 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 21 Feb 2017 23:52:55 -0800 Subject: [PATCH 0546/1084] sched/fair: Update rq clock before changing a task's CPU affinity This is triggered during boot when CONFIG_SCHED_DEBUG is enabled: ------------[ cut here ]------------ WARNING: CPU: 6 PID: 81 at kernel/sched/sched.h:812 set_next_entity+0x11d/0x380 rq->clock_update_flags < RQCF_ACT_SKIP CPU: 6 PID: 81 Comm: torture_shuffle Not tainted 4.10.0+ #1 Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016 Call Trace: dump_stack+0x85/0xc2 __warn+0xcb/0xf0 warn_slowpath_fmt+0x5f/0x80 set_next_entity+0x11d/0x380 set_curr_task_fair+0x2b/0x60 do_set_cpus_allowed+0x139/0x180 __set_cpus_allowed_ptr+0x113/0x260 set_cpus_allowed_ptr+0x10/0x20 torture_shuffle+0xfd/0x180 kthread+0x10f/0x150 ? torture_shutdown_init+0x60/0x60 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x31/0x40 ---[ end trace dd94d92344cea9c6 ]--- The task is running && !queued, so there is no rq clock update before calling set_curr_task(). This patch fixes it by updating rq clock after holding rq->lock/pi_lock just as what other dequeue + put_prev + enqueue + set_curr story does. 
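
The invariant being enforced, namely take the lock, refresh the clock, and only then run code that reads it, can be modelled outside the kernel. The rq_model structure and helpers below are hypothetical stand-ins for rq, update_rq_clock() and set_curr_task(), shown only to illustrate the ordering; they are not the scheduler code.

#include <assert.h>
#include <stdio.h>
#include <time.h>

struct rq_model {
	int locked;		/* stands in for rq->lock */
	long clock;		/* stands in for rq->clock */
	int clock_updated;	/* stands in for RQCF_UPDATED */
};

static void rq_lock(struct rq_model *rq)
{
	rq->locked = 1;
	rq->clock_updated = 0;	/* the clock is stale on every acquisition */
}

static void rq_unlock(struct rq_model *rq)
{
	rq->locked = 0;
}

static void update_rq_clock_model(struct rq_model *rq)
{
	rq->clock = (long)time(NULL);
	rq->clock_updated = 1;
}

/*
 * Consumers of the clock assert that it was refreshed under the current
 * lock section, mirroring the SCHED_DEBUG warning in the changelog.
 */
static void set_curr_task_model(struct rq_model *rq)
{
	assert(rq->locked && rq->clock_updated);
	printf("operating on clock %ld\n", rq->clock);
}

int main(void)
{
	struct rq_model rq = { 0, 0, 0 };

	rq_lock(&rq);
	update_rq_clock_model(&rq);	/* the one-line fix from this patch */
	set_curr_task_model(&rq);	/* would trip the assert without it */
	rq_unlock(&rq);
	return 0;
}
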
Signed-off-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Matt Fleming Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1487749975-5994-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d6e828a0471..cc1e3e0d29b0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1087,6 +1087,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, int ret = 0; rq = task_rq_lock(p, &rf); + update_rq_clock(rq); if (p->flags & PF_KTHREAD) { /* -- GitLab From 29dee3c03abce04cd527878ef5f9e5f91b7b83f4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 10 Feb 2017 16:27:52 +0100 Subject: [PATCH 0547/1084] locking/refcounts: Out-of-line everything Linus asked to please make this real C code. And since size then isn't an issue what so ever anymore, remove the debug knob and make all WARN()s unconditional. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dwindsor@gmail.com Cc: elena.reshetova@intel.com Cc: gregkh@linuxfoundation.org Cc: ishkamiel@gmail.com Cc: keescook@chromium.org Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 277 ++------------------------------------- lib/Kconfig.debug | 13 -- lib/Makefile | 2 +- lib/refcount.c | 267 +++++++++++++++++++++++++++++++++++++ 4 files changed, 280 insertions(+), 279 deletions(-) create mode 100644 lib/refcount.c diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 600aadf9cca4..0e8cfb2ce91e 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -1,55 +1,10 @@ #ifndef _LINUX_REFCOUNT_H #define _LINUX_REFCOUNT_H -/* - * Variant of atomic_t specialized for reference counts. - * - * The interface matches the atomic_t interface (to aid in porting) but only - * provides the few functions one should use for reference counting. - * - * It differs in that the counter saturates at UINT_MAX and will not move once - * there. This avoids wrapping the counter and causing 'spurious' - * use-after-free issues. - * - * Memory ordering rules are slightly relaxed wrt regular atomic_t functions - * and provide only what is strictly required for refcounts. - * - * The increments are fully relaxed; these will not provide ordering. The - * rationale is that whatever is used to obtain the object we're increasing the - * reference count on will provide the ordering. For locked data structures, - * its the lock acquire, for RCU/lockless data structures its the dependent - * load. - * - * Do note that inc_not_zero() provides a control dependency which will order - * future stores against the inc, this ensures we'll never modify the object - * if we did not in fact acquire a reference. - * - * The decrements will provide release order, such that all the prior loads and - * stores will be issued before, it also provides a control dependency, which - * will order us against the subsequent free(). - * - * The control dependency is against the load of the cmpxchg (ll/sc) that - * succeeded. This means the stores aren't fully ordered, but this is fine - * because the 1->0 transition indicates no concurrency. - * - * Note that the allocator is responsible for ordering things between free() - * and alloc(). 
- * - */ - #include -#include #include #include -#ifdef CONFIG_DEBUG_REFCOUNT -#define REFCOUNT_WARN(cond, str) WARN_ON(cond) -#define __refcount_check __must_check -#else -#define REFCOUNT_WARN(cond, str) (void)(cond) -#define __refcount_check -#endif - typedef struct refcount_struct { atomic_t refs; } refcount_t; @@ -66,229 +21,21 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -static inline __refcount_check -bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - unsigned int old, new, val = atomic_read(&r->refs); - - for (;;) { - if (!val) - return false; - - if (unlikely(val == UINT_MAX)) - return true; - - new = val + i; - if (new < val) - new = UINT_MAX; - old = atomic_cmpxchg_relaxed(&r->refs, val, new); - if (old == val) - break; - - val = old; - } - - REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); - - return true; -} - -static inline void refcount_add(unsigned int i, refcount_t *r) -{ - REFCOUNT_WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); -} - -/* - * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. - * - * Provides no memory ordering, it is assumed the caller has guaranteed the - * object memory to be stable (RCU, etc.). It does provide a control dependency - * and thereby orders future stores. See the comment on top. - */ -static inline __refcount_check -bool refcount_inc_not_zero(refcount_t *r) -{ - unsigned int old, new, val = atomic_read(&r->refs); - - for (;;) { - new = val + 1; - - if (!val) - return false; - - if (unlikely(!new)) - return true; - - old = atomic_cmpxchg_relaxed(&r->refs, val, new); - if (old == val) - break; - - val = old; - } - - REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); - - return true; -} - -/* - * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. - * - * Provides no memory ordering, it is assumed the caller already has a - * reference on the object, will WARN when this is not so. - */ -static inline void refcount_inc(refcount_t *r) -{ - REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); -} - -/* - * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to - * decrement when saturated at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before, and provides a control dependency such that free() must come after. - * See the comment on top. - */ -static inline __refcount_check -bool refcount_sub_and_test(unsigned int i, refcount_t *r) -{ - unsigned int old, new, val = atomic_read(&r->refs); - - for (;;) { - if (unlikely(val == UINT_MAX)) - return false; - - new = val - i; - if (new > val) { - REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); - return false; - } - - old = atomic_cmpxchg_release(&r->refs, val, new); - if (old == val) - break; - - val = old; - } - - return !new; -} - -static inline __refcount_check -bool refcount_dec_and_test(refcount_t *r) -{ - return refcount_sub_and_test(1, r); -} +extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r); +extern void refcount_add(unsigned int i, refcount_t *r); -/* - * Similar to atomic_dec(), it will WARN on underflow and fail to decrement - * when saturated at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before. 
- */ -static inline -void refcount_dec(refcount_t *r) -{ - REFCOUNT_WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); -} - -/* - * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the - * success thereof. - * - * Like all decrement operations, it provides release memory order and provides - * a control dependency. - * - * It can be used like a try-delete operator; this explicit case is provided - * and not cmpxchg in generic, because that would allow implementing unsafe - * operations. - */ -static inline __refcount_check -bool refcount_dec_if_one(refcount_t *r) -{ - return atomic_cmpxchg_release(&r->refs, 1, 0) == 1; -} - -/* - * No atomic_t counterpart, it decrements unless the value is 1, in which case - * it will return false. - * - * Was often done like: atomic_add_unless(&var, -1, 1) - */ -static inline __refcount_check -bool refcount_dec_not_one(refcount_t *r) -{ - unsigned int old, new, val = atomic_read(&r->refs); +extern __must_check bool refcount_inc_not_zero(refcount_t *r); +extern void refcount_inc(refcount_t *r); - for (;;) { - if (unlikely(val == UINT_MAX)) - return true; +extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); +extern void refcount_sub(unsigned int i, refcount_t *r); - if (val == 1) - return false; +extern __must_check bool refcount_dec_and_test(refcount_t *r); +extern void refcount_dec(refcount_t *r); - new = val - 1; - if (new > val) { - REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n"); - return true; - } - - old = atomic_cmpxchg_release(&r->refs, val, new); - if (old == val) - break; - - val = old; - } - - return true; -} - -/* - * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail - * to decrement when saturated at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before, and provides a control dependency such that free() must come after. - * See the comment on top. - */ -static inline __refcount_check -bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) -{ - if (refcount_dec_not_one(r)) - return false; - - mutex_lock(lock); - if (!refcount_dec_and_test(r)) { - mutex_unlock(lock); - return false; - } - - return true; -} - -/* - * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to - * decrement when saturated at UINT_MAX. - * - * Provides release memory ordering, such that prior loads and stores are done - * before, and provides a control dependency such that free() must come after. - * See the comment on top. 
- */ -static inline __refcount_check -bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) -{ - if (refcount_dec_not_one(r)) - return false; - - spin_lock(lock); - if (!refcount_dec_and_test(r)) { - spin_unlock(lock); - return false; - } - - return true; -} +extern __must_check bool refcount_dec_if_one(refcount_t *r); +extern __must_check bool refcount_dec_not_one(refcount_t *r); +extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock); +extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock); #endif /* _LINUX_REFCOUNT_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index acedbe626d47..0dbce99d8433 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -716,19 +716,6 @@ source "lib/Kconfig.kmemcheck" source "lib/Kconfig.kasan" -config DEBUG_REFCOUNT - bool "Verbose refcount checks" - help - Say Y here if you want reference counters (refcount_t and kref) to - generate WARNs on dubious usage. Without this refcount_t will still - be a saturating counter and avoid Use-After-Free by turning it into - a resource leak Denial-Of-Service. - - Use of this option will increase kernel text size but will alert the - admin of potential abuse. - - If in doubt, say "N". - endmenu # "Memory Debugging" config ARCH_HAS_KCOV diff --git a/lib/Makefile b/lib/Makefile index 19ea76149a37..192e4d03caf9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -36,7 +36,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ - once.o + once.o refcount.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o diff --git a/lib/refcount.c b/lib/refcount.c new file mode 100644 index 000000000000..1d33366189d1 --- /dev/null +++ b/lib/refcount.c @@ -0,0 +1,267 @@ +/* + * Variant of atomic_t specialized for reference counts. + * + * The interface matches the atomic_t interface (to aid in porting) but only + * provides the few functions one should use for reference counting. + * + * It differs in that the counter saturates at UINT_MAX and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free issues. + * + * Memory ordering rules are slightly relaxed wrt regular atomic_t functions + * and provide only what is strictly required for refcounts. + * + * The increments are fully relaxed; these will not provide ordering. The + * rationale is that whatever is used to obtain the object we're increasing the + * reference count on will provide the ordering. For locked data structures, + * its the lock acquire, for RCU/lockless data structures its the dependent + * load. + * + * Do note that inc_not_zero() provides a control dependency which will order + * future stores against the inc, this ensures we'll never modify the object + * if we did not in fact acquire a reference. + * + * The decrements will provide release order, such that all the prior loads and + * stores will be issued before, it also provides a control dependency, which + * will order us against the subsequent free(). + * + * The control dependency is against the load of the cmpxchg (ll/sc) that + * succeeded. This means the stores aren't fully ordered, but this is fine + * because the 1->0 transition indicates no concurrency. 
+ * + * Note that the allocator is responsible for ordering things between free() + * and alloc(). + * + */ + +#include +#include + +bool refcount_add_not_zero(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (!val) + return false; + + if (unlikely(val == UINT_MAX)) + return true; + + new = val + i; + if (new < val) + new = UINT_MAX; + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} +EXPORT_SYMBOL_GPL(refcount_add_not_zero); + +void refcount_add(unsigned int i, refcount_t *r) +{ + WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); +} +EXPORT_SYMBOL_GPL(refcount_add); + +/* + * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller has guaranteed the + * object memory to be stable (RCU, etc.). It does provide a control dependency + * and thereby orders future stores. See the comment on top. + */ +bool refcount_inc_not_zero(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + new = val + 1; + + if (!val) + return false; + + if (unlikely(!new)) + return true; + + old = atomic_cmpxchg_relaxed(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + + return true; +} +EXPORT_SYMBOL_GPL(refcount_inc_not_zero); + +/* + * Similar to atomic_inc(), will saturate at UINT_MAX and WARN. + * + * Provides no memory ordering, it is assumed the caller already has a + * reference on the object, will WARN when this is not so. + */ +void refcount_inc(refcount_t *r) +{ + WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); +} +EXPORT_SYMBOL_GPL(refcount_inc); + +bool refcount_sub_and_test(unsigned int i, refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return false; + + new = val - i; + if (new > val) { + WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return false; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return !new; +} +EXPORT_SYMBOL_GPL(refcount_sub_and_test); + +/* + * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +bool refcount_dec_and_test(refcount_t *r) +{ + return refcount_sub_and_test(1, r); +} +EXPORT_SYMBOL_GPL(refcount_dec_and_test); + +/* + * Similar to atomic_dec(), it will WARN on underflow and fail to decrement + * when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before. + */ + +void refcount_dec(refcount_t *r) +{ + WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); +} +EXPORT_SYMBOL_GPL(refcount_dec); + +/* + * No atomic_t counterpart, it attempts a 1 -> 0 transition and returns the + * success thereof. + * + * Like all decrement operations, it provides release memory order and provides + * a control dependency. 
+ * + * It can be used like a try-delete operator; this explicit case is provided + * and not cmpxchg in generic, because that would allow implementing unsafe + * operations. + */ +bool refcount_dec_if_one(refcount_t *r) +{ + return atomic_cmpxchg_release(&r->refs, 1, 0) == 1; +} +EXPORT_SYMBOL_GPL(refcount_dec_if_one); + +/* + * No atomic_t counterpart, it decrements unless the value is 1, in which case + * it will return false. + * + * Was often done like: atomic_add_unless(&var, -1, 1) + */ +bool refcount_dec_not_one(refcount_t *r) +{ + unsigned int old, new, val = atomic_read(&r->refs); + + for (;;) { + if (unlikely(val == UINT_MAX)) + return true; + + if (val == 1) + return false; + + new = val - 1; + if (new > val) { + WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + return true; + } + + old = atomic_cmpxchg_release(&r->refs, val, new); + if (old == val) + break; + + val = old; + } + + return true; +} +EXPORT_SYMBOL_GPL(refcount_dec_not_one); + +/* + * Similar to atomic_dec_and_mutex_lock(), it will WARN on underflow and fail + * to decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) +{ + if (refcount_dec_not_one(r)) + return false; + + mutex_lock(lock); + if (!refcount_dec_and_test(r)) { + mutex_unlock(lock); + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(refcount_dec_and_mutex_lock); + +/* + * Similar to atomic_dec_and_lock(), it will WARN on underflow and fail to + * decrement when saturated at UINT_MAX. + * + * Provides release memory ordering, such that prior loads and stores are done + * before, and provides a control dependency such that free() must come after. + * See the comment on top. + */ +bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) +{ + if (refcount_dec_not_one(r)) + return false; + + spin_lock(lock); + if (!refcount_dec_and_test(r)) { + spin_unlock(lock); + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(refcount_dec_and_lock); + -- GitLab From 318b1dedcd39012624f466d281627553e9fa2570 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Thu, 23 Feb 2017 15:09:34 +0200 Subject: [PATCH 0548/1084] locking/refcounts: Add missing kernel.h header to have UINT_MAX defined Fix header dependency. Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Elena Reshetova Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1487855374-21993-1-git-send-email-elena.reshetova@intel.com Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 0e8cfb2ce91e..0023fee4bbbc 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -4,6 +4,7 @@ #include #include #include +#include typedef struct refcount_struct { atomic_t refs; -- GitLab From d1091c7fa3d52ebce4dd3f15d04155b3469b2f90 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 21 Feb 2017 15:35:32 -0600 Subject: [PATCH 0549/1084] objtool: Improve detection of BUG() and other dead ends The BUG() macro's use of __builtin_unreachable() via the unreachable() macro tells gcc that the instruction is a dead end, and that it's safe to assume the current code path will not execute past the previous instruction. 
On x86, the BUG() macro is implemented with the 'ud2' instruction. When objtool's branch analysis sees that instruction, it knows the current code path has come to a dead end. Peter Zijlstra has been working on a patch to change the WARN macros to use 'ud2'. That patch will break objtool's assumption that 'ud2' is always a dead end. Generally it's best for objtool to avoid making those kinds of assumptions anyway. The more ignorant it is of kernel code internals, the better. So create a more generic way for objtool to detect dead ends by adding an annotation to the unreachable() macro. The annotation stores a pointer to the end of the unreachable code path in an '__unreachable' section. Objtool can read that section to find the dead ends. Tested-by: Peter Zijlstra (Intel) Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/41a6d33971462ebd944a1c60ad4bf5be86c17b77.1487712920.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 1 + include/linux/compiler-gcc.h | 13 ++++++- tools/objtool/arch.h | 5 ++- tools/objtool/arch/x86/decode.c | 3 -- tools/objtool/builtin-check.c | 60 ++++++++++++++++++++++++++++++--- 5 files changed, 71 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e79f15f108a8..ad0118fbce90 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -346,6 +346,7 @@ SECTIONS /DISCARD/ : { *(.eh_frame) *(__func_stack_frame_non_standard) + *(__unreachable) } } diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0444b1336268..8ea159fc489d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -195,6 +195,17 @@ #endif #endif +#ifdef CONFIG_STACK_VALIDATION +#define annotate_unreachable() ({ \ + asm("1:\t\n" \ + ".pushsection __unreachable, \"a\"\t\n" \ + ".long 1b\t\n" \ + ".popsection\t\n"); \ +}) +#else +#define annotate_unreachable() +#endif + /* * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer @@ -204,7 +215,7 @@ * this in the preprocessor, but we can live with this because they're * unreleased. Really, we need to have autoconf for the kernel. */ -#define unreachable() __builtin_unreachable() +#define unreachable() annotate_unreachable(); __builtin_unreachable() /* Mark a function definition as prohibited from being cloned. 
*/ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index f7350fcedc70..a59e061c0b4a 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -31,9 +31,8 @@ #define INSN_CALL_DYNAMIC 8 #define INSN_RETURN 9 #define INSN_CONTEXT_SWITCH 10 -#define INSN_BUG 11 -#define INSN_NOP 12 -#define INSN_OTHER 13 +#define INSN_NOP 11 +#define INSN_OTHER 12 #define INSN_LAST INSN_OTHER int arch_decode_instruction(struct elf *elf, struct section *sec, diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 039636ffb6c8..6ac99e3266eb 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -118,9 +118,6 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, op2 == 0x35) /* sysenter, sysret */ *type = INSN_CONTEXT_SWITCH; - else if (op2 == 0x0b || op2 == 0xb9) - /* ud2 */ - *type = INSN_BUG; else if (op2 == 0x0d || op2 == 0x1f) /* nopl/nopw */ *type = INSN_NOP; diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index e8a1f699058a..5fc52ee3264c 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -51,7 +51,7 @@ struct instruction { unsigned int len, state; unsigned char type; unsigned long immediate; - bool alt_group, visited; + bool alt_group, visited, dead_end; struct symbol *call_dest; struct instruction *jump_dest; struct list_head alts; @@ -329,6 +329,54 @@ static int decode_instructions(struct objtool_file *file) return 0; } +/* + * Find all uses of the unreachable() macro, which are code path dead ends. + */ +static int add_dead_ends(struct objtool_file *file) +{ + struct section *sec; + struct rela *rela; + struct instruction *insn; + bool found; + + sec = find_section_by_name(file->elf, ".rela__unreachable"); + if (!sec) + return 0; + + list_for_each_entry(rela, &sec->rela_list, list) { + if (rela->sym->type != STT_SECTION) { + WARN("unexpected relocation symbol type in .rela__unreachable"); + return -1; + } + insn = find_insn(file, rela->sym->sec, rela->addend); + if (insn) + insn = list_prev_entry(insn, list); + else if (rela->addend == rela->sym->sec->len) { + found = false; + list_for_each_entry_reverse(insn, &file->insn_list, list) { + if (insn->sec == rela->sym->sec) { + found = true; + break; + } + } + + if (!found) { + WARN("can't find unreachable insn at %s+0x%x", + rela->sym->sec->name, rela->addend); + return -1; + } + } else { + WARN("can't find unreachable insn at %s+0x%x", + rela->sym->sec->name, rela->addend); + return -1; + } + + insn->dead_end = true; + } + + return 0; +} + /* * Warnings shouldn't be reported for ignored functions. */ @@ -843,6 +891,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = add_dead_ends(file); + if (ret) + return ret; + add_ignores(file); ret = add_jump_destinations(file); @@ -1037,13 +1089,13 @@ static int validate_branch(struct objtool_file *file, return 0; - case INSN_BUG: - return 0; - default: break; } + if (insn->dead_end) + return 0; + insn = next_insn_same_sec(file, insn); if (!insn) { WARN("%s: unexpected end of section", sec->name); -- GitLab From 96b777452d8881480fd5be50112f791c17db4b6b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 8 Feb 2017 14:27:27 +0300 Subject: [PATCH 0550/1084] sched/cgroup: Move sched_online_group() back into css_online() to fix crash Commit: 2f5177f0fd7e ("sched/cgroup: Fix/cleanup cgroup teardown/init") .. 
moved sched_online_group() from css_online() to css_alloc(). It exposes half-baked task group into global lists before initializing generic cgroup stuff. LTP testcase (third in cgroup_regression_test) written for testing similar race in kernels 2.6.26-2.6.28 easily triggers this oops: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: kernfs_path_from_node_locked+0x260/0x320 CPU: 1 PID: 30346 Comm: cat Not tainted 4.10.0-rc5-test #4 Call Trace: ? kernfs_path_from_node+0x4f/0x60 kernfs_path_from_node+0x3e/0x60 print_rt_rq+0x44/0x2b0 print_rt_stats+0x7a/0xd0 print_cpu+0x2fc/0xe80 ? __might_sleep+0x4a/0x80 sched_debug_show+0x17/0x30 seq_read+0xf2/0x3b0 proc_reg_read+0x42/0x70 __vfs_read+0x28/0x130 ? security_file_permission+0x9b/0xc0 ? rw_verify_area+0x4e/0xb0 vfs_read+0xa5/0x170 SyS_read+0x46/0xa0 entry_SYSCALL_64_fastpath+0x1e/0xad Here the task group is already linked into the global RCU-protected 'task_groups' list, but the css->cgroup pointer is still NULL. This patch reverts this chunk and moves online back to css_online(). Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Fixes: 2f5177f0fd7e ("sched/cgroup: Fix/cleanup cgroup teardown/init") Link: http://lkml.kernel.org/r/148655324740.424917.5302984537258726349.stgit@buzz Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cc1e3e0d29b0..e01bd807f0af 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6811,11 +6811,20 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); - sched_online_group(tg, parent); - return &tg->css; } +/* Expose task group only after completing cgroup initialization */ +static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) +{ + struct task_group *tg = css_tg(css); + struct task_group *parent = css_tg(css->parent); + + if (parent) + sched_online_group(tg, parent); + return 0; +} + static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) { struct task_group *tg = css_tg(css); @@ -7221,6 +7230,7 @@ static struct cftype cpu_files[] = { struct cgroup_subsys cpu_cgrp_subsys = { .css_alloc = cpu_cgroup_css_alloc, + .css_online = cpu_cgroup_css_online, .css_released = cpu_cgroup_css_released, .css_free = cpu_cgroup_css_free, .fork = cpu_cgroup_fork, -- GitLab From ea6eac904f0a5a0c223bcfb133ec880ba9c70ae3 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Wed, 22 Feb 2017 21:10:48 -0600 Subject: [PATCH 0551/1084] arm64: Avoid clobbering mm in erratum workaround on QDF2400 Commit 38fd94b0275c ("arm64: Work around Falkor erratum 1003") tried to work around a hardware erratum, but actually caused a system crash of its own during switch_mm: cpu_do_switch_mm+0x20/0x40 efi_virtmap_load+0x34/0x40 virt_efi_get_next_variable+0x64/0xc8 efivar_init+0x8c/0x348 efisubsys_init+0xd4/0x270 do_one_initcall+0x80/0x110 kernel_init_freeable+0x19c/0x240 kernel_init+0x10/0x100 ret_from_fork+0x10/0x50 Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b In cpu_do_switch_mm, x1 contains the mm_struct pointer, which needs to be preserved by the pre_ttbr0_update_workaround macro rather than passed as a temporary. This patch clobbers x2 and x3 instead, keeping the mm_struct intact after the workaround has run. 
Fixes: 38fd94b0275c ("arm64: Work around Falkor erratum 1003") Tested-by: Manoj Iyer Signed-off-by: Shanker Donthineni Signed-off-by: Will Deacon --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index cd4d53d7e458..877d42fb0df6 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -138,7 +138,7 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) - pre_ttbr0_update_workaround x0, x1, x2 + pre_ttbr0_update_workaround x0, x2, x3 mmid x1, x1 // get mm->context.id bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 -- GitLab From d81bbe6d882461dec4b71dbe2aa85565fcca4187 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 23 Feb 2017 16:22:55 +0000 Subject: [PATCH 0552/1084] Revert "arm64: mm: set the contiguous bit for kernel mappings where appropriate" This reverts commit 0bfc445dec9dd8130d22c9f4476eed7598524129. When we change the permissions of regions mapped using contiguous entries, the architecture requires us to follow a Break-Before-Make strategy, breaking *all* associated entries before we can change any of the following properties from the entries: - presence of the contiguous bit - output address - attributes - permissiones Failure to do so can result in a number of problems (e.g. TLB conflict aborts and/or erroneous results from TLB lookups). See ARM DDI 0487A.k_iss10775, "Misprogramming of the Contiguous bit", page D4-1762. We do not take this into account when altering the permissions of kernel segments in mark_rodata_ro(), where we change the permissions of live contiguous entires one-by-one, leaving them transiently inconsistent. This has been observed to result in failures on some fast model configurations. Unfortunately, we cannot follow Break-Before-Make here as we'd have to unmap kernel text and data used to perform the sequence. For the timebeing, revert commit 0bfc445dec9dd813 so as to avoid issues resulting from this misuse of the contiguous bit. Signed-off-by: Mark Rutland Acked-by: Ard Biesheuvel Reported-by: Jean-Philippe Brucker Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Will Deacon Cc: stable@vger.kernel.org # v4.10 Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b805c017f789..d28dbcf596b6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -109,10 +109,8 @@ static bool pgattr_change_is_safe(u64 old, u64 new) static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), - bool page_mappings_only) + phys_addr_t (*pgtable_alloc)(void)) { - pgprot_t __prot = prot; pte_t *pte; BUG_ON(pmd_sect(*pmd)); @@ -130,18 +128,7 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, do { pte_t old_pte = *pte; - /* - * Set the contiguous bit for the subsequent group of PTEs if - * its size and alignment are appropriate. 
- */ - if (((addr | PFN_PHYS(pfn)) & ~CONT_PTE_MASK) == 0) { - if (end - addr >= CONT_PTE_SIZE && !page_mappings_only) - __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - else - __prot = prot; - } - - set_pte(pte, pfn_pte(pfn, __prot)); + set_pte(pte, pfn_pte(pfn, prot)); pfn++; /* @@ -160,7 +147,6 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t (*pgtable_alloc)(void), bool page_mappings_only) { - pgprot_t __prot = prot; pmd_t *pmd; unsigned long next; @@ -187,18 +173,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && !page_mappings_only) { - /* - * Set the contiguous bit for the subsequent group of - * PMDs if its size and alignment are appropriate. - */ - if (((addr | phys) & ~CONT_PMD_MASK) == 0) { - if (end - addr >= CONT_PMD_SIZE) - __prot = __pgprot(pgprot_val(prot) | - PTE_CONT); - else - __prot = prot; - } - pmd_set_huge(pmd, phys, __prot); + pmd_set_huge(pmd, phys, prot); /* * After the PMD entry has been populated once, we @@ -208,8 +183,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, pmd_val(*pmd))); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, pgtable_alloc, - page_mappings_only); + prot, pgtable_alloc); BUG_ON(pmd_val(old_pmd) != 0 && pmd_val(old_pmd) != pmd_val(*pmd)); -- GitLab From 638f863dbbc8da16834ee0acc6ac10754f79c486 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 23 Feb 2017 16:03:17 +0000 Subject: [PATCH 0553/1084] arm64/cpufeature: check correct field width when updating sys_val When we're updating a register's sys_val, we use arm64_ftr_value() to find the new field value. We use cpuid_feature_extract_field() to find the new value, but this implicitly assumes a 4-bit field, so we may extract more bits than we mean to for fields like CTR_EL0.L1ip. This affects update_cpu_ftr_reg(), where we may extract erroneous values for ftr_cur and ftr_new. Depending on the additional bits extracted in either case, we may erroneously detect that the value is mismatched, and we'll try to compute a new safe value. Dependent on these extra bits and feature type, arm64_ftr_safe_value() may pessimistically select the always-safe value, or may erroneously choose either the extracted cur or new value as the safe option. The extra bits will subsequently be masked out in arm64_ftr_set_value(), so we may choose a higher value, yet write back a lower one. Fix this by passing the width down explicitly in arm64_ftr_value(), so we always extract the correct amount. Signed-off-by: Mark Rutland Reviewed-by: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 4ce82ed3e7c3..05310ad8c5ab 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -184,16 +184,22 @@ static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg) } static inline int __attribute_const__ -cpuid_feature_extract_field(u64 features, int field, bool sign) +cpuid_feature_extract_field_width(u64 features, int field, int width, bool sign) { return (sign) ? 
- cpuid_feature_extract_signed_field(features, field) : - cpuid_feature_extract_unsigned_field(features, field); + cpuid_feature_extract_signed_field_width(features, field, width) : + cpuid_feature_extract_unsigned_field_width(features, field, width); +} + +static inline int __attribute_const__ +cpuid_feature_extract_field(u64 features, int field, bool sign) +{ + return cpuid_feature_extract_field_width(features, field, 4, sign); } static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val) { - return (s64)cpuid_feature_extract_field(val, ftrp->shift, ftrp->sign); + return (s64)cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width, ftrp->sign); } static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) -- GitLab From 3436c4bdb30de421d46f58c9174669fbcfd40ce0 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 21 Feb 2017 16:59:26 +0800 Subject: [PATCH 0554/1084] f2fs: put allocate_segment after refresh_sit_entry SIT information should be updated before segment allocation, since SSR needs latest valid block information. Current code does not update the old_blkaddr info in sit_entry, so adjust the allocate_segment to its proper location. Commit 5e443818fa0b2a2845561ee25bec181424fb2889 ("f2fs: handle dirty segments inside refresh_sit_entry") puts it into wrong location. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 68db07f857f8..454c07d470fc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1759,14 +1759,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_block_count(sbi, curseg); - if (!__has_curseg_space(sbi, type)) - sit_i->s_ops->allocate_segment(sbi, type, false); /* * SIT information should be updated before segment allocation, * since SSR needs latest valid block information. */ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); + mutex_unlock(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) -- GitLab From b9cd20619e359d199b755543474c3d853c8e3415 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Wed, 22 Feb 2017 10:28:59 +0000 Subject: [PATCH 0555/1084] f2fs: node segment is prior to data segment selected victim As data segment gc may lead dnode dirty, so the greedy cost for data segment should be valid blocks * 2, that is data segment is prior to node segment. Signed-off-by: Hou Pengyang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e93aecb0138b..11416c7cb705 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -242,6 +242,16 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); } +static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + unsigned int valid_blocks = + get_valid_blocks(sbi, segno, sbi->segs_per_sec); + + return IS_DATASEG(get_seg_entry(sbi, segno)->type) ? 
+ valid_blocks * 2 : valid_blocks; +} + static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, struct victim_sel_policy *p) { @@ -250,7 +260,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) - return get_valid_blocks(sbi, segno, sbi->segs_per_sec); + return get_greedy_cost(sbi, segno); else return get_cb_cost(sbi, segno); } -- GitLab From 035e97adab26c1121cedaeb9bd04cf48a8e8cf51 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 22 Feb 2017 20:50:49 +0800 Subject: [PATCH 0556/1084] f2fs: do SSR for data when there is enough free space In allocate_segment_by_default(), need_SSR() already detected it's time to do SSR. So, let's try to find victims for data segments more aggressively in time. Signed-off-by: Yunlong Song Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 454c07d470fc..46e29c555299 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1540,7 +1540,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; - if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0, 0)) + if (IS_NODESEG(type)) return v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR); -- GitLab From d0db7703ac186ff2fbed1e8b862bd43ec3fd2812 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 16:39:11 -0800 Subject: [PATCH 0557/1084] f2fs: do SSR in higher priority Let's check SSR in prior to LFS allocation. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 46e29c555299..8ba802e1bb06 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1324,17 +1324,6 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } -static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int segno = curseg->segno + 1; - struct free_segmap_info *free_i = FREE_I(sbi); - - if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) - return !test_bit(segno, free_i->free_segmap); - return 0; -} - /* * Find a new segment from the free segments bitmap to right order * This function should be returned with success, otherwise BUG @@ -1559,21 +1548,17 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) static void allocate_segment_by_default(struct f2fs_sb_info *sbi, int type, bool force) { - struct curseg_info *curseg = CURSEG_I(sbi, type); - if (force) new_curseg(sbi, type, true); else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); - else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) - new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type, true); else new_curseg(sbi, type, false); - stat_inc_seg_type(sbi, curseg); + stat_inc_seg_type(sbi, CURSEG_I(sbi, type)); } void allocate_new_segments(struct f2fs_sb_info *sbi) -- GitLab From c192f7a4779559dddeb8c03de2b6ef499115c938 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 17:10:18 -0800 Subject: [PATCH 0558/1084] f2fs: find data segments across all the types Previously, if type is CURSEG_HOT_DATA, we only check CURSEG_HOT_DATA only. 
This patch fixes to search all the different types for SSR. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8ba802e1bb06..630e7045c53e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1528,16 +1528,23 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; + int i; + + /* need_SSR() already forces to do this */ + if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR)) + return 1; if (IS_NODESEG(type)) - return v_ops->get_victim(sbi, - &(curseg)->next_segno, BG_GC, type, SSR); + return 0; /* For data segments, let's do SSR more intensively */ - for (; type >= CURSEG_HOT_DATA; type--) + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + if (i == type) + continue; if (v_ops->get_victim(sbi, &(curseg)->next_segno, - BG_GC, type, SSR)) + BG_GC, i, SSR)) return 1; + } return 0; } -- GitLab From 70d625cbdbbb99e38953b8b7ac792a6471e68efb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 17:02:32 -0800 Subject: [PATCH 0559/1084] f2fs: do SSR for node segments more aggresively This patch gives more SSR chances for node blocks. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 630e7045c53e..d780ecccc4ac 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1528,17 +1528,22 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; - int i; + int i, n; /* need_SSR() already forces to do this */ if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR)) return 1; - if (IS_NODESEG(type)) - return 0; + /* For node segments, let's do SSR more intensively */ + if (IS_NODESEG(type)) { + i = CURSEG_HOT_NODE; + n = CURSEG_COLD_NODE; + } else { + i = CURSEG_HOT_DATA; + n = CURSEG_COLD_DATA; + } - /* For data segments, let's do SSR more intensively */ - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + for (; i <= n; i++) { if (i == type) continue; if (v_ops->get_victim(sbi, &(curseg)->next_segno, -- GitLab From 55f2a04588c5881d90e22631b17a84cd25d17cc4 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 13 Feb 2017 14:44:19 +0100 Subject: [PATCH 0560/1084] ceph: remove special ack vs commit behavior - ask for a commit reply instead of an ack reply in __ceph_pool_perm_get() - don't ask for both ack and commit replies in ceph_sync_write() - since just only one reply is requested now, i_unsafe_writes list will always be empty -- kill ceph_sync_write_wait() and go back to a standard ->evict_inode() Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Reviewed-by: Sage Weil --- fs/ceph/addr.c | 2 +- fs/ceph/caps.c | 2 -- fs/ceph/file.c | 88 +------------------------------------------------ fs/ceph/inode.c | 9 ----- fs/ceph/super.c | 1 - fs/ceph/super.h | 4 +-- 6 files changed, 3 insertions(+), 103 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4547bbf80e4f..3f0474c55f05 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1872,7 +1872,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, goto out_unlock; } - wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK; + wr_req->r_flags = 
CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc); ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 3c2dfd72e5b2..cd966f276a8d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2091,8 +2091,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); - ceph_sync_write_wait(inode); - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret < 0) goto out; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index baad3b688ada..023cb7fd9b5f 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -795,89 +795,6 @@ static void ceph_aio_retry_work(struct work_struct *work) kfree(aio_work); } -/* - * Write commit request unsafe callback, called to tell us when a - * request is unsafe (that is, in flight--has been handed to the - * messenger to send to its target osd). It is called again when - * we've received a response message indicating the request is - * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request - * is completed early (and unsuccessfully) due to a timeout or - * interrupt. - * - * This is used if we requested both an ACK and ONDISK commit reply - * from the OSD. - */ -static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) -{ - struct ceph_inode_info *ci = ceph_inode(req->r_inode); - - dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid, - unsafe ? "un" : ""); - if (unsafe) { - ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); - spin_lock(&ci->i_unsafe_lock); - list_add_tail(&req->r_unsafe_item, - &ci->i_unsafe_writes); - spin_unlock(&ci->i_unsafe_lock); - - complete_all(&req->r_completion); - } else { - spin_lock(&ci->i_unsafe_lock); - list_del_init(&req->r_unsafe_item); - spin_unlock(&ci->i_unsafe_lock); - ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR); - } -} - -/* - * Wait on any unsafe replies for the given inode. First wait on the - * newest request, and make that the upper bound. Then, if there are - * more requests, keep waiting on the oldest as long as it is still older - * than the original request. 
- */ -void ceph_sync_write_wait(struct inode *inode) -{ - struct ceph_inode_info *ci = ceph_inode(inode); - struct list_head *head = &ci->i_unsafe_writes; - struct ceph_osd_request *req; - u64 last_tid; - - if (!S_ISREG(inode->i_mode)) - return; - - spin_lock(&ci->i_unsafe_lock); - if (list_empty(head)) - goto out; - - /* set upper bound as _last_ entry in chain */ - - req = list_last_entry(head, struct ceph_osd_request, - r_unsafe_item); - last_tid = req->r_tid; - - do { - ceph_osdc_get_request(req); - spin_unlock(&ci->i_unsafe_lock); - - dout("sync_write_wait on tid %llu (until %llu)\n", - req->r_tid, last_tid); - wait_for_completion(&req->r_done_completion); - ceph_osdc_put_request(req); - - spin_lock(&ci->i_unsafe_lock); - /* - * from here on look at first entry in chain, since we - * only want to wait for anything older than last_tid - */ - if (list_empty(head)) - break; - req = list_first_entry(head, struct ceph_osd_request, - r_unsafe_item); - } while (req->r_tid < last_tid); -out: - spin_unlock(&ci->i_unsafe_lock); -} - static ssize_t ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct ceph_snap_context *snapc, @@ -1119,8 +1036,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ACK; + CEPH_OSD_FLAG_WRITE; while ((len = iov_iter_count(from)) > 0) { size_t left; @@ -1166,8 +1082,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, goto out; } - /* get a second commit callback */ - req->r_unsafe_callback = ceph_sync_write_unsafe; req->r_inode = inode; osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 68f46132b157..fd8f771f99b7 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -499,7 +499,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_rdcache_gen = 0; ci->i_rdcache_revoking = 0; - INIT_LIST_HEAD(&ci->i_unsafe_writes); INIT_LIST_HEAD(&ci->i_unsafe_dirops); INIT_LIST_HEAD(&ci->i_unsafe_iops); spin_lock_init(&ci->i_unsafe_lock); @@ -583,14 +582,6 @@ int ceph_drop_inode(struct inode *inode) return 1; } -void ceph_evict_inode(struct inode *inode) -{ - /* wait unsafe sync writes */ - ceph_sync_write_wait(inode); - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); -} - static inline blkcnt_t calc_inode_blocks(u64 size) { return (size + (1<<9) - 1) >> 9; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index a0a0b6d02f89..0ec8d0114e57 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -757,7 +757,6 @@ static const struct super_operations ceph_super_ops = { .destroy_inode = ceph_destroy_inode, .write_inode = ceph_write_inode, .drop_inode = ceph_drop_inode, - .evict_inode = ceph_evict_inode, .sync_fs = ceph_sync_fs, .put_super = ceph_put_super, .show_options = ceph_show_options, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 950170136be9..e9410bcf4113 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -343,7 +343,6 @@ struct ceph_inode_info { u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. 
*/ u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ - struct list_head i_unsafe_writes; /* uncommitted sync writes */ struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */ struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ spinlock_t i_unsafe_lock; @@ -753,7 +752,6 @@ extern const struct inode_operations ceph_file_iops; extern struct inode *ceph_alloc_inode(struct super_block *sb); extern void ceph_destroy_inode(struct inode *inode); extern int ceph_drop_inode(struct inode *inode); -extern void ceph_evict_inode(struct inode *inode); extern struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino); @@ -933,7 +931,7 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, extern int ceph_release(struct inode *inode, struct file *filp); extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len); -extern void ceph_sync_write_wait(struct inode *inode); + /* dir.c */ extern const struct file_operations ceph_dir_fops; extern const struct file_operations ceph_snapdir_fops; -- GitLab From b18b9550e4059ceea0393c518eb323b95243f92f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 11 Feb 2017 18:46:08 +0100 Subject: [PATCH 0561/1084] libceph: get rid of ack vs commit - CEPH_OSD_FLAG_ACK shouldn't be set anymore, so assert on it - remove support for handling ack replies (OSDs will send ack replies only if clients request them) - drop the "do lingering callbacks under osd->lock" logic from handle_reply() -- lreq->lock is sufficient in all three cases Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Reviewed-by: Sage Weil --- include/linux/ceph/osd_client.h | 6 +- net/ceph/osd_client.c | 113 ++++++++------------------------ 2 files changed, 27 insertions(+), 92 deletions(-) diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 03a6653d329a..2ea0c282f3dc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -22,7 +22,6 @@ struct ceph_osd_client; * completion callback for async writepages */ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); -typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool); #define CEPH_HOMELESS_OSD -1 @@ -170,15 +169,12 @@ struct ceph_osd_request { unsigned int r_num_ops; int r_result; - bool r_got_reply; struct ceph_osd_client *r_osdc; struct kref r_kref; bool r_mempool; - struct completion r_completion; - struct completion r_done_completion; /* fsync waiter */ + struct completion r_completion; /* private to osd_client.c */ ceph_osdc_callback_t r_callback; - ceph_osdc_unsafe_callback_t r_unsafe_callback; struct list_head r_unsafe_item; struct inode *r_inode; /* for use by callbacks */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ac4753421d0c..e1c6c2b4a295 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -460,7 +460,6 @@ static void request_init(struct ceph_osd_request *req) kref_init(&req->r_kref); init_completion(&req->r_completion); - init_completion(&req->r_done_completion); RB_CLEAR_NODE(&req->r_node); RB_CLEAR_NODE(&req->r_mc_node); INIT_LIST_HEAD(&req->r_unsafe_item); @@ -1637,7 +1636,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) bool need_send = false; bool promoted = false; - WARN_ON(req->r_tid || req->r_got_reply); + WARN_ON(req->r_tid); dout("%s req %p wrlocked %d\n", __func__, req, wrlocked); again: @@ -1705,17 +1704,10 @@ static void __submit_request(struct 
ceph_osd_request *req, bool wrlocked) static void account_request(struct ceph_osd_request *req) { - unsigned int mask = CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK; + WARN_ON(req->r_flags & CEPH_OSD_FLAG_ACK); + WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE))); - if (req->r_flags & CEPH_OSD_FLAG_READ) { - WARN_ON(req->r_flags & mask); - req->r_flags |= CEPH_OSD_FLAG_ACK; - } else if (req->r_flags & CEPH_OSD_FLAG_WRITE) - WARN_ON(!(req->r_flags & mask)); - else - WARN_ON(1); - - WARN_ON(req->r_unsafe_callback && (req->r_flags & mask) != mask); + req->r_flags |= CEPH_OSD_FLAG_ONDISK; atomic_inc(&req->r_osdc->num_requests); } @@ -1750,15 +1742,15 @@ static void finish_request(struct ceph_osd_request *req) static void __complete_request(struct ceph_osd_request *req) { - if (req->r_callback) + if (req->r_callback) { + dout("%s req %p tid %llu cb %pf result %d\n", __func__, req, + req->r_tid, req->r_callback, req->r_result); req->r_callback(req); - else - complete_all(&req->r_completion); + } } /* - * Note that this is open-coded in handle_reply(), which has to deal - * with ack vs commit, dup acks, etc. + * This is open-coded in handle_reply(). */ static void complete_request(struct ceph_osd_request *req, int err) { @@ -1767,7 +1759,7 @@ static void complete_request(struct ceph_osd_request *req, int err) req->r_result = err; finish_request(req); __complete_request(req); - complete_all(&req->r_done_completion); + complete_all(&req->r_completion); ceph_osdc_put_request(req); } @@ -1793,7 +1785,7 @@ static void cancel_request(struct ceph_osd_request *req) cancel_map_check(req); finish_request(req); - complete_all(&req->r_done_completion); + complete_all(&req->r_completion); ceph_osdc_put_request(req); } @@ -2170,7 +2162,6 @@ static void linger_commit_cb(struct ceph_osd_request *req) mutex_lock(&lreq->lock); dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq, lreq->linger_id, req->r_result); - WARN_ON(!__linger_registered(lreq)); linger_reg_commit_complete(lreq, req->r_result); lreq->committed = true; @@ -2786,31 +2777,8 @@ static int decode_MOSDOpReply(const struct ceph_msg *msg, struct MOSDOpReply *m) } /* - * We are done with @req if - * - @m is a safe reply, or - * - @m is an unsafe reply and we didn't want a safe one - */ -static bool done_request(const struct ceph_osd_request *req, - const struct MOSDOpReply *m) -{ - return (m->result < 0 || - (m->flags & CEPH_OSD_FLAG_ONDISK) || - !(req->r_flags & CEPH_OSD_FLAG_ONDISK)); -} - -/* - * handle osd op reply. either call the callback if it is specified, - * or do the completion to wake up the waiting thread. - * - * ->r_unsafe_callback is set? yes no - * - * first reply is OK (needed r_cb/r_completion, r_cb/r_completion, - * any or needed/got safe) r_done_completion r_done_completion - * - * first reply is unsafe r_unsafe_cb(true) (nothing) - * - * when we get the safe reply r_unsafe_cb(false), r_cb/r_completion, - * r_done_completion r_done_completion + * Handle MOSDOpReply. Set ->r_result and call the callback if it is + * specified. 
*/ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) { @@ -2819,7 +2787,6 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) struct MOSDOpReply m; u64 tid = le64_to_cpu(msg->hdr.tid); u32 data_len = 0; - bool already_acked; int ret; int i; @@ -2898,50 +2865,22 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) le32_to_cpu(msg->hdr.data_len), req->r_tid); goto fail_request; } - dout("%s req %p tid %llu acked %d result %d data_len %u\n", __func__, - req, req->r_tid, req->r_got_reply, m.result, data_len); - - already_acked = req->r_got_reply; - if (!already_acked) { - req->r_result = m.result ?: data_len; - req->r_replay_version = m.replay_version; /* struct */ - req->r_got_reply = true; - } else if (!(m.flags & CEPH_OSD_FLAG_ONDISK)) { - dout("req %p tid %llu dup ack\n", req, req->r_tid); - goto out_unlock_session; - } - - if (done_request(req, &m)) { - finish_request(req); - if (req->r_linger) { - WARN_ON(req->r_unsafe_callback); - dout("req %p tid %llu cb (locked)\n", req, req->r_tid); - __complete_request(req); - } - } + dout("%s req %p tid %llu result %d data_len %u\n", __func__, + req, req->r_tid, m.result, data_len); + /* + * Since we only ever request ONDISK, we should only ever get + * one (type of) reply back. + */ + WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK)); + req->r_result = m.result ?: data_len; + finish_request(req); mutex_unlock(&osd->lock); up_read(&osdc->lock); - if (done_request(req, &m)) { - if (already_acked && req->r_unsafe_callback) { - dout("req %p tid %llu safe-cb\n", req, req->r_tid); - req->r_unsafe_callback(req, false); - } else if (!req->r_linger) { - dout("req %p tid %llu cb\n", req, req->r_tid); - __complete_request(req); - } - complete_all(&req->r_done_completion); - ceph_osdc_put_request(req); - } else { - if (req->r_unsafe_callback) { - dout("req %p tid %llu unsafe-cb\n", req, req->r_tid); - req->r_unsafe_callback(req, true); - } else { - WARN_ON(1); - } - } - + __complete_request(req); + complete_all(&req->r_completion); + ceph_osdc_put_request(req); return; fail_request: @@ -3541,7 +3480,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) up_read(&osdc->lock); dout("%s waiting on req %p tid %llu last_tid %llu\n", __func__, req, req->r_tid, last_tid); - wait_for_completion(&req->r_done_completion); + wait_for_completion(&req->r_completion); ceph_osdc_put_request(req); goto again; } -- GitLab From 54ea0046b6fe36ec18e82d282a29a18da6cdea0f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Sat, 11 Feb 2017 18:48:41 +0100 Subject: [PATCH 0562/1084] libceph, rbd, ceph: WRITE | ONDISK -> WRITE CEPH_OSD_FLAG_ONDISK is set in account_request(). Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Reviewed-by: Sage Weil --- drivers/block/rbd.c | 6 ++---- fs/ceph/addr.c | 14 +++++--------- fs/ceph/file.c | 15 ++++----------- net/ceph/cls_lock_client.c | 12 ++++++------ net/ceph/osd_client.c | 9 ++++----- 5 files changed, 21 insertions(+), 35 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ffb8d3afc6b4..695ef552aaf0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1981,8 +1981,7 @@ static struct ceph_osd_request *rbd_osd_req_create( return __rbd_osd_req_create(rbd_dev, snapc, num_ops, (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) ? 
- CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK : CEPH_OSD_FLAG_READ, - obj_request); + CEPH_OSD_FLAG_WRITE : CEPH_OSD_FLAG_READ, obj_request); } /* @@ -2008,8 +2007,7 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request) return __rbd_osd_req_create(img_request->rbd_dev, img_request->snapc, num_osd_ops, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - obj_request); + CEPH_OSD_FLAG_WRITE, obj_request); } static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 3f0474c55f05..6ecb920602ed 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1018,8 +1018,7 @@ static int ceph_writepages_start(struct address_space *mapping, &ci->i_layout, vino, offset, &len, 0, num_ops, CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ONDISK, + CEPH_OSD_FLAG_WRITE, snapc, truncate_seq, truncate_size, false); if (IS_ERR(req)) { @@ -1029,8 +1028,7 @@ static int ceph_writepages_start(struct address_space *mapping, min(num_ops, CEPH_OSD_SLAB_OPS), CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ONDISK, + CEPH_OSD_FLAG_WRITE, snapc, truncate_seq, truncate_size, true); BUG_ON(IS_ERR(req)); @@ -1680,8 +1678,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 0, 1, - CEPH_OSD_OP_CREATE, - CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE, NULL, 0, 0, false); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1698,8 +1695,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 1, 3, - CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, NULL, ci->i_truncate_seq, ci->i_truncate_size, false); if (IS_ERR(req)) { @@ -1872,7 +1868,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, goto out_unlock; } - wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + wr_req->r_flags = CEPH_OSD_FLAG_WRITE; osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL); ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc); ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 023cb7fd9b5f..26cc95421cca 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -759,9 +759,7 @@ static void ceph_aio_retry_work(struct work_struct *work) goto out; } - req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | - CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE; + req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE; ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc); ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid); @@ -833,9 +831,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (ret2 < 0) dout("invalidate_inode_pages2_range returned %d\n", ret2); - flags = CEPH_OSD_FLAG_ORDERSNAP | - CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE; + flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE; } else { flags = CEPH_OSD_FLAG_READ; } @@ -1034,9 +1030,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos, if (ret < 0) dout("invalidate_inode_pages2_range returned %d\n", ret); - flags = CEPH_OSD_FLAG_ORDERSNAP | - CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE; + flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE; while ((len = iov_iter_count(from)) > 0) { size_t left; @@ -1531,8 +1525,7 @@ static int ceph_zero_partial_object(struct inode *inode, 
ceph_vino(inode), offset, length, 0, 1, op, - CEPH_OSD_FLAG_WRITE | - CEPH_OSD_FLAG_ONDISK, + CEPH_OSD_FLAG_WRITE, NULL, 0, 0, false); if (IS_ERR(req)) { ret = PTR_ERR(req); diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c index f13a1ea87459..b9233b990399 100644 --- a/net/ceph/cls_lock_client.c +++ b/net/ceph/cls_lock_client.c @@ -69,8 +69,8 @@ int ceph_cls_lock(struct ceph_osd_client *osdc, dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n", __func__, lock_name, type, cookie, tag, desc, flags); ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock", - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - lock_op_page, lock_op_buf_size, NULL, NULL); + CEPH_OSD_FLAG_WRITE, lock_op_page, + lock_op_buf_size, NULL, NULL); dout("%s: status %d\n", __func__, ret); __free_page(lock_op_page); @@ -117,8 +117,8 @@ int ceph_cls_unlock(struct ceph_osd_client *osdc, dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie); ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock", - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - unlock_op_page, unlock_op_buf_size, NULL, NULL); + CEPH_OSD_FLAG_WRITE, unlock_op_page, + unlock_op_buf_size, NULL, NULL); dout("%s: status %d\n", __func__, ret); __free_page(unlock_op_page); @@ -170,8 +170,8 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc, dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name, cookie, ENTITY_NAME(*locker)); ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock", - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - break_op_page, break_op_buf_size, NULL, NULL); + CEPH_OSD_FLAG_WRITE, break_op_page, + break_op_buf_size, NULL, NULL); dout("%s: status %d\n", __func__, ret); __free_page(break_op_page); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e1c6c2b4a295..5c0938ddddf6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1704,7 +1704,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) static void account_request(struct ceph_osd_request *req) { - WARN_ON(req->r_flags & CEPH_OSD_FLAG_ACK); + WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK)); WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE))); req->r_flags |= CEPH_OSD_FLAG_ONDISK; @@ -3539,7 +3539,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc, ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); - lreq->t.flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + lreq->t.flags = CEPH_OSD_FLAG_WRITE; lreq->mtime = CURRENT_TIME; lreq->reg_req = alloc_linger_request(lreq); @@ -3597,7 +3597,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc, ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); - req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + req->r_flags = CEPH_OSD_FLAG_WRITE; req->r_mtime = CURRENT_TIME; osd_req_op_watch_init(req, 0, lreq->linger_id, CEPH_OSD_WATCH_OP_UNWATCH); @@ -4163,8 +4163,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, int page_align = off & ~PAGE_MASK; req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1, - CEPH_OSD_OP_WRITE, - CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc, truncate_seq, truncate_size, true); if (IS_ERR(req)) -- GitLab From 4ab495bfe58dfe6b346ae3949a406163152919c8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 24 Feb 2017 01:15:55 +0100 Subject: [PATCH 0563/1084] nfsd: remove superfluous KERN_INFO dprintk 
already provides a KERN_* prefix; this KERN_INFO just shows up as some odd characters in the output. Simplify the message a bit while we're there. Signed-off-by: Rasmus Villemoes Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d35eb077330f..e9ef50addddb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2281,7 +2281,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r out_err: conn->cb_addr.ss_family = AF_UNSPEC; conn->cb_addrlen = 0; - dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " + dprintk("NFSD: this client (clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); -- GitLab From 05a45a2db42543c5f1a32e08f545aebbd7cb4790 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 Feb 2017 13:25:22 -0500 Subject: [PATCH 0564/1084] sunrpc: turn bitfield flags in svc_version into bools It's just simpler to read this way, IMO. Also, no need to explicitly set vs_hidden to false in the nfsacl ones. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfs/callback_xdr.c | 4 ++-- fs/nfsd/nfs2acl.c | 1 - fs/nfsd/nfs3acl.c | 1 - fs/nfsd/nfs4proc.c | 2 +- include/linux/sunrpc/svc.h | 9 +++++---- net/sunrpc/svc.c | 2 +- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index eb094c6011d8..e9836f611d9c 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -1083,7 +1083,7 @@ struct svc_version nfs4_callback_version1 = { .vs_proc = nfs4_callback_procedures1, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, .vs_dispatch = NULL, - .vs_hidden = 1, + .vs_hidden = true, }; struct svc_version nfs4_callback_version4 = { @@ -1092,5 +1092,5 @@ struct svc_version nfs4_callback_version4 = { .vs_proc = nfs4_callback_procedures1, .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, .vs_dispatch = NULL, - .vs_hidden = 1, + .vs_hidden = true, }; diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index d08cd88155c7..838f90f3f890 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -376,5 +376,4 @@ struct svc_version nfsd_acl_version2 = { .vs_proc = nfsd_acl_procedures2, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS3_SVC_XDRSIZE, - .vs_hidden = 0, }; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 0c890347cde3..dcb5f79076c0 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -266,6 +266,5 @@ struct svc_version nfsd_acl_version3 = { .vs_proc = nfsd_acl_procedures3, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS3_SVC_XDRSIZE, - .vs_hidden = 0, }; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 02750a4c8770..89e582fa58cd 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2542,7 +2542,7 @@ struct svc_version nfsd_version4 = { .vs_proc = nfsd_procedures4, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS4_SVC_XDRSIZE, - .vs_rpcb_optnl = 1, + .vs_rpcb_optnl = true, }; /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7321ae933867..96467c95f02e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -400,10 +400,11 @@ struct svc_version { struct svc_procedure * vs_proc; /* per-procedure info */ u32 vs_xdrsize; /* xdrsize needed for this version */ - unsigned int vs_hidden : 1, /* Don't register with portmapper. - * Only used for nfsacl so far. */ - vs_rpcb_optnl:1;/* Don't care the result of register. - * Only used for nfsv4. 
*/ + /* Don't register with rpcbind */ + bool vs_hidden; + + /* Don't care if the rpcbind registration fails */ + bool vs_rpcb_optnl; /* Override dispatch function (e.g. when caching replies). * A return value of 0 means drop the request. diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 75f290bddca1..85bcdea67a3f 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -385,7 +385,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv) for (i = 0; i < progp->pg_nvers; i++) { if (progp->pg_vers[i] == NULL) continue; - if (progp->pg_vers[i]->vs_hidden == 0) + if (!progp->pg_vers[i]->vs_hidden) return 1; } } -- GitLab From 362142b25843fb059941aaa01b91501d5d8652cc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 Feb 2017 13:25:23 -0500 Subject: [PATCH 0565/1084] sunrpc: flag transports as having congestion control NFSv4 requires a transport protocol with congestion control in most cases. On an IP network, that means that NFSv4 over UDP should be forbidden. The situation with RDMA is a bit more nuanced, but most RDMA transports are suitable for this. For now, we assume that all RDMA transports are suitable, but we may need to revise that at some point. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svcsock.c | 1 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 7440290f64ac..ddb7f94a9d06 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -67,6 +67,7 @@ struct svc_xprt { #define XPT_CACHE_AUTH 11 /* cache auth info */ #define XPT_LOCAL 12 /* connection from loopback interface */ #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ +#define XPT_CONG_CTRL 14 /* has congestion control */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index de066acdb34e..1956b8b96b2d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1306,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class, &svsk->sk_xprt, serv); set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); + set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { dprintk("setting up TCP socket for listening\n"); set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ab2fd5377f94..9d9e4e16080f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -569,6 +569,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, spin_lock_init(&cma_xprt->sc_ctxt_lock); spin_lock_init(&cma_xprt->sc_map_lock); + /* + * Note that this implies that the underlying transport support + * has some form of congestion control (see RFC 7530 section 3.1 + * paragraph 2). For now, we assume that all supported RDMA + * transports are suitable here. 
+ */ + set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags); + if (listener) set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); -- GitLab From 5283b03ee5cd28d516646298bead09b238d92ddc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 Feb 2017 13:25:24 -0500 Subject: [PATCH 0566/1084] nfs/nfsd/sunrpc: enforce transport requirements for NFSv4 NFSv4 requires a transport "that is specified to avoid network congestion" (RFC 7530, section 3.1, paragraph 2). In practical terms, that means that you should not run NFSv4 over UDP. The server has never enforced that requirement, however. This patchset fixes this by adding a new flag to the svc_version that states that it has these transport requirements. With that, we can check that the transport has XPT_CONG_CTRL set before processing an RPC. If it doesn't we reject it with RPC_PROG_MISMATCH. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfs/callback_xdr.c | 2 ++ fs/nfsd/nfs4proc.c | 13 +++++++------ include/linux/sunrpc/svc.h | 3 +++ net/sunrpc/svc.c | 15 +++++++++++++++ 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index e9836f611d9c..fd0284c1dc32 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -1084,6 +1084,7 @@ struct svc_version nfs4_callback_version1 = { .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, .vs_dispatch = NULL, .vs_hidden = true, + .vs_need_cong_ctrl = true, }; struct svc_version nfs4_callback_version4 = { @@ -1093,4 +1094,5 @@ struct svc_version nfs4_callback_version4 = { .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, .vs_dispatch = NULL, .vs_hidden = true, + .vs_need_cong_ctrl = true, }; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 89e582fa58cd..cbeeda1e94a2 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2537,12 +2537,13 @@ static struct svc_procedure nfsd_procedures4[2] = { }; struct svc_version nfsd_version4 = { - .vs_vers = 4, - .vs_nproc = 2, - .vs_proc = nfsd_procedures4, - .vs_dispatch = nfsd_dispatch, - .vs_xdrsize = NFS4_SVC_XDRSIZE, - .vs_rpcb_optnl = true, + .vs_vers = 4, + .vs_nproc = 2, + .vs_proc = nfsd_procedures4, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS4_SVC_XDRSIZE, + .vs_rpcb_optnl = true, + .vs_need_cong_ctrl = true, }; /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 96467c95f02e..e770abeed32d 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -406,6 +406,9 @@ struct svc_version { /* Don't care if the rpcbind registration fails */ bool vs_rpcb_optnl; + /* Need xprt with congestion control */ + bool vs_need_cong_ctrl; + /* Override dispatch function (e.g. when caching replies). * A return value of 0 means drop the request. * vs_dispatch == NULL means use default dispatcher. diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 85bcdea67a3f..1fc3ff822168 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1169,6 +1169,21 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) !(versp = progp->pg_vers[vers])) goto err_bad_vers; + /* + * Some protocol versions (namely NFSv4) require some form of + * congestion control. (See RFC 7530 section 3.1 paragraph 2) + * In other words, UDP is not allowed. We mark those when setting + * up the svc_xprt, and verify that here. + * + * The spec is not very clear about what error should be returned + * when someone tries to access a server that is listening on UDP + * for lower versions. RPC_PROG_MISMATCH seems to be the closest + * fit. 
+ */ + if (versp->vs_need_cong_ctrl && + !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags)) + goto err_bad_vers; + procp = versp->vs_proc + proc; if (proc >= versp->vs_nproc || !procp->pc_func) goto err_bad_proc; -- GitLab From 7259f1dfe718234fee3f87d98d082e7f98d1d712 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 24 Feb 2017 13:25:25 -0500 Subject: [PATCH 0567/1084] sunrpc: don't register UDP port with rpcbind when version needs congestion control Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- net/sunrpc/svc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1fc3ff822168..8492acb4011f 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -976,6 +976,13 @@ int svc_register(const struct svc_serv *serv, struct net *net, if (vers->vs_hidden) continue; + /* + * Don't register a UDP port if we need congestion + * control. + */ + if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP) + continue; + error = __svc_register(net, progp->pg_name, progp->pg_prog, i, family, proto, port); -- GitLab From 44b46d739d3546188eb59f11e6888d77ebfcc3c1 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Sat, 25 Feb 2017 04:12:40 +0530 Subject: [PATCH 0568/1084] ALSA: hda - Add Geminilake PCI ID Geminilake is another Intel part, so need to add PCI ID for it. Signed-off-by: Vinod Koul Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 16108f0eb688..c8256a89375a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2255,6 +2255,9 @@ static const struct pci_device_id azx_ids[] = { /* Broxton-T */ { PCI_DEVICE(0x8086, 0x1a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, + /* Gemini-Lake */ + { PCI_DEVICE(0x8086, 0x3198), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, /* Haswell */ { PCI_DEVICE(0x8086, 0x0a0c), .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL }, -- GitLab From 3d1e236022cc1426b0834565995ddee2ca231cee Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 24 Feb 2017 22:31:02 -0600 Subject: [PATCH 0569/1084] objtool: Prevent GCC from merging annotate_unreachable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0-day bot reported some new objtool warnings which were caused by the new annotate_unreachable() macro: fs/afs/flock.o: warning: objtool: afs_do_unlk()+0x0: duplicate frame pointer save fs/afs/flock.o: warning: objtool: afs_do_unlk()+0x0: frame pointer state mismatch fs/btrfs/delayed-inode.o: warning: objtool: btrfs_delete_delayed_dir_index()+0x0: duplicate frame pointer save fs/btrfs/delayed-inode.o: warning: objtool: btrfs_delete_delayed_dir_index()+0x0: frame pointer state mismatch fs/dlm/lock.o: warning: objtool: _grant_lock()+0x0: duplicate frame pointer save fs/dlm/lock.o: warning: objtool: _grant_lock()+0x0: frame pointer state mismatch fs/ocfs2/alloc.o: warning: objtool: ocfs2_mv_path()+0x0: duplicate frame pointer save fs/ocfs2/alloc.o: warning: objtool: ocfs2_mv_path()+0x0: frame pointer state mismatch It turns out that, for older versions of GCC, if a function has multiple BUG() incantations, GCC will sometimes merge the corresponding annotate_unreachable() inline asm statements into a single block. That has the undesirable effect of removing one of the entries in the __unreachable section, confusing objtool greatly. 
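As a minimal sketch of the failure mode (the function below is invented purely for illustration and is not taken from any kernel file):

#include <linux/bug.h>

static void unreachable_merge_example(int x)
{
	if (x == 1)
		BUG();	/* expands to annotate_unreachable(): identical asm text */
	if (x == 2)
		BUG();	/* older GCC may merge this block with the one above,
			 * leaving only a single entry in __unreachable */
}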
A workaround for this issue is to ensure that each instance of the inline asm statement uses a different label, so that GCC sees the statements are unique and leaves them alone. The inline asm ‘%=’ token could be used for that, but unfortunately older versions of GCC don't support it. So I implemented a poor man's version of it with the __LINE__ macro. Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/0c14b00baf9f68d1b0221ddb6c88b925181c8be8.1487997036.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 8ea159fc489d..de471346b365 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -197,10 +197,10 @@ #ifdef CONFIG_STACK_VALIDATION #define annotate_unreachable() ({ \ - asm("1:\t\n" \ + asm("%c0:\t\n" \ ".pushsection __unreachable, \"a\"\t\n" \ - ".long 1b\t\n" \ - ".popsection\t\n"); \ + ".long %c0b\t\n" \ + ".popsection\t\n" : : "i" (__LINE__)); \ }) #else #define annotate_unreachable() -- GitLab From c0464062bfea9cd2ef6643d93429eafe8f6c2a4a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 25 Feb 2017 02:08:12 -0800 Subject: [PATCH 0570/1084] PCI: dwc: Fix crashes seen due to missing assignments Fix the following crash, seen in dwc/pci-imx6. Unable to handle kernel NULL pointer dereference at virtual address 00000070 pgd = c0004000 [00000070] *pgd=00000000 Internal error: Oops: 805 [#1] SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.10.0-09686-g9e31489 #1 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) task: cb850000 task.stack: cb84e000 PC is at imx6_pcie_probe+0x2f4/0x414 ... While at it, fix the same problem in various drivers instead of waiting for individual crash reports. The change in the imx6 driver was tested with qemu. The changes in other drivers are based on code inspection and have been compile tested only. 
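Before reading the per-driver hunks, the crash pattern can be reduced to the following schematic C sketch; the structure and function names are invented for illustration and are not the real pcie-designware types:

#include <linux/device.h>

struct core_pcie {
	struct device *dev;
};

struct drv_pcie {
	struct core_pcie *pci;	/* back-pointer to the shared core structure */
};

static int drv_probe(struct device *dev, struct drv_pcie *drv,
		     struct core_pcie *pci)
{
	pci->dev = dev;
	drv->pci = pci;	/* the assignment each hunk below adds; without it any
			 * later drv->pci dereference faults on a NULL pointer */
	return 0;
}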
Fixes: 442ec4c04d12 ("PCI: dwc: all: Split struct pcie_port into host-only and core structures") Signed-off-by: Guenter Roeck Signed-off-by: Vivek Gautam # designware-plat Signed-off-by: Bjorn Helgaas Acked-by: Kishon Vijay Abraham I --- drivers/pci/dwc/pci-exynos.c | 1 + drivers/pci/dwc/pci-imx6.c | 1 + drivers/pci/dwc/pci-keystone.c | 2 ++ drivers/pci/dwc/pci-layerscape.c | 2 ++ drivers/pci/dwc/pcie-armada8k.c | 2 ++ drivers/pci/dwc/pcie-artpec6.c | 2 ++ drivers/pci/dwc/pcie-designware-plat.c | 2 ++ drivers/pci/dwc/pcie-hisi.c | 2 ++ drivers/pci/dwc/pcie-qcom.c | 2 ++ drivers/pci/dwc/pcie-spear13xx.c | 2 ++ 10 files changed, 18 insertions(+) diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/dwc/pci-exynos.c index 001c91a945aa..993b650ef275 100644 --- a/drivers/pci/dwc/pci-exynos.c +++ b/drivers/pci/dwc/pci-exynos.c @@ -668,6 +668,7 @@ static int __init exynos_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; + ep->pci = pci; ep->ops = (const struct exynos_pcie_ops *) of_device_get_match_data(dev); diff --git a/drivers/pci/dwc/pci-imx6.c b/drivers/pci/dwc/pci-imx6.c index 3ab6761db9e8..801e46cd266d 100644 --- a/drivers/pci/dwc/pci-imx6.c +++ b/drivers/pci/dwc/pci-imx6.c @@ -605,6 +605,7 @@ static int __init imx6_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; + imx6_pcie->pci = pci; imx6_pcie->variant = (enum imx6_pcie_variants)of_device_get_match_data(dev); diff --git a/drivers/pci/dwc/pci-keystone.c b/drivers/pci/dwc/pci-keystone.c index 8dc66409182d..fcc9723bad6e 100644 --- a/drivers/pci/dwc/pci-keystone.c +++ b/drivers/pci/dwc/pci-keystone.c @@ -401,6 +401,8 @@ static int __init ks_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; + ks_pcie->pci = pci; + /* initialize SerDes Phy if present */ phy = devm_phy_get(dev, "pcie-phy"); if (PTR_ERR_OR_ZERO(phy) == -EPROBE_DEFER) diff --git a/drivers/pci/dwc/pci-layerscape.c b/drivers/pci/dwc/pci-layerscape.c index 175c09e3a932..c32e392a0ae6 100644 --- a/drivers/pci/dwc/pci-layerscape.c +++ b/drivers/pci/dwc/pci-layerscape.c @@ -280,6 +280,8 @@ static int __init ls_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = pcie->drvdata->dw_pcie_ops; + pcie->pci = pci; + dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); pci->dbi_base = devm_ioremap_resource(dev, dbi_base); if (IS_ERR(pci->dbi_base)) diff --git a/drivers/pci/dwc/pcie-armada8k.c b/drivers/pci/dwc/pcie-armada8k.c index 66bac6fbfa9f..f110e3b24a26 100644 --- a/drivers/pci/dwc/pcie-armada8k.c +++ b/drivers/pci/dwc/pcie-armada8k.c @@ -220,6 +220,8 @@ static int armada8k_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; + pcie->pci = pci; + pcie->clk = devm_clk_get(dev, NULL); if (IS_ERR(pcie->clk)) return PTR_ERR(pcie->clk); diff --git a/drivers/pci/dwc/pcie-artpec6.c b/drivers/pci/dwc/pcie-artpec6.c index 59ecc9e66436..fcd3ef845883 100644 --- a/drivers/pci/dwc/pcie-artpec6.c +++ b/drivers/pci/dwc/pcie-artpec6.c @@ -253,6 +253,8 @@ static int artpec6_pcie_probe(struct platform_device *pdev) pci->dev = dev; + artpec6_pcie->pci = pci; + dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); pci->dbi_base = devm_ioremap_resource(dev, dbi_base); if (IS_ERR(pci->dbi_base)) diff --git a/drivers/pci/dwc/pcie-designware-plat.c b/drivers/pci/dwc/pcie-designware-plat.c index 65250f63515c..b6c832ba39dd 100644 --- a/drivers/pci/dwc/pcie-designware-plat.c +++ b/drivers/pci/dwc/pcie-designware-plat.c @@ -104,6 +104,8 @@ 
static int dw_plat_pcie_probe(struct platform_device *pdev) pci->dev = dev; + dw_plat_pcie->pci = pci; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); pci->dbi_base = devm_ioremap_resource(dev, res); if (IS_ERR(pci->dbi_base)) diff --git a/drivers/pci/dwc/pcie-hisi.c b/drivers/pci/dwc/pcie-hisi.c index e3e4fedd9f68..fd66a3199db7 100644 --- a/drivers/pci/dwc/pcie-hisi.c +++ b/drivers/pci/dwc/pcie-hisi.c @@ -284,6 +284,8 @@ static int hisi_pcie_probe(struct platform_device *pdev) driver = dev->driver; + hisi_pcie->pci = pci; + hisi_pcie->soc_ops = of_device_get_match_data(dev); hisi_pcie->subctrl = diff --git a/drivers/pci/dwc/pcie-qcom.c b/drivers/pci/dwc/pcie-qcom.c index e36abe0d9d6f..67eb7f5926dd 100644 --- a/drivers/pci/dwc/pcie-qcom.c +++ b/drivers/pci/dwc/pcie-qcom.c @@ -686,6 +686,8 @@ static int qcom_pcie_probe(struct platform_device *pdev) pci->ops = &dw_pcie_ops; pp = &pci->pp; + pcie->pci = pci; + pcie->ops = (struct qcom_pcie_ops *)of_device_get_match_data(dev); pcie->reset = devm_gpiod_get_optional(dev, "perst", GPIOD_OUT_LOW); diff --git a/drivers/pci/dwc/pcie-spear13xx.c b/drivers/pci/dwc/pcie-spear13xx.c index 348f9c5e0433..eaa4ea8e2ea4 100644 --- a/drivers/pci/dwc/pcie-spear13xx.c +++ b/drivers/pci/dwc/pcie-spear13xx.c @@ -247,6 +247,8 @@ static int spear13xx_pcie_probe(struct platform_device *pdev) pci->dev = dev; pci->ops = &dw_pcie_ops; + spear13xx_pcie->pci = pci; + spear13xx_pcie->phy = devm_phy_get(dev, "pcie-phy"); if (IS_ERR(spear13xx_pcie->phy)) { ret = PTR_ERR(spear13xx_pcie->phy); -- GitLab From ca1c39ef76376b67303d01f94fe98bb68bb3861a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Feb 2017 07:34:00 +0100 Subject: [PATCH 0571/1084] iio: adc: xilinx: Fix error handling Reorder error handling labels in order to match the way resources have been allocated. Signed-off-by: Christophe JAILLET Acked-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index 0a6beb3d99cb..56cf5907a5f0 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1208,7 +1208,7 @@ static int xadc_probe(struct platform_device *pdev) ret = xadc->ops->setup(pdev, indio_dev, irq); if (ret) - goto err_free_samplerate_trigger; + goto err_clk_disable_unprepare; ret = request_irq(irq, xadc->ops->interrupt_handler, 0, dev_name(&pdev->dev), indio_dev); @@ -1268,6 +1268,8 @@ static int xadc_probe(struct platform_device *pdev) err_free_irq: free_irq(irq, indio_dev); +err_clk_disable_unprepare: + clk_disable_unprepare(xadc->clk); err_free_samplerate_trigger: if (xadc->ops->flags & XADC_FLAGS_BUFFERED) iio_trigger_free(xadc->samplerate_trigger); @@ -1277,8 +1279,6 @@ static int xadc_probe(struct platform_device *pdev) err_triggered_buffer_cleanup: if (xadc->ops->flags & XADC_FLAGS_BUFFERED) iio_triggered_buffer_cleanup(indio_dev); -err_clk_disable_unprepare: - clk_disable_unprepare(xadc->clk); err_device_free: kfree(indio_dev->channels); -- GitLab From 99cde44edfa06b5f910c1704cb15d36310a56545 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 17 Jan 2017 13:35:03 +0530 Subject: [PATCH 0572/1084] parisc: eisa: Remove coding style errors This patch removes coding style errors in the eisa driver. 
Signed-off-by: Arvind Yadav Signed-off-by: Helge Deller --- drivers/parisc/eisa.c | 104 +++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c index 103095bbe8c0..59edee911f7d 100644 --- a/drivers/parisc/eisa.c +++ b/drivers/parisc/eisa.c @@ -14,16 +14,16 @@ * Wax ASIC also includes a PS/2 and RS-232 controller, but those are * dealt with elsewhere; this file is concerned only with the EISA portions * of Wax. - * - * + * + * * HINT: * ----- * To allow an ISA card to work properly in the EISA slot you need to - * set an edge trigger level. This may be done on the palo command line - * by adding the kernel parameter "eisa_irq_edge=n,n2,[...]]", with + * set an edge trigger level. This may be done on the palo command line + * by adding the kernel parameter "eisa_irq_edge=n,n2,[...]]", with * n and n2 as the irq levels you want to use. - * - * Example: "eisa_irq_edge=10,11" allows ISA cards to operate at + * + * Example: "eisa_irq_edge=10,11" allows ISA cards to operate at * irq levels 10 and 11. */ @@ -46,9 +46,9 @@ #include #if 0 -#define EISA_DBG(msg, arg... ) printk(KERN_DEBUG "eisa: " msg , ## arg ) +#define EISA_DBG(msg, arg...) printk(KERN_DEBUG "eisa: " msg, ## arg) #else -#define EISA_DBG(msg, arg... ) +#define EISA_DBG(msg, arg...) #endif #define SNAKES_EEPROM_BASE_ADDR 0xF0810400 @@ -108,7 +108,7 @@ void eisa_out8(unsigned char data, unsigned short port) void eisa_out16(unsigned short data, unsigned short port) { - if (EISA_bus) + if (EISA_bus) gsc_writew(cpu_to_le16(data), eisa_permute(port)); } @@ -135,9 +135,9 @@ static int master_mask; static int slave_mask; /* the trig level can be set with the - * eisa_irq_edge=n,n,n commandline parameter - * We should really read this from the EEPROM - * in the furure. + * eisa_irq_edge=n,n,n commandline parameter + * We should really read this from the EEPROM + * in the furure. 
*/ /* irq 13,8,2,1,0 must be edge */ static unsigned int eisa_irq_level __read_mostly; /* default to edge triggered */ @@ -170,7 +170,7 @@ static void eisa_unmask_irq(struct irq_data *d) unsigned int irq = d->irq; unsigned long flags; EISA_DBG("enable irq %d\n", irq); - + spin_lock_irqsave(&eisa_irq_lock, flags); if (irq & 8) { slave_mask &= ~(1 << (irq&7)); @@ -194,7 +194,7 @@ static irqreturn_t eisa_irq(int wax_irq, void *intr_dev) { int irq = gsc_readb(0xfc01f000); /* EISA supports 16 irqs */ unsigned long flags; - + spin_lock_irqsave(&eisa_irq_lock, flags); /* read IRR command */ eisa_out8(0x0a, 0x20); @@ -202,31 +202,31 @@ static irqreturn_t eisa_irq(int wax_irq, void *intr_dev) EISA_DBG("irq IAR %02x 8259-1 irr %02x 8259-2 irr %02x\n", irq, eisa_in8(0x20), eisa_in8(0xa0)); - + /* read ISR command */ eisa_out8(0x0a, 0x20); eisa_out8(0x0a, 0xa0); EISA_DBG("irq 8259-1 isr %02x imr %02x 8259-2 isr %02x imr %02x\n", eisa_in8(0x20), eisa_in8(0x21), eisa_in8(0xa0), eisa_in8(0xa1)); - + irq &= 0xf; - + /* mask irq and write eoi */ if (irq & 8) { slave_mask |= (1 << (irq&7)); eisa_out8(slave_mask, 0xa1); eisa_out8(0x60 | (irq&7),0xa0);/* 'Specific EOI' to slave */ - eisa_out8(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ - + eisa_out8(0x62, 0x20); /* 'Specific EOI' to master-IRQ2 */ + } else { master_mask |= (1 << (irq&7)); eisa_out8(master_mask, 0x21); - eisa_out8(0x60|irq,0x20); /* 'Specific EOI' to master */ + eisa_out8(0x60|irq, 0x20); /* 'Specific EOI' to master */ } spin_unlock_irqrestore(&eisa_irq_lock, flags); generic_handle_irq(irq); - + spin_lock_irqsave(&eisa_irq_lock, flags); /* unmask */ if (irq & 8) { @@ -254,44 +254,44 @@ static struct irqaction irq2_action = { static void init_eisa_pic(void) { unsigned long flags; - + spin_lock_irqsave(&eisa_irq_lock, flags); eisa_out8(0xff, 0x21); /* mask during init */ eisa_out8(0xff, 0xa1); /* mask during init */ - + /* master pic */ - eisa_out8(0x11,0x20); /* ICW1 */ - eisa_out8(0x00,0x21); /* ICW2 */ - eisa_out8(0x04,0x21); /* ICW3 */ - eisa_out8(0x01,0x21); /* ICW4 */ - eisa_out8(0x40,0x20); /* OCW2 */ - + eisa_out8(0x11, 0x20); /* ICW1 */ + eisa_out8(0x00, 0x21); /* ICW2 */ + eisa_out8(0x04, 0x21); /* ICW3 */ + eisa_out8(0x01, 0x21); /* ICW4 */ + eisa_out8(0x40, 0x20); /* OCW2 */ + /* slave pic */ - eisa_out8(0x11,0xa0); /* ICW1 */ - eisa_out8(0x08,0xa1); /* ICW2 */ - eisa_out8(0x02,0xa1); /* ICW3 */ - eisa_out8(0x01,0xa1); /* ICW4 */ - eisa_out8(0x40,0xa0); /* OCW2 */ - + eisa_out8(0x11, 0xa0); /* ICW1 */ + eisa_out8(0x08, 0xa1); /* ICW2 */ + eisa_out8(0x02, 0xa1); /* ICW3 */ + eisa_out8(0x01, 0xa1); /* ICW4 */ + eisa_out8(0x40, 0xa0); /* OCW2 */ + udelay(100); - - slave_mask = 0xff; - master_mask = 0xfb; + + slave_mask = 0xff; + master_mask = 0xfb; eisa_out8(slave_mask, 0xa1); /* OCW1 */ eisa_out8(master_mask, 0x21); /* OCW1 */ - + /* setup trig level */ EISA_DBG("EISA edge/level %04x\n", eisa_irq_level); - + eisa_out8(eisa_irq_level&0xff, 0x4d0); /* Set all irq's to edge */ - eisa_out8((eisa_irq_level >> 8) & 0xff, 0x4d1); - + eisa_out8((eisa_irq_level >> 8) & 0xff, 0x4d1); + EISA_DBG("pic0 mask %02x\n", eisa_in8(0x21)); EISA_DBG("pic1 mask %02x\n", eisa_in8(0xa1)); EISA_DBG("pic0 edge/level %02x\n", eisa_in8(0x4d0)); EISA_DBG("pic1 edge/level %02x\n", eisa_in8(0x4d1)); - + spin_unlock_irqrestore(&eisa_irq_lock, flags); } @@ -305,7 +305,7 @@ static int __init eisa_probe(struct parisc_device *dev) char *name = is_mongoose(dev) ? 
"Mongoose" : "Wax"; - printk(KERN_INFO "%s EISA Adapter found at 0x%08lx\n", + printk(KERN_INFO "%s EISA Adapter found at 0x%08lx\n", name, (unsigned long)dev->hpa.start); eisa_dev.hba.dev = dev; @@ -336,14 +336,14 @@ static int __init eisa_probe(struct parisc_device *dev) printk(KERN_ERR "EISA: request_irq failed!\n"); return result; } - + /* Reserve IRQ2 */ setup_irq(2, &irq2_action); for (i = 0; i < 16; i++) { irq_set_chip_and_handler(i, &eisa_interrupt_type, handle_simple_irq); } - + EISA_bus = 1; if (dev->num_addrs) { @@ -375,7 +375,7 @@ static int __init eisa_probe(struct parisc_device *dev) return -1; } } - + return 0; } @@ -404,7 +404,7 @@ void eisa_make_irq_level(int num) { if (eisa_irq_configured& (1< 15 || val < 0) { printk(KERN_ERR "eisa: EISA irq value are 0-15\n"); continue; } - if (val == 2) { + if (val == 2) { val = 9; } eisa_make_irq_edge(val); /* clear the corresponding bit */ EISA_DBG("setting IRQ %d to edge-triggered mode\n", val); - + if ((cur = strchr(cur, ','))) { cur++; } else { -- GitLab From af640d17ecee1f82a949b580cc4a3e8f29c377b9 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 18 Jan 2017 13:41:31 +0530 Subject: [PATCH 0573/1084] parisc: eisa: Fix resource leaks in error paths Signed-off-by: Arvind Yadav Signed-off-by: Helge Deller --- drivers/parisc/eisa.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c index 59edee911f7d..7e2f6d5a6aaf 100644 --- a/drivers/parisc/eisa.c +++ b/drivers/parisc/eisa.c @@ -334,7 +334,7 @@ static int __init eisa_probe(struct parisc_device *dev) result = request_irq(dev->irq, eisa_irq, IRQF_SHARED, "EISA", &eisa_dev); if (result) { printk(KERN_ERR "EISA: request_irq failed!\n"); - return result; + goto error_release; } /* Reserve IRQ2 */ @@ -358,6 +358,11 @@ static int __init eisa_probe(struct parisc_device *dev) } } eisa_eeprom_addr = ioremap_nocache(eisa_dev.eeprom_addr, HPEE_MAX_LENGTH); + if (!eisa_eeprom_addr) { + result = -ENOMEM; + printk(KERN_ERR "EISA: ioremap_nocache failed!\n"); + goto error_free_irq; + } result = eisa_enumerator(eisa_dev.eeprom_addr, &eisa_dev.hba.io_space, &eisa_dev.hba.lmmio_space); init_eisa_pic(); @@ -372,11 +377,20 @@ static int __init eisa_probe(struct parisc_device *dev) eisa_dev.root.dma_mask = 0xffffffff; /* wild guess */ if (eisa_root_register (&eisa_dev.root)) { printk(KERN_ERR "EISA: Failed to register EISA root\n"); - return -1; + result = -ENOMEM; + goto error_iounmap; } } return 0; + +error_iounmap: + iounmap(eisa_eeprom_addr); +error_free_irq: + free_irq(dev->irq, &eisa_dev); +error_release: + release_resource(&eisa_dev.hba.io_space); + return result; } static const struct parisc_device_id eisa_tbl[] = { -- GitLab From 09b871ffd4d8ddc005a9480fb69ff1897caaeb1f Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 29 Jan 2017 18:19:33 +0100 Subject: [PATCH 0574/1084] parisc: Define access_ok() as macro Define access_ok() as macro instead of static function. This fixes build warnings in code where the second parameter is given as unsigned long. Signed-off-by: Helge Deller --- arch/parisc/include/asm/uaccess.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 9a2aee1b90fc..fb4382c28259 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -32,11 +32,7 @@ * that put_user is the same as __put_user, etc. 
*/ -static inline long access_ok(int type, const void __user * addr, - unsigned long size) -{ - return 1; -} +#define access_ok(type, uaddr, size) (1) #define put_user __put_user #define get_user __get_user -- GitLab From e28f701b21ee4b4eb716e4717fa2aa78c28a8523 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 2 Feb 2017 18:52:34 +0530 Subject: [PATCH 0575/1084] parisc: ccio-dma: Handle return NULL error from ioremap_nocache Fix error paths and return -ENOMEM instead of '1'. Signed-off-by: Arvind Yadav Signed-off-by: Helge Deller --- drivers/parisc/ccio-dma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 553ef8a5d588..b0558e2451b5 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1539,7 +1539,7 @@ static int __init ccio_probe(struct parisc_device *dev) ioc = kzalloc(sizeof(struct ioc), GFP_KERNEL); if (ioc == NULL) { printk(KERN_ERR MODULE_NAME ": memory allocation failure\n"); - return 1; + return -ENOMEM; } ioc->name = dev->id.hversion == U2_IOA_RUNWAY ? "U2" : "UTurn"; @@ -1554,6 +1554,10 @@ static int __init ccio_probe(struct parisc_device *dev) ioc->hw_path = dev->hw_path; ioc->ioc_regs = ioremap_nocache(dev->hpa.start, 4096); + if (!ioc->ioc_regs) { + kfree(ioc); + return -ENOMEM; + } ccio_ioc_init(ioc); ccio_init_resources(ioc); hppa_dma_ops = &ccio_ops; -- GitLab From 8351badf349b22b47c9696f54656db65d7834e42 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Feb 2017 10:20:35 +0300 Subject: [PATCH 0576/1084] parisc: fix a printk We want to do a pr_cont() here and not a pr_warn(). Fixes: b391667eb45a ("parisc: Report trap type as human readable string") Signed-off-by: Dan Carpenter Signed-off-by: Helge Deller --- arch/parisc/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 1a0b4f63f0e9..c3f8d34a11cf 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -238,8 +238,8 @@ show_signal_msg(struct pt_regs *regs, unsigned long code, vma ? ',':'\n'); if (vma) - pr_warn(KERN_CONT " vm_start = 0x%08lx, vm_end = 0x%08lx\n", - vma->vm_start, vma->vm_end); + pr_cont(" vm_start = 0x%08lx, vm_end = 0x%08lx\n", + vma->vm_start, vma->vm_end); show_regs(regs); } -- GitLab From ef470a60e10eb12635d7b84c9502cea3028d44e8 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 20 Feb 2017 14:02:46 -0500 Subject: [PATCH 0577/1084] parisc: Remove flush_user_dcache_range and flush_user_icache_range The functions flush_user_dcache_range() and flush_user_icache_range() are only used by the parisc signal handling code. This code only needs to flush a couple of lines, so the threshold check is unnecessary overhead. 
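For illustration, the resulting call pattern at the signal-handling call sites is
sketched below (a condensed view of the hunks that follow; start/end bound only the
small trampoline range, so no threshold test against parisc_cache_flush_threshold is
needed):

	start = (unsigned long) &frame->tramp[0];
	end   = (unsigned long) &frame->tramp[TRAMP_SIZE];
	flush_user_dcache_range_asm(start, end);	/* flush the few dirty lines */
	flush_user_icache_range_asm(start, end);	/* keep the I-cache coherent */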
Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/include/asm/cacheflush.h | 2 -- arch/parisc/kernel/cache.c | 18 ------------------ arch/parisc/kernel/signal.c | 13 +++++++------ 3 files changed, 7 insertions(+), 26 deletions(-) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 7bd69bd43a01..19c9c3c5f267 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -27,8 +27,6 @@ void flush_user_dcache_range_asm(unsigned long, unsigned long); void flush_kernel_dcache_range_asm(unsigned long, unsigned long); void flush_kernel_dcache_page_asm(void *); void flush_kernel_icache_page(void *); -void flush_user_dcache_range(unsigned long, unsigned long); -void flush_user_icache_range(unsigned long, unsigned long); /* Cache flush operations */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 977f0a4f5ecf..20b013cbd297 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -573,24 +573,6 @@ void flush_cache_mm(struct mm_struct *mm) } } -void -flush_user_dcache_range(unsigned long start, unsigned long end) -{ - if ((end - start) < parisc_cache_flush_threshold) - flush_user_dcache_range_asm(start,end); - else - flush_data_cache(); -} - -void -flush_user_icache_range(unsigned long start, unsigned long end) -{ - if ((end - start) < parisc_cache_flush_threshold) - flush_user_icache_range_asm(start,end); - else - flush_instruction_cache(); -} - void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index e58925ac64d1..f6aaca27ac4e 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -232,6 +232,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, struct rt_sigframe __user *frame; unsigned long rp, usp; unsigned long haddr, sigframe_size; + unsigned long start, end; int err = 0; #ifdef CONFIG_64BIT struct compat_rt_sigframe __user * compat_frame; @@ -299,10 +300,10 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, } #endif - flush_user_dcache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[TRAMP_SIZE]); - flush_user_icache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[TRAMP_SIZE]); + start = (unsigned long) &frame->tramp[0]; + end = (unsigned long) &frame->tramp[TRAMP_SIZE]; + flush_user_dcache_range_asm(start, end); + flush_user_icache_range_asm(start, end); /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP @@ -548,8 +549,8 @@ insert_restart_trampoline(struct pt_regs *regs) WARN_ON(err); /* flush data/instruction cache for new insns */ - flush_user_dcache_range(start, end); - flush_user_icache_range(start, end); + flush_user_dcache_range_asm(start, end); + flush_user_icache_range_asm(start, end); regs->gr[31] = regs->gr[30] + 8; return; -- GitLab From 678a3bd1b3de6d2ebf604e7d708bc8150bb667e9 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 9 Feb 2017 22:22:13 -0500 Subject: [PATCH 0578/1084] tools/power turbostat: fix bugs in --add option When --add was used more than once, overflowed buffers caused some counters to be stored on top of others, corrupting the results. Simplify the code by simply reserving space for up to 16 added counters per each cpu, core, package. Per-cpu added counters were being printed only per-core. 
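The shape of the fix is a fixed-size added-counter array per topology level plus a
bounds check at registration time. A minimal stand-alone sketch of that idea follows
(the names mirror, but do not reproduce, the turbostat patch below):

#include <stdio.h>
#include <stdlib.h>

#define MAX_ADDED_COUNTERS 16

struct thread_data_sketch {
	/* fixed space per thread: no per-counter buffer growth to overflow */
	unsigned long long counter[MAX_ADDED_COUNTERS];
};

static unsigned int added_thread_counters;

static void add_thread_counter_sketch(void)
{
	if (++added_thread_counters > MAX_ADDED_COUNTERS) {
		fprintf(stderr, "exceeded max %d added thread counters\n",
			MAX_ADDED_COUNTERS);
		exit(-1);
	}
	/* record the new counter definition here */
}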
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 88 ++++++++++++++++----------- 1 file changed, 52 insertions(+), 36 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f13f61b065c6..c7fadf0faa4b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -154,6 +154,7 @@ char *progname; cpu_set_t *cpu_present_set, *cpu_affinity_set; size_t cpu_present_setsize, cpu_affinity_setsize; +#define MAX_ADDED_COUNTERS 16 struct thread_data { unsigned long long tsc; @@ -166,7 +167,7 @@ struct thread_data { unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 - unsigned long long counter[1]; + unsigned long long counter[MAX_ADDED_COUNTERS]; } *thread_even, *thread_odd; struct core_data { @@ -175,7 +176,7 @@ struct core_data { unsigned long long c7; unsigned int core_temp_c; unsigned int core_id; - unsigned long long counter[1]; + unsigned long long counter[MAX_ADDED_COUNTERS]; } *core_even, *core_odd; struct pkg_data { @@ -200,7 +201,7 @@ struct pkg_data { unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; - unsigned long long counter[1]; + unsigned long long counter[MAX_ADDED_COUNTERS]; } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -228,9 +229,9 @@ struct msr_counter { }; struct sys_counters { - unsigned int thread_counter_bytes; - unsigned int core_counter_bytes; - unsigned int package_counter_bytes; + unsigned int added_thread_counters; + unsigned int added_core_counters; + unsigned int added_package_counters; struct msr_counter *tp; struct msr_counter *cp; struct msr_counter *pp; @@ -374,12 +375,6 @@ void print_header(void) if (do_nhm_cstates) outp += sprintf(outp, "\tCPU%%c1"); - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\tCPU%%c3"); - if (do_nhm_cstates) - outp += sprintf(outp, "\tCPU%%c6"); - if (do_snb_cstates) - outp += sprintf(outp, "\tCPU%%c7"); for (mp = sys.tp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { @@ -392,6 +387,14 @@ void print_header(void) } } + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) + outp += sprintf(outp, "\tCPU%%c3"); + if (do_nhm_cstates) + outp += sprintf(outp, "\tCPU%%c6"); + if (do_snb_cstates) + outp += sprintf(outp, "\tCPU%%c7"); + + if (do_dts) outp += sprintf(outp, "\tCoreTmp"); @@ -635,20 +638,11 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_smi) outp += sprintf(outp, "\t%d", t->smi_count); + /* C1 */ if (do_nhm_cstates) outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc); - /* print per-core data only for 1st thread in core */ - if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) - goto done; - - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); - if (do_nhm_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); - if (do_snb_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); - + /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) @@ -656,12 +650,23 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", t->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%8lld", t->counter[i]); + outp += sprintf(outp, "\t%lld", t->counter[i]); } 
else if (mp->format == FORMAT_PERCENT) { outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc); } } + /* print per-core data only for 1st thread in core */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + goto done; + + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) + outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); + if (do_nhm_cstates) + outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); + if (do_snb_cstates) + outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); + if (do_dts) outp += sprintf(outp, "\t%d", c->core_temp_c); @@ -673,7 +678,7 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", c->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%8lld", c->counter[i]); + outp += sprintf(outp, "\t%lld", c->counter[i]); } else if (mp->format == FORMAT_PERCENT) { outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc); } @@ -770,7 +775,7 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", p->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%8lld", p->counter[i]); + outp += sprintf(outp, "\t%lld", p->counter[i]); } else if (mp->format == FORMAT_PERCENT) { outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc); } @@ -1036,7 +1041,6 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->gfx_rc6_ms = 0; p->gfx_mhz = 0; - for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) t->counter[i] = 0; @@ -3662,7 +3666,7 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data int i; *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * - topo.num_packages, sizeof(struct thread_data) + sys.thread_counter_bytes); + topo.num_packages, sizeof(struct thread_data)); if (*t == NULL) goto error; @@ -3671,14 +3675,14 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data (*t)[i].cpu_id = -1; *c = calloc(topo.num_cores_per_pkg * topo.num_packages, - sizeof(struct core_data) + sys.core_counter_bytes); + sizeof(struct core_data)); if (*c == NULL) goto error; for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) (*c)[i].core_id = -1; - *p = calloc(topo.num_packages, sizeof(struct pkg_data) + sys.package_counter_bytes); + *p = calloc(topo.num_packages, sizeof(struct pkg_data)); if (*p == NULL) goto error; @@ -3901,24 +3905,36 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width, switch (scope) { case SCOPE_CPU: - sys.thread_counter_bytes += 64; msrp->next = sys.tp; sys.tp = msrp; - sys.thread_counter_bytes += sizeof(unsigned long long); + sys.added_thread_counters++; + if (sys.added_thread_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added thread counters\n", + MAX_ADDED_COUNTERS); + exit(-1); + } break; case SCOPE_CORE: - sys.core_counter_bytes += 64; msrp->next = sys.cp; sys.cp = msrp; - sys.core_counter_bytes += sizeof(unsigned long long); + sys.added_core_counters++; + if (sys.added_core_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added core counters\n", + MAX_ADDED_COUNTERS); + exit(-1); + } break; case SCOPE_PACKAGE: - sys.package_counter_bytes += 64; msrp->next = sys.pp; sys.pp = msrp; - sys.package_counter_bytes += sizeof(unsigned long long); + sys.added_package_counters++; + if (sys.added_package_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added package counters\n", + MAX_ADDED_COUNTERS); + exit(-1); + } 
break; } -- GitLab From 812db3f77b9a3f6ed59baf7a0d5c3fd8ec8ef86a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 00:25:41 -0500 Subject: [PATCH 0579/1084] tools/power turbostat: Add --show and --hide parameters Add the "--show" and "--hide" cmdline parameters. By default, turbostat shows all columns. turbostat --hide counter_list will continue showing all columns, except for those listed. turbostat --show counter_list will show _only_ the listed columns These features work for built-in counters, and have no effect on columns added with the --add parameter. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 4 + tools/power/x86/turbostat/turbostat.c | 463 +++++++++++++++++++------- 2 files changed, 347 insertions(+), 120 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 03cb639b292e..e8fb1e02d121 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -47,6 +47,10 @@ name as necessary to disambiguate it from others is necessary. Note that option default: delta .fi .PP +\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. +.PP +\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. +.PP \fB--Dump\fP displays the raw counter values. .PP \fB--debug\fP displays additional system configuration information. Invoking this parameter diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c7fadf0faa4b..fff280b50af0 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -52,7 +52,6 @@ unsigned int debug; unsigned int rapl_joules; unsigned int summary_only; unsigned int dump_only; -unsigned int do_nhm_cstates; unsigned int do_snb_cstates; unsigned int do_knl_cstates; unsigned int do_pc2; @@ -72,24 +71,17 @@ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nhm_platform_info; unsigned int aperf_mperf_multiplier = 1; -int do_irq = 1; -int do_smi; double bclk; double base_hz; unsigned int has_base_hz; double tsc_tweak = 1.0; -unsigned int show_pkg; -unsigned int show_core; -unsigned int show_cpu; unsigned int show_pkg_only; unsigned int show_core_only; char *output_buffer, *outp; unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; -unsigned int do_gfx_rc6_ms; unsigned long long gfx_cur_rc6_ms; -unsigned int do_gfx_mhz; unsigned int gfx_cur_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; @@ -226,6 +218,9 @@ struct msr_counter { enum counter_type type; enum counter_format format; struct msr_counter *next; + unsigned int flags; +#define FLAGS_HIDE (1 << 0) +#define FLAGS_SHOW (1 << 1) }; struct sys_counters { @@ -341,39 +336,153 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) } /* - * Example Format w/ field column widths: - * - * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 ThreadC CoreTmp CoreCnt PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt PkgCnt - * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 + * Each string in this array is compared in --show and --hide cmdline. + * Thus, strings that are proper sub-sets must follow their more specific peers. 
*/ +struct msr_counter bic[] = { + { 0x0, "Package" }, + { 0x0, "Avg_MHz" }, + { 0x0, "Bzy_MHz" }, + { 0x0, "TSC_MHz" }, + { 0x0, "IRQ" }, + { 0x0, "SMI", 32, 0, FORMAT_DELTA, NULL}, + { 0x0, "Busy%" }, + { 0x0, "CPU%c1" }, + { 0x0, "CPU%c3" }, + { 0x0, "CPU%c6" }, + { 0x0, "CPU%c7" }, + { 0x0, "ThreadC" }, + { 0x0, "CoreTmp" }, + { 0x0, "CoreCnt" }, + { 0x0, "PkgTmp" }, + { 0x0, "GFX%rc6" }, + { 0x0, "GFXMHz" }, + { 0x0, "Pkg%pc2" }, + { 0x0, "Pkg%pc3" }, + { 0x0, "Pkg%pc6" }, + { 0x0, "Pkg%pc7" }, + { 0x0, "PkgWatt" }, + { 0x0, "CorWatt" }, + { 0x0, "GFXWatt" }, + { 0x0, "PkgCnt" }, + { 0x0, "RAMWatt" }, + { 0x0, "PKG_%" }, + { 0x0, "RAM_%" }, + { 0x0, "Pkg_J" }, + { 0x0, "Cor_J" }, + { 0x0, "GFX_J" }, + { 0x0, "RAM_J" }, + { 0x0, "Core" }, + { 0x0, "CPU" }, +}; + +#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) +#define BIC_Package (1ULL << 0) +#define BIC_Avg_MHz (1ULL << 1) +#define BIC_Bzy_MHz (1ULL << 2) +#define BIC_TSC_MHz (1ULL << 3) +#define BIC_IRQ (1ULL << 4) +#define BIC_SMI (1ULL << 5) +#define BIC_Busy (1ULL << 6) +#define BIC_CPU_c1 (1ULL << 7) +#define BIC_CPU_c3 (1ULL << 8) +#define BIC_CPU_c6 (1ULL << 9) +#define BIC_CPU_c7 (1ULL << 10) +#define BIC_ThreadC (1ULL << 11) +#define BIC_CoreTmp (1ULL << 12) +#define BIC_CoreCnt (1ULL << 13) +#define BIC_PkgTmp (1ULL << 14) +#define BIC_GFX_rc6 (1ULL << 15) +#define BIC_GFXMHz (1ULL << 16) +#define BIC_Pkgpc2 (1ULL << 17) +#define BIC_Pkgpc3 (1ULL << 18) +#define BIC_Pkgpc6 (1ULL << 19) +#define BIC_Pkgpc7 (1ULL << 20) +#define BIC_PkgWatt (1ULL << 21) +#define BIC_CorWatt (1ULL << 22) +#define BIC_GFXWatt (1ULL << 23) +#define BIC_PkgCnt (1ULL << 24) +#define BIC_RAMWatt (1ULL << 27) +#define BIC_PKG__ (1ULL << 28) +#define BIC_RAM__ (1ULL << 29) +#define BIC_Pkg_J (1ULL << 30) +#define BIC_Cor_J (1ULL << 31) +#define BIC_GFX_J (1ULL << 30) +#define BIC_RAM_J (1ULL << 31) +#define BIC_Core (1ULL << 32) +#define BIC_CPU (1ULL << 33) + +unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; +unsigned long long bic_present; + +#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) +#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) + +/* + * bic_lookup + * for all the strings in comma separate name_list, + * set the approprate bit in return value. 
+ */ +unsigned long long bic_lookup(char *name_list) +{ + int i; + unsigned long long retval = 0; + + while (name_list) { + char *comma; + + comma = strchr(name_list, ','); + + if (comma) + *comma = '\0'; + + for (i = 0; i < MAX_BIC; ++i) { + if (!strcmp(name_list, bic[i].name)) { + retval |= (1ULL << i); + break; + } + } + if (i == MAX_BIC) { + fprintf(stderr, "Invalid counter name: %s\n", name_list); + exit(-1); + } + + name_list = comma; + if (name_list) + name_list++; + + } + return retval; +} void print_header(void) { struct msr_counter *mp; - if (show_pkg) + if (DO_BIC(BIC_Package)) outp += sprintf(outp, "\tPackage"); - if (show_core) + if (DO_BIC(BIC_Core)) outp += sprintf(outp, "\tCore"); - if (show_cpu) + if (DO_BIC(BIC_CPU)) outp += sprintf(outp, "\tCPU"); - if (has_aperf) + if (DO_BIC(BIC_Avg_MHz)) outp += sprintf(outp, "\tAvg_MHz"); - if (has_aperf) + if (DO_BIC(BIC_Busy)) outp += sprintf(outp, "\tBusy%%"); - if (has_aperf) + if (DO_BIC(BIC_Bzy_MHz)) outp += sprintf(outp, "\tBzy_MHz"); - outp += sprintf(outp, "\tTSC_MHz"); + if (DO_BIC(BIC_TSC_MHz)) + outp += sprintf(outp, "\tTSC_MHz"); if (!debug) goto done; - if (do_irq) + if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "\tIRQ"); - if (do_smi) + if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "\tSMI"); - if (do_nhm_cstates) + if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "\tCPU%%c1"); for (mp = sys.tp; mp; mp = mp->next) { @@ -387,15 +496,15 @@ void print_header(void) } } - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, "\tCPU%%c3"); - if (do_nhm_cstates) + if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "\tCPU%%c6"); - if (do_snb_cstates) + if (DO_BIC(BIC_CPU_c7)) outp += sprintf(outp, "\tCPU%%c7"); - if (do_dts) + if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\tCoreTmp"); for (mp = sys.cp; mp; mp = mp->next) { @@ -409,13 +518,13 @@ void print_header(void) } } - if (do_ptm) + if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "\tPkgTmp"); - if (do_gfx_rc6_ms) + if (DO_BIC(BIC_GFX_rc6)) outp += sprintf(outp, "\tGFX%%rc6"); - if (do_gfx_mhz) + if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "\tGFXMHz"); if (do_skl_residency) { @@ -440,30 +549,30 @@ void print_header(void) } if (do_rapl && !rapl_joules) { - if (do_rapl & RAPL_PKG) + if (DO_BIC(BIC_PkgWatt)) outp += sprintf(outp, "\tPkgWatt"); - if (do_rapl & RAPL_CORES_ENERGY_STATUS) + if (DO_BIC(BIC_CorWatt)) outp += sprintf(outp, "\tCorWatt"); - if (do_rapl & RAPL_GFX) + if (DO_BIC(BIC_GFXWatt)) outp += sprintf(outp, "\tGFXWatt"); - if (do_rapl & RAPL_DRAM) + if (DO_BIC(BIC_RAMWatt)) outp += sprintf(outp, "\tRAMWatt"); - if (do_rapl & RAPL_PKG_PERF_STATUS) + if (DO_BIC(BIC_PKG__)) outp += sprintf(outp, "\tPKG_%%"); - if (do_rapl & RAPL_DRAM_PERF_STATUS) + if (DO_BIC(BIC_RAM__)) outp += sprintf(outp, "\tRAM_%%"); } else if (do_rapl && rapl_joules) { - if (do_rapl & RAPL_PKG) + if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, "\tPkg_J"); - if (do_rapl & RAPL_CORES_ENERGY_STATUS) + if (DO_BIC(BIC_Cor_J)) outp += sprintf(outp, "\tCor_J"); - if (do_rapl & RAPL_GFX) + if (DO_BIC(BIC_GFX_J)) outp += sprintf(outp, "\tGFX_J"); - if (do_rapl & RAPL_DRAM) + if (DO_BIC(BIC_RAM_J)) outp += sprintf(outp, "\tRAM_J"); - if (do_rapl & RAPL_PKG_PERF_STATUS) + if (DO_BIC(BIC_PKG__)) outp += sprintf(outp, "\tPKG_%%"); - if (do_rapl & RAPL_DRAM_PERF_STATUS) + if (DO_BIC(BIC_RAM__)) outp += sprintf(outp, "\tRAM_%%"); } for (mp = sys.pp; mp; mp = mp->next) { @@ -497,9 +606,9 @@ int dump_counters(struct thread_data *t, 
struct core_data *c, outp += sprintf(outp, "mperf: %016llX\n", t->mperf); outp += sprintf(outp, "c1: %016llX\n", t->c1); - if (do_irq) + if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); - if (do_smi) + if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "SMI: %08X\n", t->smi_count); for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { @@ -583,40 +692,37 @@ int format_counters(struct thread_data *t, struct core_data *c, /* topo columns, print blanks on 1st (average) line */ if (t == &average.threads) { - if (show_pkg) + if (DO_BIC(BIC_Package)) outp += sprintf(outp, "\t-"); - if (show_core) + if (DO_BIC(BIC_Core)) outp += sprintf(outp, "\t-"); - if (show_cpu) + if (DO_BIC(BIC_CPU)) outp += sprintf(outp, "\t-"); } else { - if (show_pkg) { + if (DO_BIC(BIC_Package)) { if (p) outp += sprintf(outp, "\t%d", p->package_id); else outp += sprintf(outp, "\t-"); } - if (show_core) { + if (DO_BIC(BIC_Core)) { if (c) outp += sprintf(outp, "\t%d", c->core_id); else outp += sprintf(outp, "\t-"); } - if (show_cpu) + if (DO_BIC(BIC_CPU)) outp += sprintf(outp, "\t%d", t->cpu_id); } - /* Avg_MHz */ - if (has_aperf) + if (DO_BIC(BIC_Avg_MHz)) outp += sprintf(outp, "\t%.0f", 1.0 / units * t->aperf / interval_float); - /* Busy% */ - if (has_aperf) + if (DO_BIC(BIC_Busy)) outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); - /* Bzy_MHz */ - if (has_aperf) { + if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf); else @@ -624,22 +730,22 @@ int format_counters(struct thread_data *t, struct core_data *c, 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); } - /* TSC_MHz */ - outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); + if (DO_BIC(BIC_TSC_MHz)) + outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); if (!debug) goto done; /* IRQ */ - if (do_irq) + if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "\t%d", t->irq_count); /* SMI */ - if (do_smi) + if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "\t%d", t->smi_count); /* C1 */ - if (do_nhm_cstates) + if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc); /* Added counters */ @@ -660,15 +766,14 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); - if (do_nhm_cstates) + if (DO_BIC(BIC_CPU_c6)) outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); - if (do_snb_cstates) + if (DO_BIC(BIC_CPU_c7)) outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); - - if (do_dts) + if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\t%d", c->core_temp_c); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { @@ -689,11 +794,11 @@ int format_counters(struct thread_data *t, struct core_data *c, goto done; /* PkgTmp */ - if (do_ptm) + if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "\t%d", p->pkg_temp_c); /* GFXrc6 */ - if (do_gfx_rc6_ms) { + if (DO_BIC(BIC_GFX_rc6)) { if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ outp += sprintf(outp, "\t**.**"); } else { @@ -703,7 +808,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } /* GFXMHz */ - if (do_gfx_mhz) + if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "\t%d", p->gfx_mhz); /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ @@ -737,37 +842,27 @@ int format_counters(struct thread_data *t, struct core_data *c, else fmt8 = "%6.0f**"; - if 
(do_rapl && !rapl_joules) { - if (do_rapl & RAPL_PKG) - outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); - if (do_rapl & RAPL_CORES_ENERGY_STATUS) - outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); - if (do_rapl & RAPL_GFX) - outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); - if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); - if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); - if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - } else if (do_rapl && rapl_joules) { - if (do_rapl & RAPL_PKG) - outp += sprintf(outp, fmt8, - p->energy_pkg * rapl_energy_units); - if (do_rapl & RAPL_CORES) - outp += sprintf(outp, fmt8, - p->energy_cores * rapl_energy_units); - if (do_rapl & RAPL_GFX) - outp += sprintf(outp, fmt8, - p->energy_gfx * rapl_energy_units); - if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, fmt8, - p->energy_dram * rapl_dram_energy_units); - if (do_rapl & RAPL_PKG_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); - if (do_rapl & RAPL_DRAM_PERF_STATUS) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - } + if (DO_BIC(BIC_PkgWatt)) + outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); + if (DO_BIC(BIC_CorWatt)) + outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); + if (DO_BIC(BIC_GFXWatt)) + outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); + if (DO_BIC(BIC_RAMWatt)) + outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); + if (DO_BIC(BIC_Pkg_J)) + outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units); + if (DO_BIC(BIC_Cor_J)) + outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units); + if (DO_BIC(BIC_GFX_J)) + outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units); + if (DO_BIC(BIC_RAM_J)) + outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units); + if (DO_BIC(BIC_PKG__)) + outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); + if (DO_BIC(BIC_RAM__)) + outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) @@ -921,7 +1016,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->c1 = new->c1 - old->c1; - if (has_aperf) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) { if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { old->aperf = new->aperf - old->aperf; old->mperf = new->mperf - old->mperf; @@ -957,10 +1052,10 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->mperf = 1; /* divide by 0 protection */ } - if (do_irq) + if (DO_BIC(BIC_IRQ)) old->irq_count = new->irq_count - old->irq_count; - if (do_smi) + if (DO_BIC(BIC_SMI)) old->smi_count = new->smi_count - old->smi_count; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { @@ -1217,7 +1312,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) retry: t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (has_aperf) { + if 
(DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) { unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; /* @@ -1273,9 +1368,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) t->mperf = t->mperf * aperf_mperf_multiplier; } - if (do_irq) + if (DO_BIC(BIC_IRQ)) t->irq_count = irqs_per_cpu[cpu]; - if (do_smi) { + if (DO_BIC(BIC_SMI)) { if (get_msr(cpu, MSR_SMI_COUNT, &msr)) return -5; t->smi_count = msr & 0xFFFFFFFF; @@ -1296,12 +1391,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; - if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) { + if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } - if (do_nhm_cstates && !do_knl_cstates) { + if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; } else if (do_knl_cstates) { @@ -1309,11 +1404,11 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -7; } - if (do_snb_cstates) + if (DO_BIC(BIC_CPU_c7)) if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) return -8; - if (do_dts) { + if (DO_BIC(BIC_CoreTmp)) { if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) return -9; c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); @@ -1388,16 +1483,16 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -16; p->rapl_dram_perf_status = msr & 0xFFFFFFFF; } - if (do_ptm) { + if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return -17; p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } - if (do_gfx_rc6_ms) + if (DO_BIC(BIC_GFX_rc6)) p->gfx_rc6_ms = gfx_cur_rc6_ms; - if (do_gfx_mhz) + if (DO_BIC(BIC_GFXMHz)) p->gfx_mhz = gfx_cur_mhz; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { @@ -2155,10 +2250,10 @@ int snapshot_proc_sysfs_files(void) if (snapshot_proc_interrupts()) return 1; - if (do_gfx_rc6_ms) + if (DO_BIC(BIC_GFX_rc6)) snapshot_gfx_rc6_ms(); - if (do_gfx_mhz) + if (DO_BIC(BIC_GFXMHz)) snapshot_gfx_mhz(); return 0; @@ -2794,15 +2889,39 @@ void rapl_probe(unsigned int family, unsigned int model) case INTEL_FAM6_BROADWELL_CORE: /* BDW */ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_GFX_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_GFXWatt); + } break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; + if (rapl_joules) + BIC_PRESENT(BIC_Pkg_J); + else + BIC_PRESENT(BIC_PkgWatt); break; case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */ case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; case INTEL_FAM6_HASWELL_X: /* HSX */ case INTEL_FAM6_BROADWELL_X: /* BDX */ @@ -2811,17 +2930,55 @@ void rapl_probe(unsigned int family, unsigned int model) case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ 
case INTEL_FAM6_XEON_PHI_KNM: do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; case INTEL_FAM6_SANDYBRIDGE_X: case INTEL_FAM6_IVYBRIDGE_X: do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ do_rapl = RAPL_PKG | RAPL_CORES; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + } break; case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } break; default: return; @@ -3398,8 +3555,17 @@ void process_cpuid() __cpuid(0x6, eax, ebx, ecx, edx); has_aperf = ecx & (1 << 0); + if (has_aperf) { + BIC_PRESENT(BIC_Avg_MHz); + BIC_PRESENT(BIC_Busy); + BIC_PRESENT(BIC_Bzy_MHz); + } do_dts = eax & (1 << 0); + if (do_dts) + BIC_PRESENT(BIC_CoreTmp); do_ptm = eax & (1 << 6); + if (do_ptm) + BIC_PRESENT(BIC_PkgTmp); has_hwp = eax & (1 << 7); has_hwp_notify = eax & (1 << 8); has_hwp_activity_window = eax & (1 << 9); @@ -3497,8 +3663,21 @@ void process_cpuid() if (has_aperf) aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); - do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); + BIC_PRESENT(BIC_IRQ); + BIC_PRESENT(BIC_TSC_MHz); + + if (probe_nhm_msrs(family, model)) { + do_nhm_platform_info = 1; + BIC_PRESENT(BIC_CPU_c1); + BIC_PRESENT(BIC_CPU_c3); + BIC_PRESENT(BIC_CPU_c6); + BIC_PRESENT(BIC_SMI); + } do_snb_cstates = has_snb_msrs(family, model); + + if (do_snb_cstates) + BIC_PRESENT(BIC_CPU_c7); + do_irtl_snb = has_snb_msrs(family, model); do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); do_pc3 = (pkg_cstate_limit >= PCL__3); @@ -3522,9 +3701,11 @@ void process_cpuid() if (has_skl_msrs(family, model)) calculate_tsc_tweak(); - do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK); + if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) + BIC_PRESENT(BIC_GFX_rc6); - do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK); + if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXMHz); return; } @@ -3583,7 +3764,7 @@ void topology_probe() topo.max_cpu_num = 0; for_all_proc_cpus(count_cpus); if (!summary_only && topo.num_cpus > 1) - show_cpu = 1; + BIC_PRESENT(BIC_CPU); if (debug > 1) fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); @@ -3644,14 +3825,14 @@ void topology_probe() fprintf(outf, "max_core_id %d, sizing for %d cores per 
package\n", max_core_id, topo.num_cores_per_pkg); if (debug && !summary_only && topo.num_cores_per_pkg > 1) - show_core = 1; + BIC_PRESENT(BIC_Core); topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); if (debug && !summary_only && topo.num_packages > 1) - show_pkg = 1; + BIC_PRESENT(BIC_Package); topo.num_threads_per_core = max_siblings; if (debug > 1) @@ -4045,6 +4226,40 @@ void parse_add_command(char *add_command) exit(1); } } +/* + * HIDE_LIST - hide this list of counters, show the rest [default] + * SHOW_LIST - show this list of counters, hide the rest + */ +enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; + +int shown; +/* + * parse_show_hide() - process cmdline to set default counter action + */ +void parse_show_hide(char *optarg, enum show_hide_mode new_mode) +{ + /* + * --show: show only those specified + * The 1st invocation will clear and replace the enabled mask + * subsequent invocations can add to it. + */ + if (new_mode == SHOW_LIST) { + if (shown == 0) + bic_enabled = bic_lookup(optarg); + else + bic_enabled |= bic_lookup(optarg); + shown = 1; + + return; + } + + /* + * --hide: do not show those specified + * multiple invocations simply clear more bits in enabled mask + */ + bic_enabled &= ~bic_lookup(optarg); +} + void cmdline(int argc, char **argv) { int opt; @@ -4055,10 +4270,12 @@ void cmdline(int argc, char **argv) {"debug", no_argument, 0, 'd'}, {"interval", required_argument, 0, 'i'}, {"help", no_argument, 0, 'h'}, + {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, {"out", required_argument, 0, 'o'}, {"Package", no_argument, 0, 'p'}, {"processor", no_argument, 0, 'p'}, + {"show", required_argument, 0, 's'}, {"Summary", no_argument, 0, 'S'}, {"TCC", required_argument, 0, 'T'}, {"version", no_argument, 0, 'v' }, @@ -4079,6 +4296,9 @@ void cmdline(int argc, char **argv) case 'd': debug++; break; + case 'H': + parse_show_hide(optarg, HIDE_LIST); + break; case 'h': default: help(); @@ -4109,6 +4329,9 @@ void cmdline(int argc, char **argv) case 'p': show_core_only++; break; + case 's': + parse_show_hide(optarg, SHOW_LIST); + break; case 'S': summary_only++; break; -- GitLab From cf4cbe5314884c3123fe4ca137e9d750b6e2b8c9 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 1 Jan 2017 13:08:33 -0500 Subject: [PATCH 0580/1084] tools/power turbostat: BYT does not have MSR_MISC_PWR_MGMT and so --debug fails with: turbostat: msr 1 offset 0x1aa read failed: Input/output error It seems that baytrail, and airmont do not have this MSR. It is included in subsequent Goldmont Atom. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fff280b50af0..fdf0273465fa 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -70,6 +70,7 @@ unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nhm_platform_info; +unsigned int no_MSR_MISC_PWR_MGMT; unsigned int aperf_mperf_multiplier = 1; double bclk; double base_hz; @@ -330,7 +331,7 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); if (retval != sizeof *msr) - err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset); + err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset); return 0; } @@ -2384,6 +2385,8 @@ void check_permissions() * MSR_PLATFORM_INFO 0x000000ce * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 * + * MSR_MISC_PWR_MGMT 0x000001aa + * * MSR_PKG_C3_RESIDENCY 0x000003f8 * MSR_PKG_C6_RESIDENCY 0x000003f9 * MSR_CORE_C3_RESIDENCY 0x000003fc @@ -2440,11 +2443,13 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) pkg_cstate_limits = skx_pkg_cstate_limits; break; case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ + no_MSR_MISC_PWR_MGMT = 1; case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ pkg_cstate_limits = slv_pkg_cstate_limits; break; case INTEL_FAM6_ATOM_AIRMONT: /* AMT */ pkg_cstate_limits = amt_pkg_cstate_limits; + no_MSR_MISC_PWR_MGMT = 1; break; case INTEL_FAM6_XEON_PHI_KNL: /* PHI */ case INTEL_FAM6_XEON_PHI_KNM: @@ -3481,6 +3486,9 @@ void decode_misc_pwr_mgmt_msr(void) if (!do_nhm_platform_info) return; + if (no_MSR_MISC_PWR_MGMT) + return; + if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", base_cpu, msr, @@ -4061,7 +4069,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.16 24 Dec 2016" + fprintf(outf, "turbostat version 4.17 1 Jan 2017" " - Len Brown \n"); } -- GitLab From 71616c8e936a6dd541f0627d7bf4ff09971d8ccb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 22:37:48 -0500 Subject: [PATCH 0581/1084] tools/power turbostat: decode Baytrail CC6 and MC6 demotion configuration with --debug, see: cpu0: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x00000000 (DISable-CC6-Demotion) cpu0: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x00000000 (DISable-MC6-Demotion) Note that the hardware default is to enable demotion, and Linux started clearing these registers in 3.17. 
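The decode itself (shown in the diff below) amounts to testing bit 0 of each
demotion-policy MSR. A tiny stand-alone sketch of that bit test, with the MSR value
taken as already read:

#include <stdio.h>

static void show_demotion_sketch(const char *name, unsigned long long msr)
{
	/* bit 0 set => demotion enabled for this C-state */
	printf("%s: 0x%08llx (%sable-Demotion)\n",
	       name, msr, (msr & (1 << 0)) ? "EN" : "DIS");
}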
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fdf0273465fa..1b762f67e3e2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3262,6 +3262,27 @@ int has_snb_msrs(unsigned int family, unsigned int model) return 0; } +/* + * SLV client has supporet for unique MSRs: + * + * MSR_CC6_DEMOTION_POLICY_CONFIG + * MSR_MC6_DEMOTION_POLICY_CONFIG + */ + +int has_slv_msrs(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_MERRIFIELD: + case INTEL_FAM6_ATOM_MOOREFIELD: + return 1; + } + return 0; +} + /* * HSW adds support for additional MSRs: * @@ -3496,6 +3517,24 @@ void decode_misc_pwr_mgmt_msr(void) msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); } +/* + * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG + * + * This MSRs are present on Silvermont processors, + * Intel Atom processor E3000 series (Baytrail), and friends. + */ +void decode_c6_demotion_policy_msr(void) +{ + unsigned long long msr; + + if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) + fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", + base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); + + if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) + fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", + base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); +} void process_cpuid() { @@ -3700,6 +3739,9 @@ void process_cpuid() if (debug) decode_misc_pwr_mgmt_msr(); + if (debug && has_slv_msrs(family, model)) + decode_c6_demotion_policy_msr(); + rapl_probe(family, model); perf_limit_reasons_probe(family, model); -- GitLab From 8f6196c192f6393823e632bfb927ff1572369875 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 22:40:23 -0500 Subject: [PATCH 0582/1084] tools/power turbostat: Baytrail: remove debug line in quiet mode Without --debug, a debug line was printed on Baytrail: SLM BCLK: 83.3 Mhz Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1b762f67e3e2..9b35c9bb0489 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3388,7 +3388,8 @@ double slm_bclk(void) } freq = slm_freq_table[i]; - fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); + if (debug) + fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); return freq; } -- GitLab From f2642888476d7faefa9695bbebb2abbaeb3685d8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:13:15 -0500 Subject: [PATCH 0583/1084] tools/power turbostat: update MSR_PKG_CST_CONFIG_CONTROL decoding AMT value 0 is unlimited, not PC0 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9b35c9bb0489..aedfaddbad30 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1534,7 +1534,7 @@ int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, 
PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -- GitLab From dc7ffefdcc28a45214aa707fdc3df6a5e611ba09 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Sun, 19 Feb 2017 16:35:59 -0500 Subject: [PATCH 0584/1084] staging/lustre/lnet: Fix allocation size for sv_cpt_data This is unbreaking another of those "stealth" janitor patches that got in and subtly broke some things. sv_cpt_data is a pointer to pointer, so need to dereference it twice to allocate the correct structure size. Fixes: 9899cb68c6c2 ("Staging: lustre: rpc: Use sizeof type *pointer instead of sizeof type.") CC: Sandhya Bankar Cc: stable # 4.7+ Signed-off-by: Oleg Drokin Reviewed-by: James Simmons Reviewed-by: Doug Oucharek Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/lnet/selftest/rpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/lustre/lnet/selftest/rpc.c b/drivers/staging/lustre/lnet/selftest/rpc.c index 92cd4113cf75..87fe366f8f70 100644 --- a/drivers/staging/lustre/lnet/selftest/rpc.c +++ b/drivers/staging/lustre/lnet/selftest/rpc.c @@ -255,7 +255,7 @@ srpc_service_init(struct srpc_service *svc) svc->sv_shuttingdown = 0; svc->sv_cpt_data = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*svc->sv_cpt_data)); + sizeof(**svc->sv_cpt_data)); if (!svc->sv_cpt_data) return -ENOMEM; -- GitLab From f4082c6f28a8e77dcb694c6f23de9e533251051d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 18 Feb 2017 02:20:36 +0300 Subject: [PATCH 0585/1084] staging: bcm2835/mmal-vchiq: unlock on error in buffer_from_host() We should unlock before returning on this error path. 
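The one-line description above corresponds to the standard unlock-on-error idiom: once
the mutex is held, error legs jump to a common exit label instead of returning
directly. A condensed sketch of the fixed flow in buffer_from_host() (the full hunk
follows):

	mutex_lock(&instance->bulk_mutex);

	msg_context = get_msg_context(instance);
	if (!msg_context) {
		ret = -ENOMEM;
		goto unlock;		/* do not return with bulk_mutex held */
	}

	/* ... store context and queue the buffer ... */

unlock:
	mutex_unlock(&instance->bulk_mutex);
	return ret;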
Fixes: 7b3ad5abf027 ("staging: Import the BCM2835 MMAL-based V4L2 camera driver.") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/platform/bcm2835/mmal-vchiq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/platform/bcm2835/mmal-vchiq.c b/drivers/staging/media/platform/bcm2835/mmal-vchiq.c index f0639ee6c8b9..fdfb6a620a43 100644 --- a/drivers/staging/media/platform/bcm2835/mmal-vchiq.c +++ b/drivers/staging/media/platform/bcm2835/mmal-vchiq.c @@ -397,8 +397,10 @@ buffer_from_host(struct vchiq_mmal_instance *instance, /* get context */ msg_context = get_msg_context(instance); - if (msg_context == NULL) - return -ENOMEM; + if (!msg_context) { + ret = -ENOMEM; + goto unlock; + } /* store bulk message context for when data arrives */ msg_context->u.bulk.instance = instance; @@ -454,6 +456,7 @@ buffer_from_host(struct vchiq_mmal_instance *instance, vchi_service_release(instance->handle); +unlock: mutex_unlock(&instance->bulk_mutex); return ret; -- GitLab From 6cf1bf636a067eb308cb3a8322b9d6b1844a075d Mon Sep 17 00:00:00 2001 From: Michael Zoran Date: Sat, 18 Feb 2017 03:22:01 -0800 Subject: [PATCH 0586/1084] staging: vchiq_2835_arm: Make cache-line-size a required DT property The original github source allowed for the cache-line-size property to be missing. Since recent firmwares also require this property, it makes sense to always require it in the driver as well. If the cache-line-size property is missing, then the driver probe should fail as no dev since the kernel and dt may be out of sync. The fix is to add a check for the return value of of_property_read_u32. Changes V2: 1. Add error message if cache-line-size is missing. 2. Simple check for non-zero return value from of_property_read_u32. Signed-off-by: Michael Zoran Acked-by: Stefan Wahren Signed-off-by: Greg Kroah-Hartman --- .../vc04_services/interface/vchiq_arm/vchiq_2835_arm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c index e6241fb5cfa6..3aeffcb9c87e 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c @@ -121,8 +121,14 @@ int vchiq_platform_init(struct platform_device *pdev, VCHIQ_STATE_T *state) if (err < 0) return err; - (void)of_property_read_u32(dev->of_node, "cache-line-size", + err = of_property_read_u32(dev->of_node, "cache-line-size", &g_cache_line_size); + + if (err) { + dev_err(dev, "Missing cache-line-size property\n"); + return -ENODEV; + } + g_fragments_size = 2 * g_cache_line_size; /* Allocate space for the channels in coherent memory */ -- GitLab From 31788ca803a0c89078f9e604e64286fbd9077926 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 21 Feb 2017 17:42:27 +0700 Subject: [PATCH 0587/1084] drm/vmwgfx: Work around drm removal of control nodes vmware tools has a daemon that gets layout information from the GUI and forwards it to DRM so that the modesetting code can set preferred connector locations and modes. This daemon was using control nodes but since control nodes were just removed, make it possible for the daemon to use render- or primary nodes instead. This is a bit ugly but will allow drm to proceed with removal of the mostly unused control-node code and allow vmware to proceed with fixing up automatic layout settings for gnome-shell/wayland. 
We bump minor to inform user-space about the api change. Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170221104227.2854-1-thellstrom@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 11 ++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 541a5887dd6c..d08f26973d0b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -199,9 +199,14 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { VMW_IOCTL_DEF(VMW_PRESENT_READBACK, vmw_present_readback_ioctl, DRM_MASTER | DRM_AUTH), + /* + * The permissions of the below ioctl are overridden in + * vmw_generic_ioctl(). We require either + * DRM_MASTER or capable(CAP_SYS_ADMIN). + */ VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_CREATE_SHADER, vmw_shader_define_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), @@ -1123,6 +1128,10 @@ static long vmw_generic_ioctl(struct file *filp, unsigned int cmd, return (long) vmw_execbuf_ioctl(dev, arg, file_priv, _IOC_SIZE(cmd)); + } else if (nr == DRM_COMMAND_BASE + DRM_VMW_UPDATE_LAYOUT) { + if (!drm_is_current_master(file_priv) && + !capable(CAP_SYS_ADMIN)) + return -EACCES; } if (unlikely(ioctl->cmd != cmd)) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 1e59a486bba8..59ff4197173a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -41,9 +41,9 @@ #include #include "vmwgfx_fence.h" -#define VMWGFX_DRIVER_DATE "20160210" +#define VMWGFX_DRIVER_DATE "20170221" #define VMWGFX_DRIVER_MAJOR 2 -#define VMWGFX_DRIVER_MINOR 11 +#define VMWGFX_DRIVER_MINOR 12 #define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) -- GitLab From bd4e2d2907fa23a11d46217064ecf80470ddae10 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 22 Feb 2017 22:06:32 -0800 Subject: [PATCH 0588/1084] target: Fix NULL dereference during LUN lookup + active I/O shutdown When transport_clear_lun_ref() is shutting down a se_lun via configfs with new I/O in-flight, it's possible to trigger a NULL pointer dereference in transport_lookup_cmd_lun() due to the fact percpu_ref_get() doesn't do any __PERCPU_REF_DEAD checking before incrementing lun->lun_ref.count after lun->lun_ref has switched to atomic_t mode. This results in a NULL pointer dereference as LUN shutdown code in core_tpg_remove_lun() continues running after the existing ->release() -> core_tpg_lun_ref_release() callback completes, and clears the RCU protected se_lun->lun_se_dev pointer. During the OOPs, the state of lun->lun_ref in the process which triggered the NULL pointer dereference looks like the following on v4.1.y stable code: struct se_lun { lun_link_magic = 4294932337, lun_status = TRANSPORT_LUN_STATUS_FREE, ..... lun_se_dev = 0x0, lun_sep = 0x0, ..... lun_ref = { count = { counter = 1 }, percpu_count_ptr = 3, release = 0xffffffffa02fa1e0 , confirm_switch = 0x0, force_atomic = false, rcu = { next = 0xffff88154fa1a5d0, func = 0xffffffff8137c4c0 } } } To address this bug, use percpu_ref_tryget_live() to ensure once __PERCPU_REF_DEAD is visable on all CPUs and ->lun_ref has switched to atomic_t, all new I/Os will fail to obtain a new lun->lun_ref reference. 
Also use an explicit percpu_ref_kill_and_confirm() callback to block on ->lun_ref_comp to allow the first stage and associated RCU grace period to complete, and then block on ->lun_ref_shutdown waiting for the final percpu_ref_put() to drop the last reference via transport_lun_remove_cmd() before continuing with core_tpg_remove_lun() shutdown. Reported-by: Rob Millner Tested-by: Rob Millner Cc: Rob Millner Tested-by: Vaibhav Tandon Cc: Vaibhav Tandon Tested-by: Bryant G. Ly Cc: # v3.14+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 10 +++++++-- drivers/target/target_core_tpg.c | 3 ++- drivers/target/target_core_transport.c | 31 +++++++++++++++++++++++++- include/target/target_core_base.h | 1 + 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index cb7047d66afc..c754ae33bf7b 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -78,12 +78,16 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) &deve->read_bytes); se_lun = rcu_dereference(deve->se_lun); + + if (!percpu_ref_tryget_live(&se_lun->lun_ref)) { + se_lun = NULL; + goto out_unlock; + } + se_cmd->se_lun = rcu_dereference(deve->se_lun); se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; - - percpu_ref_get(&se_lun->lun_ref); se_cmd->lun_ref_active = true; if ((se_cmd->data_direction == DMA_TO_DEVICE) && @@ -97,6 +101,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) goto ref_dev; } } +out_unlock: rcu_read_unlock(); if (!se_lun) { @@ -815,6 +820,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) xcopy_lun = &dev->xcopy_lun; rcu_assign_pointer(xcopy_lun->lun_se_dev, dev); init_completion(&xcopy_lun->lun_ref_comp); + init_completion(&xcopy_lun->lun_shutdown_comp); INIT_LIST_HEAD(&xcopy_lun->lun_deve_list); INIT_LIST_HEAD(&xcopy_lun->lun_dev_link); mutex_init(&xcopy_lun->lun_tg_pt_md_mutex); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index d99752c6cd60..2744251178ad 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -445,7 +445,7 @@ static void core_tpg_lun_ref_release(struct percpu_ref *ref) { struct se_lun *lun = container_of(ref, struct se_lun, lun_ref); - complete(&lun->lun_ref_comp); + complete(&lun->lun_shutdown_comp); } int core_tpg_register( @@ -571,6 +571,7 @@ struct se_lun *core_tpg_alloc_lun( lun->lun_link_magic = SE_LUN_LINK_MAGIC; atomic_set(&lun->lun_acl_count, 0); init_completion(&lun->lun_ref_comp); + init_completion(&lun->lun_shutdown_comp); INIT_LIST_HEAD(&lun->lun_deve_list); INIT_LIST_HEAD(&lun->lun_dev_link); atomic_set(&lun->lun_tg_pt_secondary_offline, 0); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index efb9e6f38201..434d9d693989 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2700,10 +2700,39 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) } EXPORT_SYMBOL(target_wait_for_sess_cmds); +static void target_lun_confirm(struct percpu_ref *ref) +{ + struct se_lun *lun = container_of(ref, struct se_lun, lun_ref); + + complete(&lun->lun_ref_comp); +} + void transport_clear_lun_ref(struct se_lun *lun) { - percpu_ref_kill(&lun->lun_ref); + /* + * Mark the percpu-ref as DEAD, switch to atomic_t mode, drop + * the initial reference and schedule confirm 
kill to be + * executed after one full RCU grace period has completed. + */ + percpu_ref_kill_and_confirm(&lun->lun_ref, target_lun_confirm); + /* + * The first completion waits for percpu_ref_switch_to_atomic_rcu() + * to call target_lun_confirm after lun->lun_ref has been marked + * as __PERCPU_REF_DEAD on all CPUs, and switches to atomic_t + * mode so that percpu_ref_tryget_live() lookup of lun->lun_ref + * fails for all new incoming I/O. + */ wait_for_completion(&lun->lun_ref_comp); + /* + * The second completion waits for percpu_ref_put_many() to + * invoke ->release() after lun->lun_ref has switched to + * atomic_t mode, and lun->lun_ref.count has reached zero. + * + * At this point all target-core lun->lun_ref references have + * been dropped via transport_lun_remove_cmd(), and it's safe + * to proceed with the remaining LUN shutdown. + */ + wait_for_completion(&lun->lun_shutdown_comp); } static bool diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index d7336f3c6b60..16d3be8395be 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -730,6 +730,7 @@ struct se_lun { struct config_group lun_group; struct se_port_stat_grps port_stat_grps; struct completion lun_ref_comp; + struct completion lun_shutdown_comp; struct percpu_ref lun_ref; struct list_head lun_dev_link; struct hlist_node link; -- GitLab From 17c61ad66f2e09a9014ab2d4e1f04c8294427db1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 23 Feb 2017 21:26:31 -0800 Subject: [PATCH 0589/1084] iscsi-target: Fix early login failure statistics misses Due to the long standing checks in iscsit_snmp_get_tiqn() that assume conn->sess->tpg dereference of tpg->tpg_tiqn for iscsit_collect_login_stats() usage, some of the early login failure cases like ISCSI_LOGIN_STATUS_TGT_FORBIDDEN where not getting incremented, due to sess->tpg assignment happening later in iscsi_login_zero_tsih_s2(). Instead, use the earlier conn->tpg assignment done by iscsi_target_locate_portal() -> iscsit_get_tpg_from_np() so the existing counters are incremented correctly for the various early login failure cases. Also, go ahead and drop the old rate limiting check in iscsit_collect_login_stats(), so we get the true number of failed login attempts in the existing statistics. 
Reported-by: Ryan Stiles Cc: Ryan Stiles Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_util.c | 38 ++---------------------- 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index f46eadffec07..cc5958882431 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1378,33 +1378,6 @@ int tx_data( return iscsit_do_tx_data(conn, &c); } -static bool sockaddr_equal(struct sockaddr_storage *x, struct sockaddr_storage *y) -{ - switch (x->ss_family) { - case AF_INET: { - struct sockaddr_in *sinx = (struct sockaddr_in *)x; - struct sockaddr_in *siny = (struct sockaddr_in *)y; - if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) - return false; - if (sinx->sin_port != siny->sin_port) - return false; - break; - } - case AF_INET6: { - struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; - struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; - if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) - return false; - if (sinx->sin6_port != siny->sin6_port) - return false; - break; - } - default: - return false; - } - return true; -} - void iscsit_collect_login_stats( struct iscsi_conn *conn, u8 status_class, @@ -1421,13 +1394,6 @@ void iscsit_collect_login_stats( ls = &tiqn->login_stats; spin_lock(&ls->lock); - if (sockaddr_equal(&conn->login_sockaddr, &ls->last_intr_fail_sockaddr) && - ((get_jiffies_64() - ls->last_fail_time) < 10)) { - /* We already have the failure info for this login */ - spin_unlock(&ls->lock); - return; - } - if (status_class == ISCSI_STATUS_CLS_SUCCESS) ls->accepts++; else if (status_class == ISCSI_STATUS_CLS_REDIRECT) { @@ -1472,10 +1438,10 @@ struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *conn) { struct iscsi_portal_group *tpg; - if (!conn || !conn->sess) + if (!conn) return NULL; - tpg = conn->sess->tpg; + tpg = conn->tpg; if (!tpg) return NULL; -- GitLab From c87ba9c49c1fa86261448b09c5f1b2223bf7efd9 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 19 Jan 2017 15:45:57 -0800 Subject: [PATCH 0590/1084] target: Add counters for ABORT_TASK success + failure This patch introduces two counters for ABORT_TASK success + failure under: /sys/kernel/config/target/core/$HBA/$DEV/statistics/scsi_tgt_dev/ that are useful for diagnosing various backend device latency and front fabric issues. Normally when folks see alot of aborts_complete happening, it means the backend device I/O completion latency is high, and not returning completions fast enough before host side timeouts trigger. And normally when folks see alot of aborts_no_task, it means completions are being posted by target-core into fabric driver code, but the responses aren't making it back to the host. 
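The counters can be polled like any other configfs attribute. A small illustrative userspace reader follows; the "iblock_0/disk0" path component is only a placeholder for whatever HBA/device exists in the local configfs layout.

#include <limits.h>
#include <stdio.h>

static long read_counter(const char *path)
{
	long val = -1;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	/* substitute the local $HBA/$DEV for "iblock_0/disk0" */
	const char *base = "/sys/kernel/config/target/core/iblock_0/disk0"
			   "/statistics/scsi_tgt_dev";
	char path[PATH_MAX];

	snprintf(path, sizeof(path), "%s/aborts_complete", base);
	printf("aborts_complete: %ld\n", read_counter(path));

	snprintf(path, sizeof(path), "%s/aborts_no_task", base);
	printf("aborts_no_task:  %ld\n", read_counter(path));
	return 0;
}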
Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_stat.c | 18 ++++++++++++++++++ drivers/target/target_core_tmr.c | 2 ++ include/target/target_core_base.h | 2 ++ 3 files changed, 22 insertions(+) diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index be380e4acfcd..8038255b21e8 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -158,12 +158,28 @@ static ssize_t target_stat_tgt_resets_show(struct config_item *item, atomic_long_read(&to_stat_tgt_dev(item)->num_resets)); } +static ssize_t target_stat_tgt_aborts_complete_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&to_stat_tgt_dev(item)->aborts_complete)); +} + +static ssize_t target_stat_tgt_aborts_no_task_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%lu\n", + atomic_long_read(&to_stat_tgt_dev(item)->aborts_no_task)); +} + CONFIGFS_ATTR_RO(target_stat_tgt_, inst); CONFIGFS_ATTR_RO(target_stat_tgt_, indx); CONFIGFS_ATTR_RO(target_stat_tgt_, num_lus); CONFIGFS_ATTR_RO(target_stat_tgt_, status); CONFIGFS_ATTR_RO(target_stat_tgt_, non_access_lus); CONFIGFS_ATTR_RO(target_stat_tgt_, resets); +CONFIGFS_ATTR_RO(target_stat_tgt_, aborts_complete); +CONFIGFS_ATTR_RO(target_stat_tgt_, aborts_no_task); static struct configfs_attribute *target_stat_scsi_tgt_dev_attrs[] = { &target_stat_tgt_attr_inst, @@ -172,6 +188,8 @@ static struct configfs_attribute *target_stat_scsi_tgt_dev_attrs[] = { &target_stat_tgt_attr_status, &target_stat_tgt_attr_non_access_lus, &target_stat_tgt_attr_resets, + &target_stat_tgt_attr_aborts_complete, + &target_stat_tgt_attr_aborts_no_task, NULL, }; diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index a806d9bca3d2..dce1e1b47316 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -190,6 +190,7 @@ void core_tmr_abort_task( printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" " ref_tag: %llu\n", ref_tag); tmr->response = TMR_FUNCTION_COMPLETE; + atomic_long_inc(&dev->aborts_complete); return; } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); @@ -197,6 +198,7 @@ void core_tmr_abort_task( printk("ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST for ref_tag: %lld\n", tmr->ref_task_tag); tmr->response = TMR_TASK_DOES_NOT_EXIST; + atomic_long_inc(&dev->aborts_no_task); } static void core_tmr_drain_tmr_list( diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 16d3be8395be..49ce5bc9912f 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -766,6 +766,8 @@ struct se_device { u32 dev_index; u64 creation_time; atomic_long_t num_resets; + atomic_long_t aborts_complete; + atomic_long_t aborts_no_task; atomic_long_t num_cmds; atomic_long_t read_bytes; atomic_long_t write_bytes; -- GitLab From 9bd829041b704e5e501ad5f5a9374a63426763fd Mon Sep 17 00:00:00 2001 From: George Cherian Date: Wed, 15 Feb 2017 12:42:19 +0000 Subject: [PATCH 0591/1084] crypto: cavium - Fix couple of static checker errors Fix the following smatch errors cptvf_reqmanager.c:333 do_post_process() warn: variable dereferenced before check 'cptvf' cptvf_main.c:825 cptvf_remove() error: we previously assumed 'cptvf' could be null Reported-by: Dan Carpenter Signed-off-by: George Cherian Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptvf_main.c | 4 +++- drivers/crypto/cavium/cpt/cptvf_reqmanager.c | 4 ++-- 2 files changed, 5 
insertions(+), 3 deletions(-) diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c index aac2966ff8d9..e50872e666d6 100644 --- a/drivers/crypto/cavium/cpt/cptvf_main.c +++ b/drivers/crypto/cavium/cpt/cptvf_main.c @@ -815,8 +815,10 @@ static void cptvf_remove(struct pci_dev *pdev) { struct cpt_vf *cptvf = pci_get_drvdata(pdev); - if (!cptvf) + if (!cptvf) { dev_err(&pdev->dev, "Invalid CPT-VF device\n"); + return; + } /* Convey DOWN to PF */ if (cptvf_send_vf_down(cptvf)) { diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c index 7f57f30f8863..169e66231bcf 100644 --- a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c +++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c @@ -330,8 +330,8 @@ void do_post_process(struct cpt_vf *cptvf, struct cpt_info_buffer *info) { struct pci_dev *pdev = cptvf->pdev; - if (!info || !cptvf) { - dev_err(&pdev->dev, "Input params are incorrect for post processing\n"); + if (!info) { + dev_err(&pdev->dev, "incorrect cpt_info_buffer for post processing\n"); return; } -- GitLab From d80388eca1b476125755a811676f76f138bbbe28 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 17 Feb 2017 15:57:43 +0000 Subject: [PATCH 0592/1084] crypto: cavium - fix leak on curr if curr->head fails to be allocated The exit path when curr->head cannot be allocated fails to kfree the earlier allocated curr. Fix this by kfree'ing it. Signed-off-by: Colin Ian King Signed-off-by: Herbert Xu --- drivers/crypto/cavium/cpt/cptvf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/cavium/cpt/cptvf_main.c b/drivers/crypto/cavium/cpt/cptvf_main.c index e50872e666d6..6ffc740c7431 100644 --- a/drivers/crypto/cavium/cpt/cptvf_main.c +++ b/drivers/crypto/cavium/cpt/cptvf_main.c @@ -242,6 +242,7 @@ static int alloc_command_queues(struct cpt_vf *cptvf, if (!curr->head) { dev_err(&pdev->dev, "Command Q (%d) chunk (%d) allocation failed\n", i, queue->nchunks); + kfree(curr); goto cmd_qfail; } -- GitLab From f7f9482e370981ebeeeac30bbdb9960807e332ee Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 24 Feb 2017 11:27:38 +0100 Subject: [PATCH 0593/1084] crypto: atmel - CRYPTO_DEV_ATMEL_TDES and CRYPTO_DEV_ATMEL_SHA should depend on HAS_DMA If NO_DMA=y: ERROR: "bad_dma_ops" [drivers/crypto/atmel-tdes.ko] undefined! ERROR: "bad_dma_ops" [drivers/crypto/atmel-sha.ko] undefined! Add dependencies on HAS_DMA to fix this. 
Fixes: ceb4afb3086ab08f ("crypto: atmel - refine Kconfig dependencies") Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 2cac445b02fd..69f7fc0dc84d 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -445,6 +445,7 @@ config CRYPTO_DEV_ATMEL_AES config CRYPTO_DEV_ATMEL_TDES tristate "Support for Atmel DES/TDES hw accelerator" + depends on HAS_DMA depends on ARCH_AT91 || COMPILE_TEST select CRYPTO_DES select CRYPTO_BLKCIPHER @@ -458,6 +459,7 @@ config CRYPTO_DEV_ATMEL_TDES config CRYPTO_DEV_ATMEL_SHA tristate "Support for Atmel SHA hw accelerator" + depends on HAS_DMA depends on ARCH_AT91 || COMPILE_TEST select CRYPTO_HASH help -- GitLab From c884b36816c131906038cdac4773f6c84a5bf3ee Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 24 Feb 2017 11:27:39 +0100 Subject: [PATCH 0594/1084] crypto: atmel - CRYPTO_DEV_MEDIATEK should depend on HAS_DMA If NO_DMA=y: ERROR: "bad_dma_ops" [drivers/crypto/mediatek/mtk-crypto.ko] undefined! Add a dependency on HAS_DMA to fix this. Fixes: 7dee9f618790d0b7 ("crypto: mediatek - remove ARM dependencies") Signed-off-by: Geert Uytterhoeven Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 69f7fc0dc84d..a7ff6e5d0ba9 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -571,6 +571,7 @@ config CRYPTO_DEV_ROCKCHIP config CRYPTO_DEV_MEDIATEK tristate "MediaTek's EIP97 Cryptographic Engine driver" + depends on HAS_DMA depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST select CRYPTO_AES select CRYPTO_AEAD -- GitLab From 016df0abc56ec06d0c63c5318ef53e40738dea8b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 26 Feb 2017 12:22:35 +0800 Subject: [PATCH 0595/1084] crypto: api - Add crypto_requires_off helper This patch adds crypto_requires_off which is an extension of crypto_requires_sync for similar bits such as NEED_FALLBACK. Cc: stable@vger.kernel.org #4.10 Suggested-by: Marcelo Cerri Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index ebe4ded0c55d..436c4c2683c7 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -360,13 +360,18 @@ static inline struct crypto_alg *crypto_get_attr_alg(struct rtattr **tb, return crypto_attr_alg(tb[1], type, mask); } +static inline int crypto_requires_off(u32 type, u32 mask, u32 off) +{ + return (type ^ off) & mask & off; +} + /* * Returns CRYPTO_ALG_ASYNC if type/mask requires the use of sync algorithms. * Otherwise returns zero. */ static inline int crypto_requires_sync(u32 type, u32 mask) { - return (type ^ CRYPTO_ALG_ASYNC) & mask & CRYPTO_ALG_ASYNC; + return crypto_requires_off(type, mask, CRYPTO_ALG_ASYNC); } noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size); -- GitLab From 89027579bc6c2febbcc9c2f9d5069adf71539e4b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 26 Feb 2017 12:24:10 +0800 Subject: [PATCH 0596/1084] crypto: xts - Propagate NEED_FALLBACK bit When we're used as a fallback algorithm, we should propagate the NEED_FALLBACK bit when searching for the underlying ECB mode. This just happens to fix a hang too because otherwise the search may end up loading the same module that triggered this XTS creation. 
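To see what the new helper contributes here, a small userspace toy (flag values copied from include/linux/crypto.h; not kernel code). Since the spawn is grabbed with type 0, every bit returned by crypto_requires_off() requires the underlying "ecb(...)" implementation to have that flag clear, so an XTS instance built as a fallback never resolves to another implementation that itself needs a fallback.

#include <stdio.h>

#define CRYPTO_ALG_ASYNC		0x00000080
#define CRYPTO_ALG_NEED_FALLBACK	0x00000100

static unsigned int crypto_requires_off(unsigned int type, unsigned int mask,
					unsigned int off)
{
	return (type ^ off) & mask & off;
}

int main(void)
{
	/* a caller that asked for a sync algorithm that needs no fallback */
	unsigned int type = 0;
	unsigned int mask = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC;
	unsigned int off  = CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC;

	/* prints 0x180: both bits must be clear in the inner ecb(...) */
	printf("propagated mask: %#x\n", crypto_requires_off(type, mask, off));
	return 0;
}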
Cc: stable@vger.kernel.org #4.10 Fixes: f1c131b45410 ("crypto: xts - Convert to skcipher") Reported-by: Harald Freudenberger Signed-off-by: Herbert Xu --- crypto/xts.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/crypto/xts.c b/crypto/xts.c index 410a2e299085..baeb34dd8582 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -463,6 +463,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) struct xts_instance_ctx *ctx; struct skcipher_alg *alg; const char *cipher_name; + u32 mask; int err; algt = crypto_get_attr_type(tb); @@ -483,18 +484,19 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) ctx = skcipher_instance_ctx(inst); crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst)); - err = crypto_grab_skcipher(&ctx->spawn, cipher_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + + mask = crypto_requires_off(algt->type, algt->mask, + CRYPTO_ALG_NEED_FALLBACK | + CRYPTO_ALG_ASYNC); + + err = crypto_grab_skcipher(&ctx->spawn, cipher_name, 0, mask); if (err == -ENOENT) { err = -ENAMETOOLONG; if (snprintf(ctx->name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; - err = crypto_grab_skcipher(&ctx->spawn, ctx->name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ctx->spawn, ctx->name, 0, mask); } if (err) -- GitLab From 29693efcea0f38cf40d0055d2401490a4f9bf8be Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 27 Feb 2017 10:11:47 +0800 Subject: [PATCH 0597/1084] ALSA: hda - Fix micmute hotkey problem for a lenovo AIO machine On this machine, the micmute button is connected to Line2 of the codec and the micmute led is connected to GPIO2 of the codec. After applying this quirk, both hotkey and led work well. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 73a00460b5c1..e2da176ac169 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5724,6 +5724,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), + SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), -- GitLab From 888022c0473d079bff9b47fb50434b1f20f8f37f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 21 Feb 2017 14:21:01 +0100 Subject: [PATCH 0598/1084] dma-buf: add support for compat ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add compat ioctl support to dma-buf. This lets one to use DMA_BUF_IOCTL_SYNC ioctl from 32bit application on 64bit kernel. Data structures for both 32 and 64bit modes are same, so there is no need for additional translation layer. 
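From userspace this is simply the existing sync ioctl, which now works the same whether the caller is 32-bit or 64-bit. An illustrative sketch, assuming a dma-buf fd obtained from some exporter and already mmap()ed:

#include <sys/ioctl.h>
#include <linux/dma-buf.h>

/* Bracket CPU access to a mapped dma-buf. */
static int cpu_access(int dmabuf_fd)
{
	struct dma_buf_sync sync = {
		.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW,
	};

	if (ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync))
		return -1;

	/* ... CPU reads/writes through the mmap()ed buffer go here ... */

	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
	return ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);
}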
Signed-off-by: Marek Szyprowski Reviewed-by: Christian König Acked-by: Daniel Vetter Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/1487683261-2655-1-git-send-email-m.szyprowski@samsung.com --- drivers/dma-buf/dma-buf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 718f832a5c71..0007b792827b 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -325,6 +325,9 @@ static const struct file_operations dma_buf_fops = { .llseek = dma_buf_llseek, .poll = dma_buf_poll, .unlocked_ioctl = dma_buf_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = dma_buf_ioctl, +#endif }; /* -- GitLab From 0328edc77d4f35014b35f32b46be0a7e16aae74f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Feb 2017 08:59:16 +0100 Subject: [PATCH 0599/1084] mac80211: fix packet statistics for fast-RX When adding per-CPU statistics, which added statistics back to mac80211 for the fast-RX path, I evidently forgot to add the "stats->packets++" line. The reason for that is likely that I didn't see it since it's done in defragmentation for the regular RX path. Add the missing line to properly count received packets in the fast-RX case. Fixes: c9c5962b56c1 ("mac80211: enable collecting station statistics per-CPU") Reported-by: Oren Givon Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 50ca3828b124..a8443d8bc233 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3880,6 +3880,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, stats->last_rate = sta_stats_encode_rate(status); stats->fragments++; + stats->packets++; if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { stats->last_signal = status->signal; -- GitLab From a9e9200d8661c1a0be8c39f93deb383dc940de35 Mon Sep 17 00:00:00 2001 From: Matt Chen Date: Sun, 22 Jan 2017 02:16:58 +0800 Subject: [PATCH 0600/1084] mac80211: flush delayed work when entering suspend The issue was found when entering suspend and resume. It triggers a warning in: mac80211/key.c: ieee80211_enable_keys() ... WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || sdata->crypto_tx_tailroom_pending_dec); ... It points out sdata->crypto_tx_tailroom_pending_dec isn't cleaned up successfully in a delayed_work during suspend. Add a flush_delayed_work to fix it. Cc: stable@vger.kernel.org Signed-off-by: Matt Chen Signed-off-by: Johannes Berg --- net/mac80211/pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 28a3a0957c9e..76a8bcd8ef11 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -168,6 +168,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) break; } + flush_delayed_work(&sdata->dec_tailroom_needed_wk); drv_remove_interface(local, sdata); } -- GitLab From b7540d8f25c8034de7e4163fc23ac457bf057731 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 6 Feb 2017 15:28:42 +0200 Subject: [PATCH 0601/1084] mac80211: don't reorder frames with SN smaller than SSN When RX aggregation starts, transmitter may continue send frames with SN smaller than SSN until the AddBA response is received. However, the reorder buffer is already initialized at this point, which will cause the drop of such frames as duplicates since the head SN of the reorder buffer is set to the SSN, which is bigger. 
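For reference, "smaller than" here is the usual wrapping sequence-number comparison, restated below in the spirit of the ieee80211_sn_less() helper the patch relies on: an SN in the half-window behind the head is treated as old rather than as far ahead.

#include <linux/types.h>

#define SN_MASK		0xfff
#define SN_MODULO	(SN_MASK + 1)

/* true if sn1 comes before sn2 in the 4096-wide, wrapping SN space */
static inline bool sn_less(u16 sn1, u16 sn2)
{
	return ((sn1 - sn2) & SN_MASK) > (SN_MODULO >> 1);
}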
Cc: stable@vger.kernel.org Signed-off-by: Sara Sharon Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 1 + net/mac80211/rx.c | 14 +++++++++++++- net/mac80211/sta_info.h | 6 ++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 3b5fd4188f2a..58ad23a44109 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -398,6 +398,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, tid_agg_rx->timeout = timeout; tid_agg_rx->stored_mpdu_num = 0; tid_agg_rx->auto_seq = auto_seq; + tid_agg_rx->started = false; tid_agg_rx->reorder_buf_filtered = 0; status = WLAN_STATUS_SUCCESS; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a8443d8bc233..28cc494a774d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright(c) 2015 - 2016 Intel Deutschland GmbH + * Copyright(c) 2015 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1034,6 +1034,18 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata buf_size = tid_agg_rx->buf_size; head_seq_num = tid_agg_rx->head_seq_num; + /* + * If the current MPDU's SN is smaller than the SSN, it shouldn't + * be reordered. + */ + if (unlikely(!tid_agg_rx->started)) { + if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { + ret = false; + goto out; + } + tid_agg_rx->started = true; + } + /* frame with out of date sequence number */ if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { dev_kfree_skb(skb); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index dd06ef0b8861..15599c70a38f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -189,6 +189,7 @@ struct tid_ampdu_tx { * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and * and ssn. * @removed: this session is removed (but might have been found due to RCU) + * @started: this session has started (head ssn or higher was received) * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -212,8 +213,9 @@ struct tid_ampdu_rx { u16 ssn; u16 buf_size; u16 timeout; - bool auto_seq; - bool removed; + u8 auto_seq:1, + removed:1, + started:1; }; /** -- GitLab From 8fbcfeb8a9cc803464d6c166e7991913711c612c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Feb 2017 10:27:37 +0000 Subject: [PATCH 0602/1084] mac80211_hwsim: Replace bogus hrtimer clockid mac80211_hwsim initializes a hrtimer with clockid CLOCK_MONOTONIC_RAW. That's not supported. Use CLOCK_MONOTONIC instead. 
Signed-off-by: Thomas Gleixner Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 1620a5d2757d..0889fc81ce9e 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2671,7 +2671,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, tasklet_hrtimer_init(&data->beacon_timer, mac80211_hwsim_beacon, - CLOCK_MONOTONIC_RAW, HRTIMER_MODE_ABS); + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); spin_lock_bh(&hwsim_radio_lock); list_add_tail(&data->list, &hwsim_radios); -- GitLab From 890030d3c425f49abaa4acf60e20f288b599f980 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 22 Feb 2017 16:16:07 +0100 Subject: [PATCH 0603/1084] mac80211: don't handle filtered frames within a BA session When running a BA session, the driver (or the hardware) already takes care of retransmitting failed frames, since it has to keep the receiver reorder window in sync. Adding another layer of retransmit around that does not improve anything. In fact, it can only lead to some strong reordering with huge latency. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/status.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index a3af6e1bfd98..05ccd55b5d83 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -51,7 +51,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct ieee80211_hdr *hdr = (void *)skb->data; int ac; - if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) { + if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_AMPDU)) { ieee80211_free_txskb(&local->hw, skb); return; } -- GitLab From d98937f4ea713d21e0fcc345919f86c877dd8d6f Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Feb 2017 14:24:36 +0100 Subject: [PATCH 0604/1084] mac80211: fix power saving clients handling in iwlwifi iwlwifi now supports RSS and can't let mac80211 track the PS state based on the Rx frames since they can come out of order. iwlwifi is now advertising AP_LINK_PS, and uses explicit notifications to teach mac80211 about the PS state of the stations and the PS poll / uAPSD trigger frames coming our way from the peers. Because of that, the TIM stopped being maintained in mac80211. I tried to fix this in commit c68df2e7be0c ("mac80211: allow using AP_LINK_PS with mac80211-generated TIM IE") but that was later reverted by Felix in commit 6c18a6b4e799 ("Revert "mac80211: allow using AP_LINK_PS with mac80211-generated TIM IE") since it broke drivers that do not implement set_tim. Since none of the drivers that set AP_LINK_PS have the set_tim() handler set besides iwlwifi, I can bail out in __sta_info_recalc_tim if AP_LINK_PS AND .set_tim is not implemented. 
Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4774e663a411..8bb99d299cda 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) + if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim) return; if (sta->dead) -- GitLab From 2595d259b667114431501bae51b45d6656b987d1 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 20 Feb 2017 14:24:39 +0100 Subject: [PATCH 0605/1084] mac80211: shorten debug message Tracing is limited to 100 characters and this message passes the limit when there are a few buffered frames. Shorten it. Signed-off-by: Sara Sharon Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8bb99d299cda..3323a2fb289b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1264,7 +1264,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) sta_info_recalc_tim(sta); ps_dbg(sdata, - "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n", + "STA %pM aid %d sending %d filtered/%d PS frames since STA woke up\n", sta->sta.addr, sta->sta.aid, filtered, buffered); ieee80211_check_fast_xmit(sta); -- GitLab From 09e0a2fe102208cbaf39510b8b04dd524d7d2935 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 20 Feb 2017 14:24:38 +0100 Subject: [PATCH 0606/1084] mac80211: fix typo in debug print Signed-off-by: Sara Sharon Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 58ad23a44109..4456559cb056 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -85,7 +85,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, ht_dbg(sta->sdata, "Rx BA session stop requested for %pM tid %u %s reason: %d\n", sta->sta.addr, tid, - initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator", + initiator == WLAN_BACK_RECIPIENT ? "recipient" : "initiator", (int)reason); if (drv_ampdu_action(local, sta->sdata, ¶ms)) -- GitLab From 6dbf5cea05a7098a69f294c96b6d76f08562cae5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 24 Feb 2017 13:25:14 +0100 Subject: [PATCH 0607/1084] cpuidle: menu: Avoid taking spinlock for accessing QoS values After commit 9908859acaa9 (cpuidle/menu: add per CPU PM QoS resume latency consideration) the cpuidle menu governor calls dev_pm_qos_read_value() on CPU devices to read the current resume latency QoS constraint values for them. That function takes a spinlock to prevent the device's power.qos pointer from becoming NULL during the access which is a problem for the RT patchset where spinlocks are converted into mutexes and the idle loop stops working. However, it is not even necessary for the menu governor to take that spinlock, because the power.qos pointer accessed under it cannot be modified during the access anyway. For this reason, introduce a "raw" routine for accessing device QoS resume latency constraints without locking and use it in the menu governor. 
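A simplified sketch of how a governor consumes that value; pick_state() is a made-up name and the real menu_select() additionally weighs predicted idle time, interactivity and I/O waiters:

#include <linux/cpuidle.h>
#include <linux/pm_qos.h>

static int pick_state(struct cpuidle_driver *drv, struct device *device,
		      unsigned int predicted_us)
{
	int resume_latency = dev_pm_qos_raw_read_value(device);
	int i, best = 0;

	for (i = 1; i < drv->state_count; i++) {
		struct cpuidle_state *s = &drv->states[i];

		if (s->disabled)
			continue;
		/* 0 means "no constraint" for the per-device resume latency */
		if (resume_latency && s->exit_latency > resume_latency)
			break;
		if (s->target_residency > predicted_us)
			break;
		best = i;
	}
	return best;
}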
Fixes: 9908859acaa9 (cpuidle/menu: add per CPU PM QoS resume latency consideration) Acked-by: Alex Shi Signed-off-by: Rafael J. Wysocki --- drivers/base/power/qos.c | 3 +-- drivers/cpuidle/governors/menu.c | 2 +- include/linux/pm_qos.h | 7 +++++++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 58fcc758334e..73142d086240 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -108,8 +108,7 @@ s32 __dev_pm_qos_read_value(struct device *dev) { lockdep_assert_held(&dev->power.lock); - return IS_ERR_OR_NULL(dev->power.qos) ? - 0 : pm_qos_read_value(&dev->power.qos->resume_latency); + return dev_pm_qos_raw_read_value(dev); } /** diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 8d6d25c38c02..6d6f46e79d94 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -287,7 +287,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) unsigned int interactivity_req; unsigned int expected_interval; unsigned long nr_iowaiters, cpu_load; - int resume_latency = dev_pm_qos_read_value(device); + int resume_latency = dev_pm_qos_raw_read_value(device); if (data->needs_update) { menu_update(drv, dev); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 0f65d36c2a75..5aba9f47899d 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -173,6 +173,12 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return dev->power.qos->flags_req->data.flr.flags; } + +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) +{ + return IS_ERR_OR_NULL(dev->power.qos) ? + 0 : pm_qos_read_value(&dev->power.qos->resume_latency); +} #else static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) @@ -237,6 +243,7 @@ static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {} static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return 0; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return 0; } #endif #endif -- GitLab From 81d45bdf89135cd26dc7535c14a45db6cdd647fa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 24 Feb 2017 00:25:28 +0100 Subject: [PATCH 0608/1084] PM / hibernate: Untangle power_down() The power_down() routine in the core hibernation code is not exactly straightforward (to put it lightly), so clean it up to make it avoid invoking itself recursively, among other things. Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index b26dbc48c75b..f251b4d32913 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -608,6 +608,22 @@ static void power_down(void) { #ifdef CONFIG_SUSPEND int error; + + if (hibernation_mode == HIBERNATION_SUSPEND) { + error = suspend_devices_and_enter(PM_SUSPEND_MEM); + if (error) { + hibernation_mode = hibernation_ops ? + HIBERNATION_PLATFORM : + HIBERNATION_SHUTDOWN; + } else { + /* Restore swap signature. */ + error = swsusp_unmark(); + if (error) + pr_err("PM: Swap will be unusable! 
Try swapon -a.\n"); + + return; + } + } #endif switch (hibernation_mode) { @@ -620,25 +636,6 @@ static void power_down(void) if (pm_power_off) kernel_power_off(); break; -#ifdef CONFIG_SUSPEND - case HIBERNATION_SUSPEND: - error = suspend_devices_and_enter(PM_SUSPEND_MEM); - if (error) { - if (hibernation_ops) - hibernation_mode = HIBERNATION_PLATFORM; - else - hibernation_mode = HIBERNATION_SHUTDOWN; - power_down(); - } - /* - * Restore swap signature. - */ - error = swsusp_unmark(); - if (error) - printk(KERN_ERR "PM: Swap will be unusable! " - "Try swapon -a.\n"); - return; -#endif } kernel_halt(); /* -- GitLab From 2872de1382a7c888fa69532eda25aa7342dfe748 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 24 Feb 2017 00:26:15 +0100 Subject: [PATCH 0609/1084] PM / hibernate: Define pr_fmt() and use pr_*() instead of printk() Define a pr_fmt() for hibernate.c and convert all of the explicit printk() calls into corresponding pr_*() so that they use the pr_fmt() format. Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 60 +++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f251b4d32913..8951d0d04810 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -10,6 +10,8 @@ * This file is released under the GPLv2. */ +#define pr_fmt(fmt) "PM: " fmt + #include #include #include @@ -104,7 +106,7 @@ EXPORT_SYMBOL(system_entering_hibernation); #ifdef CONFIG_PM_DEBUG static void hibernation_debug_sleep(void) { - printk(KERN_INFO "hibernation debug: Waiting for 5 seconds.\n"); + pr_info("hibernation debug: Waiting for 5 seconds.\n"); mdelay(5000); } @@ -250,10 +252,9 @@ void swsusp_show_speed(ktime_t start, ktime_t stop, centisecs = 1; /* avoid div-by-zero */ k = nr_pages * (PAGE_SIZE / 1024); kps = (k * 100) / centisecs; - printk(KERN_INFO "PM: %s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n", - msg, k, - centisecs / 100, centisecs % 100, - kps / 1000, (kps % 1000) / 10); + pr_info("%s %u kbytes in %u.%02u seconds (%u.%02u MB/s)\n", + msg, k, centisecs / 100, centisecs % 100, kps / 1000, + (kps % 1000) / 10); } /** @@ -271,8 +272,7 @@ static int create_image(int platform_mode) error = dpm_suspend_end(PMSG_FREEZE); if (error) { - printk(KERN_ERR "PM: Some devices failed to power down, " - "aborting hibernation\n"); + pr_err("Some devices failed to power down, aborting hibernation\n"); return error; } @@ -288,8 +288,7 @@ static int create_image(int platform_mode) error = syscore_suspend(); if (error) { - printk(KERN_ERR "PM: Some system devices failed to power down, " - "aborting hibernation\n"); + pr_err("Some system devices failed to power down, aborting hibernation\n"); goto Enable_irqs; } @@ -304,8 +303,8 @@ static int create_image(int platform_mode) restore_processor_state(); trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false); if (error) - printk(KERN_ERR "PM: Error %d creating hibernation image\n", - error); + pr_err("Error %d creating hibernation image\n", error); + if (!in_suspend) { events_check_enabled = false; clear_free_pages(); @@ -432,8 +431,7 @@ static int resume_target_kernel(bool platform_mode) error = dpm_suspend_end(PMSG_QUIESCE); if (error) { - printk(KERN_ERR "PM: Some devices failed to power down, " - "aborting resume\n"); + pr_err("Some devices failed to power down, aborting resume\n"); return error; } @@ -619,7 +617,7 @@ static void power_down(void) /* Restore swap signature. 
*/ error = swsusp_unmark(); if (error) - pr_err("PM: Swap will be unusable! Try swapon -a.\n"); + pr_err("Swap will be unusable! Try swapon -a.\n"); return; } @@ -642,7 +640,7 @@ static void power_down(void) * Valid image is on the disk, if we continue we risk serious data * corruption after resume. */ - printk(KERN_CRIT "PM: Please power down manually\n"); + pr_crit("Power down manually\n"); while (1) cpu_relax(); } @@ -652,7 +650,7 @@ static int load_image_and_restore(void) int error; unsigned int flags; - pr_debug("PM: Loading hibernation image.\n"); + pr_debug("Loading hibernation image.\n"); lock_device_hotplug(); error = create_basic_memory_bitmaps(); @@ -664,7 +662,7 @@ static int load_image_and_restore(void) if (!error) hibernation_restore(flags & SF_PLATFORM_MODE); - printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); + pr_err("Failed to load hibernation image, recovering.\n"); swsusp_free(); free_basic_memory_bitmaps(); Unlock: @@ -682,7 +680,7 @@ int hibernate(void) bool snapshot_test = false; if (!hibernation_available()) { - pr_debug("PM: Hibernation not available.\n"); + pr_debug("Hibernation not available.\n"); return -EPERM; } @@ -700,9 +698,9 @@ int hibernate(void) goto Exit; } - printk(KERN_INFO "PM: Syncing filesystems ... "); + pr_info("Syncing filesystems ... \n"); sys_sync(); - printk("done.\n"); + pr_info("done.\n"); error = freeze_processes(); if (error) @@ -728,7 +726,7 @@ int hibernate(void) else flags |= SF_CRC32_MODE; - pr_debug("PM: writing image.\n"); + pr_debug("Writing image.\n"); error = swsusp_write(flags); swsusp_free(); if (!error) { @@ -740,7 +738,7 @@ int hibernate(void) in_suspend = 0; pm_restore_gfp_mask(); } else { - pr_debug("PM: Image restored successfully.\n"); + pr_debug("Image restored successfully.\n"); } Free_bitmaps: @@ -748,7 +746,7 @@ int hibernate(void) Thaw: unlock_device_hotplug(); if (snapshot_test) { - pr_debug("PM: Checking hibernation image\n"); + pr_debug("Checking hibernation image\n"); error = swsusp_check(); if (!error) error = load_image_and_restore(); @@ -812,10 +810,10 @@ static int software_resume(void) goto Unlock; } - pr_debug("PM: Checking hibernation image partition %s\n", resume_file); + pr_debug("Checking hibernation image partition %s\n", resume_file); if (resume_delay) { - printk(KERN_INFO "Waiting %dsec before reading resume device...\n", + pr_info("Waiting %dsec before reading resume device ...\n", resume_delay); ssleep(resume_delay); } @@ -854,10 +852,10 @@ static int software_resume(void) } Check_image: - pr_debug("PM: Hibernation image partition %d:%d present\n", + pr_debug("Hibernation image partition %d:%d present\n", MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); - pr_debug("PM: Looking for hibernation image.\n"); + pr_debug("Looking for hibernation image.\n"); error = swsusp_check(); if (error) goto Unlock; @@ -876,7 +874,7 @@ static int software_resume(void) goto Close_Finish; } - pr_debug("PM: Preparing processes for restore.\n"); + pr_debug("Preparing processes for restore.\n"); error = freeze_processes(); if (error) goto Close_Finish; @@ -889,7 +887,7 @@ static int software_resume(void) /* For success case, the suspend path will release the lock */ Unlock: mutex_unlock(&pm_mutex); - pr_debug("PM: Hibernation image not present or could not be loaded.\n"); + pr_debug("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: swsusp_close(FMODE_READ); @@ -1013,7 +1011,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute 
*attr, error = -EINVAL; if (!error) - pr_debug("PM: Hibernation mode set to '%s'\n", + pr_debug("Hibernation mode set to '%s'\n", hibernation_modes[mode]); unlock_system_sleep(); return error ? error : n; @@ -1049,7 +1047,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, lock_system_sleep(); swsusp_resume_device = res; unlock_system_sleep(); - printk(KERN_INFO "PM: Starting manual resume from disk\n"); + pr_info("Starting manual resume from disk\n"); noresume = 0; software_resume(); return n; -- GitLab From 51be7a9a261ce18c520fb3928b168feb77522745 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 12 Jan 2017 23:36:32 +0200 Subject: [PATCH 0610/1084] virtio_mmio: expose header to userspace It's handy for userspace emulators like QEMU. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 2 +- include/uapi/linux/Kbuild | 1 + include/{ => uapi}/linux/virtio_mmio.h | 0 3 files changed, 2 insertions(+), 1 deletion(-) rename include/{ => uapi}/linux/virtio_mmio.h (100%) diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index c71fde5fe835..08357d70a891 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -70,7 +70,7 @@ #include #include #include -#include +#include #include diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index f330ba4547cf..718fa73310e1 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -458,6 +458,7 @@ header-y += virtio_console.h header-y += virtio_gpu.h header-y += virtio_ids.h header-y += virtio_input.h +header-y += virtio_mmio.h header-y += virtio_net.h header-y += virtio_pci.h header-y += virtio_ring.h diff --git a/include/linux/virtio_mmio.h b/include/uapi/linux/virtio_mmio.h similarity index 100% rename from include/linux/virtio_mmio.h rename to include/uapi/linux/virtio_mmio.h -- GitLab From 126cfa2f5e15ae2ca7f70be71b07e6cd8d2b44d1 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Mon, 27 Feb 2017 21:19:44 +0530 Subject: [PATCH 0611/1084] ALSA: hda: Add Geminilake HDMI codec ID Geminilake HDMI codec 0x280d is similar to previous platforms, so add it with similar ops as previous. Signed-off-by: Senthilnathan Veppur Signed-off-by: Vinod Koul Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 3fc201c3b95a..c51e3218e33e 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -3659,6 +3659,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi), +HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_hsw_hdmi), HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi), HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi), -- GitLab From ff4dd73dd2b4806419f8ff65cbce11d5019548d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Feb 2017 17:15:28 +0100 Subject: [PATCH 0612/1084] mac80211_hwsim: check HWSIM_ATTR_RADIO_NAME length Unfortunately, the nla policy was defined to have HWSIM_ATTR_RADIO_NAME as an NLA_STRING, rather than NLA_NUL_STRING, so we can't use it as a NUL-terminated string in the kernel. 
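The difference at the policy level, shown with a generic illustrative policy (EXAMPLE_ATTR_* names are made up): NLA_NUL_STRING would have enforced a trailing NUL at validation time, but changing the published hwsim policy now could reject existing userspace.

#include <net/netlink.h>

enum {
	EXAMPLE_ATTR_UNSPEC,
	EXAMPLE_ATTR_NAME,
	__EXAMPLE_ATTR_MAX,
};
#define EXAMPLE_ATTR_MAX (__EXAMPLE_ATTR_MAX - 1)

static const struct nla_policy example_policy[EXAMPLE_ATTR_MAX + 1] = {
	/* as hwsim declares it: payload need not be NUL-terminated */
	[EXAMPLE_ATTR_NAME] = { .type = NLA_STRING },
	/*
	 * what would have guaranteed termination, but changing the
	 * policy now would break the existing netlink ABI:
	 *
	 * [EXAMPLE_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = 32 },
	 */
};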
Rather than break the API, kasprintf() the string to a new buffer to guarantee NUL termination. Reported-by: Andrew Zaborowski Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 28 ++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 0889fc81ce9e..50c219fb1a52 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3056,6 +3056,7 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2, static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) { struct hwsim_new_radio_params param = { 0 }; + const char *hwname = NULL; param.reg_strict = info->attrs[HWSIM_ATTR_REG_STRICT_REG]; param.p2p_device = info->attrs[HWSIM_ATTR_SUPPORT_P2P_DEVICE]; @@ -3069,8 +3070,14 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) if (info->attrs[HWSIM_ATTR_NO_VIF]) param.no_vif = true; - if (info->attrs[HWSIM_ATTR_RADIO_NAME]) - param.hwname = nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]); + if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { + hwname = kasprintf(GFP_KERNEL, "%.*s", + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + if (!hwname) + return -ENOMEM; + param.hwname = hwname; + } if (info->attrs[HWSIM_ATTR_USE_CHANCTX]) param.use_chanctx = true; @@ -3098,11 +3105,15 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) s64 idx = -1; const char *hwname = NULL; - if (info->attrs[HWSIM_ATTR_RADIO_ID]) + if (info->attrs[HWSIM_ATTR_RADIO_ID]) { idx = nla_get_u32(info->attrs[HWSIM_ATTR_RADIO_ID]); - else if (info->attrs[HWSIM_ATTR_RADIO_NAME]) - hwname = (void *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME]); - else + } else if (info->attrs[HWSIM_ATTR_RADIO_NAME]) { + hwname = kasprintf(GFP_KERNEL, "%.*s", + nla_len(info->attrs[HWSIM_ATTR_RADIO_NAME]), + (char *)nla_data(info->attrs[HWSIM_ATTR_RADIO_NAME])); + if (!hwname) + return -ENOMEM; + } else return -EINVAL; spin_lock_bh(&hwsim_radio_lock); @@ -3111,7 +3122,8 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) if (data->idx != idx) continue; } else { - if (strcmp(hwname, wiphy_name(data->hw->wiphy))) + if (!hwname || + strcmp(hwname, wiphy_name(data->hw->wiphy))) continue; } @@ -3122,10 +3134,12 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info) spin_unlock_bh(&hwsim_radio_lock); mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), info); + kfree(hwname); return 0; } spin_unlock_bh(&hwsim_radio_lock); + kfree(hwname); return -ENODEV; } -- GitLab From ad4d307fce0909a5f70635826f779321ab95b469 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 19:10:35 -0800 Subject: [PATCH 0613/1084] f2fs: avoid very large discard command This patch adds MAX_DISCARD_BLOCKS() to avoid issuing too much large single discard command. 
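For scale: with f2fs's 4 KB blocks, 512 blocks per segment and the default one segment per section, MAX_DISCARD_BLOCKS(sbi) evaluates to 512 blocks, so a single merged discard entry is capped at one 2 MB section instead of growing without bound.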
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/segment.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 146cc71ff91c..2ce9fba27918 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -133,7 +133,8 @@ enum { (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) - +#define MAX_DISCARD_BLOCKS(sbi) \ + ((1 << (sbi)->log_blocks_per_seg) * (sbi)->segs_per_sec) #define DISCARD_ISSUE_RATE 8 #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d780ecccc4ac..028db55375a4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -886,7 +886,8 @@ static void __add_discard_entry(struct f2fs_sb_info *sbi, if (!list_empty(head)) { last = list_last_entry(head, struct discard_entry, list); if (START_BLOCK(sbi, cpc->trim_start) + start == - last->blkaddr + last->len) { + last->blkaddr + last->len && + last->len < MAX_DISCARD_BLOCKS(sbi)) { last->len += end - start; goto done; } -- GitLab From 47b898081605684aa1bacd1d14443a50c660c1fe Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 19:53:07 -0800 Subject: [PATCH 0614/1084] f2fs: much larger batched trim_fs job We have a kernel thread to issue discard commands, so we can increase the number of batched discard sections. By default, now it becomes 4GB range. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2ce9fba27918..6efb2d91f5a1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -128,7 +128,7 @@ enum { CP_DISCARD, }; -#define DEF_BATCHED_TRIM_SECTIONS 2 +#define DEF_BATCHED_TRIM_SECTIONS 2048 #define BATCHED_TRIM_SEGMENTS(sbi) \ (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) #define BATCHED_TRIM_BLOCKS(sbi) \ -- GitLab From 40465257ac4159e9ebfba10ee502e795b917da76 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 19:58:23 -0800 Subject: [PATCH 0615/1084] f2fs: wait for discard completion after submission We don't need to wait for each discard commands when unmounting the image. 
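In isolation, the submit-then-wait shape this introduces looks like the generic sketch below (not the exact f2fs code): bios submitted between blk_start_plug() and blk_finish_plug() are held back and dispatched as one batch, and only the caller that really must not return early, the unmount path, blocks on the completions.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/completion.h>
#include <linux/list.h>

static void issue_all_then_wait(struct list_head *head)
{
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;

	blk_start_plug(&plug);
	list_for_each_entry(dc, head, list)
		submit_bio(dc->bio);		/* queued, not waited on */
	blk_finish_plug(&plug);		/* dispatch the whole batch */

	/* only the unmount path needs everything finished before returning */
	list_for_each_entry_safe(dc, tmp, head, list)
		wait_for_completion_io(&dc->wait);
}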
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 028db55375a4..df60e1502b82 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -676,8 +676,12 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *wait_list = &(dcc->discard_cmd_list); struct discard_cmd *dc, *tmp; + struct blk_plug plug; mutex_lock(&dcc->cmd_lock); + + blk_start_plug(&plug); + list_for_each_entry_safe(dc, tmp, wait_list, list) { if (blkaddr == NULL_ADDR) { @@ -686,9 +690,6 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) submit_bio(dc->bio); atomic_inc(&dcc->submit_discard); } - wait_for_completion_io(&dc->wait); - - __remove_discard_cmd(sbi, dc); continue; } @@ -699,6 +700,15 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) __remove_discard_cmd(sbi, dc); } } + blk_finish_plug(&plug); + + /* this comes from f2fs_put_super */ + if (blkaddr == NULL_ADDR) { + list_for_each_entry_safe(dc, tmp, wait_list, list) { + wait_for_completion_io(&dc->wait); + __remove_discard_cmd(sbi, dc); + } + } mutex_unlock(&dcc->cmd_lock); } -- GitLab From 92592285710aebe2de9afa38315b5c3914556905 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Feb 2017 20:18:35 -0800 Subject: [PATCH 0616/1084] f2fs: check discard alignment only for SEQWRITE zones For converntional zones, we don't need to align discard commands to exact zone size. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index df60e1502b82..3e95db5375ed 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -793,24 +793,13 @@ static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { - sector_t nr_sects = SECTOR_FROM_BLOCK(blklen); - sector_t sector; + sector_t sector, nr_sects; int devi = 0; if (sbi->s_ndevs) { devi = f2fs_target_device_index(sbi, blkstart); blkstart -= FDEV(devi).start_blk; } - sector = SECTOR_FROM_BLOCK(blkstart); - - if (sector & (bdev_zone_sectors(bdev) - 1) || - nr_sects != bdev_zone_sectors(bdev)) { - f2fs_msg(sbi->sb, KERN_INFO, - "(%d) %s: Unaligned discard attempted (block %x + %x)", - devi, sbi->s_ndevs ? FDEV(devi).path: "", - blkstart, blklen); - return -EIO; - } /* * We need to know the type of the zone: for conventional zones, @@ -825,6 +814,17 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: + sector = SECTOR_FROM_BLOCK(blkstart); + nr_sects = SECTOR_FROM_BLOCK(blklen); + + if (sector & (bdev_zone_sectors(bdev) - 1) || + nr_sects != bdev_zone_sectors(bdev)) { + f2fs_msg(sbi->sb, KERN_INFO, + "(%d) %s: Unaligned discard attempted (block %x + %x)", + devi, sbi->s_ndevs ? 
FDEV(devi).path: "", + blkstart, blklen); + return -EIO; + } trace_f2fs_issue_reset_zone(bdev, blkstart); return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS); -- GitLab From 77190e1f313f5ef18a3742aa4c5b790456a5825b Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Tue, 21 Feb 2017 20:43:48 +0800 Subject: [PATCH 0617/1084] f2fs: remove unnecessary condition check for write_checkpoint in f2fs_gc Since has_not_enough_free_secs(sbi, 0, 0) must be true if has_not_enough_ free_secs(sbi, sec_freed, 0) is true, write_checkpoint is sure to execute in both conditions. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 11416c7cb705..6c996e39b59a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -958,15 +958,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) * enough free sections, we should flush dent/node blocks and do * garbage collections. */ - if (dirty_segments(sbi) || prefree_segments(sbi)) { - ret = write_checkpoint(sbi, &cpc); - if (ret) - goto stop; - } else if (has_not_enough_free_secs(sbi, 0, 0)) { - ret = write_checkpoint(sbi, &cpc); - if (ret) - goto stop; - } + ret = write_checkpoint(sbi, &cpc); + if (ret) + goto stop; } else if (gc_type == BG_GC && !background) { /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */ goto stop; -- GitLab From e15882b6c6caff427fe387e878e2f23de58c053b Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Thu, 23 Feb 2017 09:18:05 +0000 Subject: [PATCH 0618/1084] f2fs: init local extent_info to avoid stale stack info in tp To avoid such stale(fops, blk, len) info in f2fs_lookup_extent_tree_end tp dio-23095 [005] ...1 17878.856859: f2fs_lookup_extent_tree_end: dev = (259,30), ino = 856, pgofs = 0, ext_info(fofs: 3441207040, blk: 4294967232, len: 3481143808) Signed-off-by: Hou Pengyang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- fs/f2fs/file.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5f3bc98a387d..f72493d8c8e4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -511,7 +511,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) { - struct extent_info ei; + struct extent_info ei = {0,0,0}; struct inode *inode = dn->inode; if (f2fs_lookup_extent_cache(inode, index, &ei)) { @@ -528,7 +528,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; - struct extent_info ei; + struct extent_info ei = {0,0,0}; int err; struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -803,7 +803,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; blkcnt_t prealloc; - struct extent_info ei; + struct extent_info ei = {0,0,0}; block_t blkaddr; if (!maxblocks) @@ -1667,7 +1667,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, struct dnode_of_data dn; struct page *ipage; bool locked = false; - struct extent_info ei; + struct extent_info ei = {0,0,0}; int err = 0; /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 120164815030..36c156557bb1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1877,7 +1877,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, { struct inode *inode = file_inode(filp); struct 
f2fs_map_blocks map = { .m_next_pgofs = NULL }; - struct extent_info ei; + struct extent_info ei = {0,0,0}; pgoff_t pg_start, pg_end; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; -- GitLab From 6bfaf7b150f7dba04024b7b6420773c09606538c Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Thu, 23 Feb 2017 09:18:06 +0000 Subject: [PATCH 0619/1084] f2fs: remove unsafe bitmap checking proc A: proc B: - writeback_sb_inodes - __writeback_single_inode - do_writepages - f2fs_write_node_pages - f2fs_balance_fs_bg - write_checkpoint - build_free_nids - flush_nat_entries - __build_free_nids - __flush_nat_entry_set - ra_meta_pages - get_next_nat_page - current_nat_addr - set_to_next_nat [do nat_bitmap checking] - f2fs_change_bit For proc A, nat_bitmap and nat_bitmap_mir would be compared without lock_op and nm_i->nat_tree_lock, while proc B is changing nat_bitmap/nat_bitmap_ver in cp. So it is normal for nat_bitmap/nat_bitmap diffrence under such scenario. This patch fix this by removing the monitoring point. [Fix: 599a09b f2fs: check in-memory nat version bitmap] Signed-off-by: Hou Pengyang Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index d3d289306469..3fc9c4b1dce9 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -209,12 +209,6 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) (seg_off << sbi->log_blocks_per_seg << 1) + (block_off & (sbi->blocks_per_seg - 1))); -#ifdef CONFIG_F2FS_CHECK_FS - if (f2fs_test_bit(block_off, nm_i->nat_bitmap) != - f2fs_test_bit(block_off, nm_i->nat_bitmap_mir)) - f2fs_bug_on(sbi, 1); -#endif - if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) block_addr += sbi->blocks_per_seg; -- GitLab From 3f2be04304cf10f2ef074399f8dd565bd00ddcae Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 23 Feb 2017 19:55:05 +0800 Subject: [PATCH 0620/1084] f2fs: avoid m_flags overlay when allocating more data blocks When more than one data blocks are allocated, the F2FS_MAP_UNWRITTEN/MAPPED flags will be overlapped by F2FS_MAP_NEW at the later times. Signed-off-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f72493d8c8e4..80f9863dc4b0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -867,7 +867,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, } if (err) goto sync_out; - map->m_flags = F2FS_MAP_NEW; + map->m_flags |= F2FS_MAP_NEW; blkaddr = dn.data_blkaddr; } else { if (flag == F2FS_GET_BLOCK_BMAP) { -- GitLab From 5e8256ac2e378089a1bf3fae87370811b1d31625 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 23 Feb 2017 19:39:59 +0800 Subject: [PATCH 0621/1084] f2fs: replace rw semaphore extent_tree_lock with mutex lock This patch replace rw semaphore extent_tree_lock with mutex lock for no read cases with this lock. 
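To see why the conversion is safe: a reader/writer lock that is only ever taken in write (exclusive) mode serializes exactly like a plain mutex, so nothing is lost by switching and the bookkeeping gets cheaper. The following standalone userspace sketch uses POSIX threads rather than the kernel primitives touched by the patch, purely as an illustration of the before/after locking shape:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t tree_rwlock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t tree_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tree_entries;

/* before: exclusive access through a rwlock that never has readers */
static void insert_with_rwlock(void)
{
	pthread_rwlock_wrlock(&tree_rwlock);	/* analogous to down_write() */
	tree_entries++;
	pthread_rwlock_unlock(&tree_rwlock);	/* analogous to up_write() */
}

/* after: the same exclusion with a plain mutex */
static void insert_with_mutex(void)
{
	pthread_mutex_lock(&tree_mutex);	/* analogous to mutex_lock() */
	tree_entries++;
	pthread_mutex_unlock(&tree_mutex);	/* analogous to mutex_unlock() */
}

int main(void)
{
	insert_with_rwlock();
	insert_with_mutex();
	printf("entries: %d\n", tree_entries);
	return 0;
}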
Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 22 +++++++++++----------- fs/f2fs/f2fs.h | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 6ed6424807b6..0ab5518e45c2 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -77,7 +77,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) struct extent_tree *et; nid_t ino = inode->i_ino; - down_write(&sbi->extent_tree_lock); + mutex_lock(&sbi->extent_tree_lock); et = radix_tree_lookup(&sbi->extent_tree_root, ino); if (!et) { et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); @@ -94,7 +94,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) atomic_dec(&sbi->total_zombie_tree); list_del_init(&et->list); } - up_write(&sbi->extent_tree_lock); + mutex_unlock(&sbi->extent_tree_lock); /* never died until evict_inode */ F2FS_I(inode)->extent_tree = et; @@ -548,7 +548,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) if (!atomic_read(&sbi->total_zombie_tree)) goto free_node; - if (!down_write_trylock(&sbi->extent_tree_lock)) + if (!mutex_trylock(&sbi->extent_tree_lock)) goto out; /* 1. remove unreferenced extent tree */ @@ -570,11 +570,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) goto unlock_out; cond_resched(); } - up_write(&sbi->extent_tree_lock); + mutex_unlock(&sbi->extent_tree_lock); free_node: /* 2. remove LRU extent entries */ - if (!down_write_trylock(&sbi->extent_tree_lock)) + if (!mutex_trylock(&sbi->extent_tree_lock)) goto out; remained = nr_shrink - (node_cnt + tree_cnt); @@ -604,7 +604,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) spin_unlock(&sbi->extent_lock); unlock_out: - up_write(&sbi->extent_tree_lock); + mutex_unlock(&sbi->extent_tree_lock); out: trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); @@ -651,10 +651,10 @@ void f2fs_destroy_extent_tree(struct inode *inode) if (inode->i_nlink && !is_bad_inode(inode) && atomic_read(&et->node_cnt)) { - down_write(&sbi->extent_tree_lock); + mutex_lock(&sbi->extent_tree_lock); list_add_tail(&et->list, &sbi->zombie_list); atomic_inc(&sbi->total_zombie_tree); - up_write(&sbi->extent_tree_lock); + mutex_unlock(&sbi->extent_tree_lock); return; } @@ -662,12 +662,12 @@ void f2fs_destroy_extent_tree(struct inode *inode) node_cnt = f2fs_destroy_extent_node(inode); /* delete extent tree entry in radix tree */ - down_write(&sbi->extent_tree_lock); + mutex_lock(&sbi->extent_tree_lock); f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); atomic_dec(&sbi->total_ext_tree); - up_write(&sbi->extent_tree_lock); + mutex_unlock(&sbi->extent_tree_lock); F2FS_I(inode)->extent_tree = NULL; @@ -714,7 +714,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn, void init_extent_cache_info(struct f2fs_sb_info *sbi) { INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); - init_rwsem(&sbi->extent_tree_lock); + mutex_init(&sbi->extent_tree_lock); INIT_LIST_HEAD(&sbi->extent_list); spin_lock_init(&sbi->extent_lock); atomic_set(&sbi->total_ext_tree, 0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6efb2d91f5a1..e26cc6909a54 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -844,7 +844,7 @@ struct f2fs_sb_info { /* for extent tree cache */ struct radix_tree_root extent_tree_root;/* cache extent cache entries */ - struct rw_semaphore 
extent_tree_lock; /* locking extent radix tree */ + struct mutex extent_tree_lock; /* locking extent radix tree */ struct list_head extent_list; /* lru list for shrinker */ spinlock_t extent_lock; /* locking extent lru list */ atomic_t total_ext_tree; /* extent tree count */ -- GitLab From 22ad0b6ab46683975c6da032f1c2593066c7b3bd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Feb 2017 10:38:09 -0800 Subject: [PATCH 0622/1084] f2fs: add bitmaps for empty or full NAT blocks This patches adds bitmaps to represent empty or full NAT blocks containing free nid entries. If we can find valid crc|cp_ver in the last block of checkpoint pack, we'll use these bitmaps when building free nids. In order to avoid checkpointing burden, up-to-date bitmaps will be flushed only during umount time. So, normally we can get this gain, but when power-cut happens, we rely on fsck.f2fs which recovers this bitmap again. After this patch, we build free nids from nid #0 at mount time to make more full NAT blocks, but in runtime, we check empty NAT blocks to load free nids without loading any NAT pages from disk. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 28 +++++- fs/f2fs/debug.c | 1 + fs/f2fs/f2fs.h | 31 ++++++- fs/f2fs/node.c | 188 ++++++++++++++++++++++++++++++++++++---- fs/f2fs/segment.c | 2 +- include/linux/f2fs_fs.h | 1 + 6 files changed, 231 insertions(+), 20 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 042f8d9afe44..cd7132121573 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1024,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) spin_lock(&sbi->cp_lock); + if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count > + sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) + disable_nat_bits(sbi, false); + if (cpc->reason == CP_UMOUNT) __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); else @@ -1136,6 +1140,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk = __start_cp_next_addr(sbi); + /* write nat bits */ + if (enabled_nat_bits(sbi, cpc)) { + __u64 cp_ver = cur_cp_version(ckpt); + unsigned int i; + block_t blk; + + cp_ver |= ((__u64)crc32 << 32); + *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver); + + blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks; + for (i = 0; i < nm_i->nat_bits_blocks; i++) + update_meta_page(sbi, nm_i->nat_bits + + (i << F2FS_BLKSIZE_BITS), blk + i); + + /* Flush all the NAT BITS pages */ + while (get_pages(sbi, F2FS_DIRTY_META)) { + sync_meta_pages(sbi, META, LONG_MAX); + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + } + } + /* need to wait for end_io results */ wait_on_all_pages_writeback(sbi); if (unlikely(f2fs_cp_error(sbi))) @@ -1272,7 +1298,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ - flush_nat_entries(sbi); + flush_nat_entries(sbi, cpc); flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index de8da9fc5c99..015ad2b73a92 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -193,6 +193,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); + si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); get_cache: si->cache_mem = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 
e26cc6909a54..d1156cdd211e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -554,6 +554,7 @@ struct f2fs_nm_info { struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ + unsigned int nat_blocks; /* # of nat blocks */ /* free node ids management */ struct radix_tree_root free_nid_root;/* root of the free_nid cache */ @@ -564,6 +565,11 @@ struct f2fs_nm_info { /* for checkpoint */ char *nat_bitmap; /* NAT bitmap pointer */ + + unsigned int nat_bits_blocks; /* # of nat bits blocks */ + unsigned char *nat_bits; /* NAT bits blocks */ + unsigned char *full_nat_bits; /* full NAT pages */ + unsigned char *empty_nat_bits; /* empty NAT pages */ #ifdef CONFIG_F2FS_CHECK_FS char *nat_bitmap_mir; /* NAT bitmap mirror */ #endif @@ -1171,6 +1177,27 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) spin_unlock(&sbi->cp_lock); } +static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) +{ + set_sbi_flag(sbi, SBI_NEED_FSCK); + + if (lock) + spin_lock(&sbi->cp_lock); + __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); + kfree(NM_I(sbi)->nat_bits); + NM_I(sbi)->nat_bits = NULL; + if (lock) + spin_unlock(&sbi->cp_lock); +} + +static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, + struct cp_control *cpc) +{ + bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG); + + return (cpc) ? (cpc->reason == CP_UMOUNT) && set : set; +} + static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { down_read(&sbi->cp_rwsem); @@ -2131,7 +2158,7 @@ void move_node_page(struct page *node_page, int gc_type); int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic); int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc); -void build_free_nids(struct f2fs_sb_info *sbi, bool sync); +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); @@ -2142,7 +2169,7 @@ int recover_xattr_data(struct inode *inode, struct page *page, int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); int restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); -void flush_nat_entries(struct f2fs_sb_info *sbi); +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); int build_node_manager(struct f2fs_sb_info *sbi); void destroy_node_manager(struct f2fs_sb_info *sbi); int __init create_node_manager_caches(void); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8ebc4c78e6a4..43d35ec11851 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -338,6 +338,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); + if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR) + clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); + /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) e = __lookup_nat_cache(nm_i, ni->ino); @@ -1841,7 +1844,60 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, } } -static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync) +static int scan_nat_bits(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct page *page; + unsigned int i = 0; + 
nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK; + nid_t nid; + + if (!enabled_nat_bits(sbi, NULL)) + return -EAGAIN; + + down_read(&nm_i->nat_tree_lock); +check_empty: + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); + if (i >= nm_i->nat_blocks) { + i = 0; + goto check_partial; + } + + for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK; + nid++) { + if (unlikely(nid >= nm_i->max_nid)) + break; + add_free_nid(sbi, nid, true); + } + + if (nm_i->nid_cnt[FREE_NID_LIST] >= target) + goto out; + i++; + goto check_empty; + +check_partial: + i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); + if (i >= nm_i->nat_blocks) { + disable_nat_bits(sbi, true); + up_read(&nm_i->nat_tree_lock); + return -EINVAL; + } + + nid = i * NAT_ENTRY_PER_BLOCK; + page = get_current_nat_page(sbi, nid); + scan_nat_page(sbi, page, nid); + f2fs_put_page(page, 1); + + if (nm_i->nid_cnt[FREE_NID_LIST] < target) { + i++; + goto check_partial; + } +out: + up_read(&nm_i->nat_tree_lock); + return 0; +} + +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -1856,6 +1912,21 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync) if (!sync && !available_free_memory(sbi, FREE_NIDS)) return; + /* try to find free nids with nat_bits */ + if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST]) + return; + + /* find next valid candidate */ + if (enabled_nat_bits(sbi, NULL)) { + int idx = find_next_zero_bit_le(nm_i->full_nat_bits, + nm_i->nat_blocks, 0); + + if (idx >= nm_i->nat_blocks) + set_sbi_flag(sbi, SBI_NEED_FSCK); + else + nid = idx * NAT_ENTRY_PER_BLOCK; + } + /* readahead nat pages to be scanned */ ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); @@ -1898,10 +1969,10 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync) nm_i->ra_nid_pages, META_NAT, false); } -void build_free_nids(struct f2fs_sb_info *sbi, bool sync) +void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { mutex_lock(&NM_I(sbi)->build_lock); - __build_free_nids(sbi, sync); + __build_free_nids(sbi, sync, mount); mutex_unlock(&NM_I(sbi)->build_lock); } @@ -1943,7 +2014,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) spin_unlock(&nm_i->nid_list_lock); /* Let's scan nat pages and its caches to get free nids */ - build_free_nids(sbi, true); + build_free_nids(sbi, true, false); goto retry; } @@ -2235,8 +2306,39 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes, list_add_tail(&nes->set_list, head); } +void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, + struct page *page) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; + struct f2fs_nat_block *nat_blk = page_address(page); + int valid = 0; + int i; + + if (!enabled_nat_bits(sbi, NULL)) + return; + + for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) { + if (start_nid == 0 && i == 0) + valid++; + if (nat_blk->entries[i].block_addr) + valid++; + } + if (valid == 0) { + set_bit_le(nat_index, nm_i->empty_nat_bits); + clear_bit_le(nat_index, nm_i->full_nat_bits); + return; + } + + clear_bit_le(nat_index, nm_i->empty_nat_bits); + if (valid == NAT_ENTRY_PER_BLOCK) + set_bit_le(nat_index, nm_i->full_nat_bits); + else + clear_bit_le(nat_index, nm_i->full_nat_bits); +} + static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, - struct nat_entry_set *set) + struct 
nat_entry_set *set, struct cp_control *cpc) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_journal *journal = curseg->journal; @@ -2251,7 +2353,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, * #1, flush nat entries to journal in current hot data summary block. * #2, flush nat entries to nat page. */ - if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) + if (enabled_nat_bits(sbi, cpc) || + !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) to_journal = false; if (to_journal) { @@ -2291,10 +2394,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, } } - if (to_journal) + if (to_journal) { up_write(&curseg->journal_rwsem); - else + } else { + __update_nat_bits(sbi, start_nid, page); f2fs_put_page(page, 1); + } f2fs_bug_on(sbi, set->entry_cnt); @@ -2305,7 +2410,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, /* * This function is called during the checkpointing process. */ -void flush_nat_entries(struct f2fs_sb_info *sbi) +void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2326,7 +2431,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) * entries, remove all entries from journal and merge them * into nat entry set. */ - if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) + if (cpc->reason == CP_UMOUNT || + !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, @@ -2340,27 +2446,72 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) /* flush dirty nats in nat entry set */ list_for_each_entry_safe(set, tmp, &sets, set_list) - __flush_nat_entry_set(sbi, set); + __flush_nat_entry_set(sbi, set, cpc); up_write(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); } +static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE; + unsigned int i; + __u64 cp_ver = cur_cp_version(ckpt); + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); + __u64 crc = le32_to_cpu(*((__le32 *) + ((unsigned char *)ckpt + crc_offset))); + block_t nat_bits_addr; + + if (!enabled_nat_bits(sbi, NULL)) + return 0; + + nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 + + F2FS_BLKSIZE - 1); + nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, + GFP_KERNEL); + if (!nm_i->nat_bits) + return -ENOMEM; + + nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg - + nm_i->nat_bits_blocks; + for (i = 0; i < nm_i->nat_bits_blocks; i++) { + struct page *page = get_meta_page(sbi, nat_bits_addr++); + + memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), + page_address(page), F2FS_BLKSIZE); + f2fs_put_page(page, 1); + } + + cp_ver |= (crc << 32); + if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) { + disable_nat_bits(sbi, true); + return 0; + } + + nm_i->full_nat_bits = nm_i->nat_bits + 8; + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; + + f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint"); + return 0; +} + static int init_node_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned char *version_bitmap; - unsigned int nat_segs, nat_blocks; + unsigned int nat_segs; + int err; nm_i->nat_blkaddr = 
le32_to_cpu(sb_raw->nat_blkaddr); /* segment_count_nat includes pair segment so divide to 2. */ nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; - nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); - - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks; /* not used nids: 0, node, meta, (and root counted as valid node) */ nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - @@ -2394,6 +2545,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi) if (!nm_i->nat_bitmap) return -ENOMEM; + err = __get_nat_bitmaps(sbi); + if (err) + return err; + #ifdef CONFIG_F2FS_CHECK_FS nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size, GFP_KERNEL); @@ -2416,7 +2571,7 @@ int build_node_manager(struct f2fs_sb_info *sbi) if (err) return err; - build_free_nids(sbi, true); + build_free_nids(sbi, true, true); return 0; } @@ -2475,6 +2630,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); + kfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS kfree(nm_i->nat_bitmap_mir); #endif diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3e95db5375ed..9eb6d89bf9e2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -386,7 +386,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (!available_free_memory(sbi, FREE_NIDS)) try_to_free_nids(sbi, MAX_FREE_NIDS); else - build_free_nids(sbi, false); + build_free_nids(sbi, false, false); if (!is_idle(sbi)) return; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index f0748524ca8c..1c92ace2e8f8 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -114,6 +114,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_NAT_BITS_FLAG 0x00000080 #define CP_CRC_RECOVERY_FLAG 0x00000040 #define CP_FASTBOOT_FLAG 0x00000020 #define CP_FSCK_FLAG 0x00000010 -- GitLab From dd7b2333e6cd31584682382fcf0a1c1e5140b936 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 23 Feb 2017 20:31:20 +0800 Subject: [PATCH 0623/1084] f2fs: no need lock_op in f2fs_write_inline_data Similar as f2fs_write_inode, f2fs_write_inline_data just mark inode page dirty, so it's no need to write inline data under read lock of cp_rwsem. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 80f9863dc4b0..9e51c5e40ce1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1414,9 +1414,12 @@ static int __write_data_page(struct page *page, bool *submitted, goto redirty_out; err = -EAGAIN; - f2fs_lock_op(sbi); - if (f2fs_has_inline_data(inode)) + if (f2fs_has_inline_data(inode)) { err = f2fs_write_inline_data(inode, page); + if (!err) + goto out; + } + f2fs_lock_op(sbi); if (err == -EAGAIN) err = do_write_data_page(&fio); if (F2FS_I(inode)->last_disk_size < psize) -- GitLab From 55523519bc7227e651fd4febeb3aafdd22b8af1c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 Feb 2017 11:08:28 +0800 Subject: [PATCH 0624/1084] f2fs: show simple call stack in fault injection message Previously kernel message can show that in which function we do the injection, but unfortunately, most of the caller are the same, for tracking more information of injection path, it needs to show upper caller's name. This patch supports that ability. 
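A rough feel for what the new message adds can be had from a standalone userspace sketch of the same macro shape (names here are assumptions for illustration; the kernel version uses printk's %pF to resolve the return address into a symbol name, which plain user code cannot do):

#include <stdio.h>

/* report the fault type, the injection site and the upper caller */
#define show_injection_info(type) \
	printf("inject %s in %s of %p\n", \
	       (type), __func__, __builtin_return_address(0))

static void *checked_alloc(void)
{
	/* pretend the injection counter just fired */
	show_injection_info("FAULT_KMALLOC");
	return NULL;
}

int main(void)
{
	if (!checked_alloc())	/* printed address points back into main() */
		fprintf(stderr, "allocation failed (injected)\n");
	return 0;
}

With the old format only the caller of the macro was printed; printing both __func__ and the return address tells apart the many different paths that reach the same injection site.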
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 + fs/f2fs/data.c | 4 +++- fs/f2fs/dir.c | 4 +++- fs/f2fs/f2fs.h | 20 +++++++++++++------- fs/f2fs/gc.c | 4 +++- fs/f2fs/inode.c | 4 +++- fs/f2fs/node.c | 4 +++- fs/f2fs/segment.c | 4 +++- 8 files changed, 32 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index cd7132121573..04d7c244c0f0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -494,6 +494,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) #ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); + f2fs_show_injection_info(FAULT_ORPHAN); return -ENOSPC; } #endif diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9e51c5e40ce1..b0a2e3faabb2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -55,8 +55,10 @@ static void f2fs_read_end_io(struct bio *bio) int i; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) + if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { + f2fs_show_injection_info(FAULT_IO); bio->bi_error = -EIO; + } #endif if (f2fs_bio_encrypted(bio)) { diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 54aa30ee028f..295a223ae11e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -549,8 +549,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, start: #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) + if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { + f2fs_show_injection_info(FAULT_DIR_DEPTH); return -ENOSPC; + } #endif if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d1156cdd211e..8c0916ae0bea 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -952,6 +952,10 @@ struct f2fs_sb_info { }; #ifdef CONFIG_F2FS_FAULT_INJECTION +#define f2fs_show_injection_info(type) \ + printk("%sF2FS-fs : inject %s in %s of %pF\n", \ + KERN_INFO, fault_name[type], \ + __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { struct f2fs_fault_info *ffi = &sbi->fault_info; @@ -965,10 +969,6 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) atomic_inc(&ffi->inject_ops); if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { atomic_set(&ffi->inject_ops, 0); - printk("%sF2FS-fs : inject %s in %pF\n", - KERN_INFO, - fault_name[type], - __builtin_return_address(0)); return true; } return false; @@ -1277,8 +1277,10 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t diff; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_BLOCK)) + if (time_to_inject(sbi, FAULT_BLOCK)) { + f2fs_show_injection_info(FAULT_BLOCK); return false; + } #endif /* * let's increase this in prior to actual block count change in order @@ -1518,8 +1520,10 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, if (page) return page; - if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { + f2fs_show_injection_info(FAULT_PAGE_ALLOC); return NULL; + } #endif if (!for_write) return grab_cache_page(mapping, index); @@ -1995,8 +1999,10 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_KMALLOC)) + if (time_to_inject(sbi, FAULT_KMALLOC)) { + f2fs_show_injection_info(FAULT_KMALLOC); return 
NULL; + } #endif return kmalloc(size, flags); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6c996e39b59a..8be5144da8e6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -48,8 +48,10 @@ static int gc_thread_func(void *data) } #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_CHECKPOINT)) + if (time_to_inject(sbi, FAULT_CHECKPOINT)) { + f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); + } #endif /* diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index af06bda51a54..24bb8213d974 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -373,8 +373,10 @@ void f2fs_evict_inode(struct inode *inode) goto no_delete; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_EVICT_INODE)) + if (time_to_inject(sbi, FAULT_EVICT_INODE)) { + f2fs_show_injection_info(FAULT_EVICT_INODE); goto no_delete; + } #endif remove_ino_entry(sbi, inode->i_ino, APPEND_INO); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 43d35ec11851..24ea49f98891 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1987,8 +1987,10 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; retry: #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_ALLOC_NID)) + if (time_to_inject(sbi, FAULT_ALLOC_NID)) { + f2fs_show_injection_info(FAULT_ALLOC_NID); return false; + } #endif spin_lock(&nm_i->nid_list_lock); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9eb6d89bf9e2..1bab09097590 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -352,8 +352,10 @@ int commit_inmem_pages(struct inode *inode) void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_CHECKPOINT)) + if (time_to_inject(sbi, FAULT_CHECKPOINT)) { + f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); + } #endif if (!need) -- GitLab From d27c3d89db9986b6f48576169031247e4c893729 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 24 Feb 2017 18:46:00 +0800 Subject: [PATCH 0625/1084] f2fs: select target segment with closer temperature in SSR mode In SSR mode, we can allocate target segment which has different temperature type from the type of current block, in order to avoid mixing coldest and hottest data/node as much as possible, change SSR allocation policy to select closer temperature for current block prior. 
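The resulting search order can be sketched outside the kernel. The loop below mirrors the reversed/forward walk added to get_ssr_segment() for the three data temperatures (standalone illustration; the enum values are illustrative, not the kernel's CURSEG_* constants):

#include <stdbool.h>
#include <stdio.h>

enum { HOT_DATA, WARM_DATA, COLD_DATA, NR_DATA_TYPE };
static const char * const temp_name[] = { "hot", "warm", "cold" };

/* print the order in which the other temperatures are tried for SSR */
static void show_ssr_order(int type)
{
	bool reversed = false;
	int i, cnt = NR_DATA_TYPE;

	if (type >= WARM_DATA) {
		reversed = true;	/* start from cold and walk back */
		i = COLD_DATA;
	} else {
		i = HOT_DATA;		/* start from hot and walk forward */
	}

	printf("%-4s tries:", temp_name[type]);
	for (; cnt-- > 0; reversed ? i-- : i++) {
		if (i == type)
			continue;	/* own log was already tried */
		printf(" %s", temp_name[i]);
	}
	printf("\n");
}

int main(void)
{
	for (int t = HOT_DATA; t < NR_DATA_TYPE; t++)
		show_ssr_order(t);
	return 0;
}

This prints "hot tries: warm cold", "warm tries: cold hot" and "cold tries: warm hot", so the closest temperature is always tried before the farthest one.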
Signed-off-by: Yunlong Song Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1bab09097590..9006d8ed6f52 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1541,7 +1541,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; - int i, n; + int i, cnt; + bool reversed = false; /* need_SSR() already forces to do this */ if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR)) @@ -1549,14 +1550,24 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) /* For node segments, let's do SSR more intensively */ if (IS_NODESEG(type)) { - i = CURSEG_HOT_NODE; - n = CURSEG_COLD_NODE; + if (type >= CURSEG_WARM_NODE) { + reversed = true; + i = CURSEG_COLD_NODE; + } else { + i = CURSEG_HOT_NODE; + } + cnt = NR_CURSEG_NODE_TYPE; } else { - i = CURSEG_HOT_DATA; - n = CURSEG_COLD_DATA; + if (type >= CURSEG_WARM_DATA) { + reversed = true; + i = CURSEG_COLD_DATA; + } else { + i = CURSEG_HOT_DATA; + } + cnt = NR_CURSEG_DATA_TYPE; } - for (; i <= n; i++) { + for (; cnt-- > 0; reversed ? i-- : i++) { if (i == type) continue; if (v_ops->get_victim(sbi, &(curseg)->next_segno, -- GitLab From 19f4e688f89a9ce07b86d06d3df23c1cd877ab4e Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Sat, 25 Feb 2017 03:57:38 +0000 Subject: [PATCH 0626/1084] f2fs: avoid bggc->fggc when enough free segments are avaliable after cp We use has_not_enough_free_secs to check if there are enough free segments, (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + imeta_secs + reserved_sections(sbi) + needed); Under scenario with large number of dirty nodes, these nodes would be flushed during cp, as a result, right side of the inequality would be decreased, while left side stays unchanged if these nodes are flushed in SSR way, which means there are enough free segments after this cp. For this case, we just do a bggc instead of fggc. Signed-off-by: Hou Pengyang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8be5144da8e6..023f043edefd 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -953,21 +953,22 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) goto stop; } - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) { - gc_type = FG_GC; + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) { /* - * If there is no victim and no prefree segment but still not - * enough free sections, we should flush dent/node blocks and do - * garbage collections. + * For example, if there are many prefree_segments below given + * threshold, we can make them free by checkpoint. Then, we + * secure free segments which doesn't need fggc any more. */ ret = write_checkpoint(sbi, &cpc); if (ret) goto stop; - } else if (gc_type == BG_GC && !background) { - /* f2fs_balance_fs doesn't need to do BG_GC in critical path. */ - goto stop; + if (has_not_enough_free_secs(sbi, 0, 0)) + gc_type = FG_GC; } + /* f2fs_balance_fs doesn't need to do BG_GC in critical path. 
*/ + if (gc_type == BG_GC && !background) + goto stop; if (!__get_victim(sbi, &segno, gc_type)) goto stop; ret = 0; -- GitLab From 317e13009682d4614ec9ac7c2762eaee26f93ed3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 Feb 2017 17:29:54 +0800 Subject: [PATCH 0627/1084] f2fs: kill __is_extent_same Since commit ee6d182f2a19 ("f2fs: remove syncing inode page in all the cases") delayed inode element updating from inode cache to node page cache, so once largest cached extent is updated, we can make inode dirty immediately instead of checking and updating it in the end of extent cache update. The above commit didn't clean up unneeded codes in extent_cache.c, let's finish the job in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 8 +++----- fs/f2fs/f2fs.h | 7 ------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 0ab5518e45c2..c6934f014e0f 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -413,7 +413,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode, return en; } -static unsigned int f2fs_update_extent_tree_range(struct inode *inode, +static void f2fs_update_extent_tree_range(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -426,7 +426,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, unsigned int pos = (unsigned int)fofs; if (!et) - return false; + return; trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len); @@ -434,7 +434,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, if (is_inode_flag_set(inode, FI_NO_EXTENT)) { write_unlock(&et->lock); - return false; + return; } prev = et->largest; @@ -531,8 +531,6 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, __free_extent_tree(sbi, et); write_unlock(&et->lock); - - return !__is_extent_same(&prev, &et->largest); } unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8c0916ae0bea..633f2efc8c17 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -496,13 +496,6 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, ei->len = len; } -static inline bool __is_extent_same(struct extent_info *ei1, - struct extent_info *ei2) -{ - return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk && - ei1->len == ei2->len); -} - static inline bool __is_extent_mergeable(struct extent_info *back, struct extent_info *front) { -- GitLab From 5f35a2cd5b503e59ac949283a423f868e94fb36b Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 25 Feb 2017 19:23:27 +0800 Subject: [PATCH 0628/1084] f2fs: Don't update the xattr data that same as the exist f2fs removes the old xattr data and appends the new data although the new data is same as the exist. 
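The early-return relies on the xattr layout in which an entry's value bytes immediately follow its name bytes. A small self-contained sketch of the comparison, using a simplified structure rather than the real struct f2fs_xattr_entry:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* simplified stand-in for an xattr entry: name bytes, then value bytes */
struct xent {
	unsigned char name_len;
	unsigned short value_size;
	char data[32];
};

/* same shape as the new f2fs_xattr_value_same(): compare size, then bytes */
static bool xattr_value_same(const struct xent *e, const void *value, size_t size)
{
	const void *pval = e->data + e->name_len;

	return e->value_size == size && !memcmp(pval, value, size);
}

int main(void)
{
	struct xent e = { .name_len = 4, .value_size = 3 };

	memcpy(e.data, "mode", 4);	/* name  */
	memcpy(e.data + 4, "rwx", 3);	/* value */

	printf("same value: %d\n", xattr_value_same(&e, "rwx", 3)); /* 1: setxattr can return early */
	printf("new value:  %d\n", xattr_value_same(&e, "rw-", 3)); /* 0: entry must be rewritten */
	return 0;
}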
Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index b50f6b581a71..dbfd5cbfa46c 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -545,6 +545,13 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) return error; } +static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry, + const void *value, size_t size) +{ + void *pval = entry->e_name + entry->e_name_len; + return (entry->e_value_size == size) && !memcmp(pval, value, size); +} + static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *ipage, int flags) @@ -579,12 +586,17 @@ static int __f2fs_setxattr(struct inode *inode, int index, found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; - if ((flags & XATTR_REPLACE) && !found) { + if (found) { + if ((flags & XATTR_CREATE)) { + error = -EEXIST; + goto exit; + } + + if (f2fs_xattr_value_same(here, value, size)) + goto exit; + } else if ((flags & XATTR_REPLACE)) { error = -ENODATA; goto exit; - } else if ((flags & XATTR_CREATE) && found) { - error = -EEXIST; - goto exit; } last = here; -- GitLab From 0ec4a5b647fca5a5649573c23d31230b25fdc827 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 25 Feb 2017 19:23:40 +0800 Subject: [PATCH 0629/1084] f2fs: drop the duplicate pval in f2fs_getxattr Fixes: ba38c27eb9 ("f2fs: enhance lookup xattr") Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index dbfd5cbfa46c..7298a4488f7f 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -467,7 +467,6 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, struct f2fs_xattr_entry *entry = NULL; int error = 0; unsigned int size, len; - char *pval; void *base_addr = NULL; if (name == NULL) @@ -489,8 +488,6 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, goto out; } - pval = entry->e_name + entry->e_name_len; - if (buffer) { char *pval = entry->e_name + entry->e_name_len; memcpy(buffer, pval, size); -- GitLab From 727ebb091e1778cc3a4842ae04f8f54da111eee0 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 25 Feb 2017 19:32:21 +0800 Subject: [PATCH 0630/1084] f2fs: update the comment of default nr_pages to skipping Fixes: 2c237ebaa4 ("f2fs: avoid writing node/metapages during writes") Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index f4020f141d83..5e8ad4280a50 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -736,8 +736,8 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno) * It is very important to gather dirty pages and write at once, so that we can * submit a big bio without interfering other data writes. * By default, 512 pages for directory data, - * 512 pages (2MB) * 3 for three types of nodes, and - * max_bio_blocks for meta are set. + * 512 pages (2MB) * 8 for nodes, and + * 256 pages * 8 for meta are set. 
*/ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) { -- GitLab From ced2c7ea8e99b46755a270872cd5ba61c27cffad Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 25 Feb 2017 19:53:39 +0800 Subject: [PATCH 0631/1084] f2fs: new helper cur_cp_crc() getting crc in f2fs_checkpoint There are four places that getting the crc value in f2fs_checkpoint, just add a new helper cur_cp_crc for them. Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +-- fs/f2fs/f2fs.h | 6 ++++++ fs/f2fs/node.c | 5 +---- fs/f2fs/node.h | 20 +++++++------------- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 04d7c244c0f0..0339daf4ca02 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -683,8 +683,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, return -EINVAL; } - crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block - + crc_offset))); + crc = cur_cp_crc(*cp_block); if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); return -EINVAL; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 633f2efc8c17..4d332b396384 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1126,6 +1126,12 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) return le64_to_cpu(cp->checkpoint_ver); } +static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp) +{ + size_t crc_offset = le32_to_cpu(cp->checksum_offset); + return le32_to_cpu(*((__le32 *)((unsigned char *)cp + crc_offset))); +} + static inline bool __is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) { unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 24ea49f98891..8e53181d5db1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2462,9 +2462,6 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE; unsigned int i; __u64 cp_ver = cur_cp_version(ckpt); - size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); - __u64 crc = le32_to_cpu(*((__le32 *) - ((unsigned char *)ckpt + crc_offset))); block_t nat_bits_addr; if (!enabled_nat_bits(sbi, NULL)) @@ -2487,7 +2484,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) f2fs_put_page(page, 1); } - cp_ver |= (crc << 32); + cp_ver |= (cur_cp_crc(ckpt) << 32); if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) { disable_nat_bits(sbi, true); return 0; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 3fc9c4b1dce9..2f9603fa85a5 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -300,14 +300,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); struct f2fs_node *rn = F2FS_NODE(page); - size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); - __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver); + __u64 cp_ver = cur_cp_version(ckpt); + + if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) + cp_ver |= (cur_cp_crc(ckpt) << 32); - if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { - __u64 crc = le32_to_cpu(*((__le32 *) - ((unsigned char *)ckpt + crc_offset))); - cp_ver |= (crc << 32); - } rn->footer.cp_ver = cpu_to_le64(cp_ver); rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } @@ -315,14 +312,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) static inline bool is_recoverable_dnode(struct page *page) { struct f2fs_checkpoint *ckpt = 
F2FS_CKPT(F2FS_P_SB(page)); - size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); __u64 cp_ver = cur_cp_version(ckpt); - if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { - __u64 crc = le32_to_cpu(*((__le32 *) - ((unsigned char *)ckpt + crc_offset))); - cp_ver |= (crc << 32); - } + if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) + cp_ver |= (cur_cp_crc(ckpt) << 32); + return cp_ver == cpver_of_node(page); } -- GitLab From 4ac912427c4214d8031d9ad6fbc3bc75e71512df Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 23 Feb 2017 10:53:49 +0800 Subject: [PATCH 0632/1084] f2fs: introduce free nid bitmap In scenario of intensively node allocation, free nids will be ran out soon, then it needs to stop to load free nids by traversing NAT blocks, in worse case, if NAT blocks does not be cached in memory, it generates IOs which slows down our foreground operations. In order to speed up node allocation, in this patch we introduce a new free_nid_bitmap array, so there is an bitmap table for each NAT block, Once the NAT block is loaded, related bitmap cache will be switched on, and bitmap will be set during traversing nat entries in NAT block, later we can query and update nid usage status in memory completely. With such implementation, I expect performance of node allocation can be improved in the long-term after filesystem image is mounted. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 2 + fs/f2fs/f2fs.h | 2 + fs/f2fs/node.c | 125 ++++++++++++++++++++++++++++++++++++---- include/linux/f2fs_fs.h | 1 + 4 files changed, 120 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 015ad2b73a92..a77df377e2e8 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -194,6 +194,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); + si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; + si->base_mem += NM_I(sbi)->nat_blocks / 8; get_cache: si->cache_mem = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4d332b396384..7e2924981eeb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -555,6 +555,8 @@ struct f2fs_nm_info { unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */ spinlock_t nid_list_lock; /* protect nid lists ops */ struct mutex build_lock; /* lock for build free nids */ + unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; + unsigned char *nat_block_bitmap; /* for checkpoint */ char *nat_bitmap; /* NAT bitmap pointer */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8e53181d5db1..fd12a32f7e87 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1765,7 +1765,8 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi, radix_tree_delete(&nm_i->free_nid_root, i->nid); } -static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) +/* return if the nid is recognized as free */ +static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; @@ -1774,14 +1775,14 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) /* 0 nid should not be used */ if (unlikely(nid == 0)) - return 0; + return false; if (build) { /* do not add allocated nids */ ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) - return 0; + return false; } i = f2fs_kmem_cache_alloc(free_nid_slab, 
GFP_NOFS); @@ -1790,7 +1791,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) if (radix_tree_preload(GFP_NOFS)) { kmem_cache_free(free_nid_slab, i); - return 0; + return true; } spin_lock(&nm_i->nid_list_lock); @@ -1799,9 +1800,9 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) radix_tree_preload_end(); if (err) { kmem_cache_free(free_nid_slab, i); - return 0; + return true; } - return 1; + return true; } static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) @@ -1822,17 +1823,36 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } +void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); + unsigned int nid_ofs = nid - START_NID(nid); + + if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) + return; + + if (set) + set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + else + clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); +} + static void scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nat_block *nat_blk = page_address(nat_page); block_t blk_addr; + unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; + set_bit_le(nat_ofs, nm_i->nat_block_bitmap); + i = start_nid % NAT_ENTRY_PER_BLOCK; for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { + bool freed = false; if (unlikely(start_nid >= nm_i->max_nid)) break; @@ -1840,8 +1860,52 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); f2fs_bug_on(sbi, blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) - add_free_nid(sbi, start_nid, true); + freed = add_free_nid(sbi, start_nid, true); + update_free_nid_bitmap(sbi, start_nid, freed); + } +} + +static void scan_free_nid_bits(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); + struct f2fs_journal *journal = curseg->journal; + unsigned int i, idx; + unsigned int target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK; + + down_read(&nm_i->nat_tree_lock); + + for (i = 0; i < nm_i->nat_blocks; i++) { + if (!test_bit_le(i, nm_i->nat_block_bitmap)) + continue; + for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { + nid_t nid; + + if (!test_bit_le(idx, nm_i->free_nid_bitmap[i])) + continue; + + nid = i * NAT_ENTRY_PER_BLOCK + idx; + add_free_nid(sbi, nid, true); + + if (nm_i->nid_cnt[FREE_NID_LIST] >= target) + goto out; + } + } +out: + down_read(&curseg->journal_rwsem); + for (i = 0; i < nats_in_cursum(journal); i++) { + block_t addr; + nid_t nid; + + addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); + nid = le32_to_cpu(nid_in_journal(journal, i)); + if (addr == NULL_ADDR) + add_free_nid(sbi, nid, true); + else + remove_free_nid(sbi, nid); } + up_read(&curseg->journal_rwsem); + up_read(&nm_i->nat_tree_lock); } static int scan_nat_bits(struct f2fs_sb_info *sbi) @@ -1912,9 +1976,17 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) if (!sync && !available_free_memory(sbi, FREE_NIDS)) return; - /* try to find free nids with nat_bits */ - if (!mount && !scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST]) - return; + if (!mount) { + /* try to find free nids in free_nid_bitmap */ + scan_free_nid_bits(sbi); + + if (nm_i->nid_cnt[FREE_NID_LIST]) + return; + + /* try to find free nids with nat_bits */ + if (!scan_nat_bits(sbi) && 
nm_i->nid_cnt[FREE_NID_LIST]) + return; + } /* find next valid candidate */ if (enabled_nat_bits(sbi, NULL)) { @@ -2010,6 +2082,9 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) i->state = NID_ALLOC; __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); nm_i->available_nids--; + + update_free_nid_bitmap(sbi, *nid, false); + spin_unlock(&nm_i->nid_list_lock); return true; } @@ -2064,6 +2139,8 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) nm_i->available_nids++; + update_free_nid_bitmap(sbi, nid, true); + spin_unlock(&nm_i->nid_list_lock); if (need_free) @@ -2392,6 +2469,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, add_free_nid(sbi, nid, false); spin_lock(&NM_I(sbi)->nid_list_lock); NM_I(sbi)->available_nids++; + update_free_nid_bitmap(sbi, nid, true); + spin_unlock(&NM_I(sbi)->nid_list_lock); + } else { + spin_lock(&NM_I(sbi)->nid_list_lock); + update_free_nid_bitmap(sbi, nid, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -2558,6 +2640,22 @@ static int init_node_manager(struct f2fs_sb_info *sbi) return 0; } +int init_free_nid_cache(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks * + NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); + if (!nm_i->free_nid_bitmap) + return -ENOMEM; + + nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8, + GFP_KERNEL); + if (!nm_i->nat_block_bitmap) + return -ENOMEM; + return 0; +} + int build_node_manager(struct f2fs_sb_info *sbi) { int err; @@ -2570,6 +2668,10 @@ int build_node_manager(struct f2fs_sb_info *sbi) if (err) return err; + err = init_free_nid_cache(sbi); + if (err) + return err; + build_free_nids(sbi, true, true); return 0; } @@ -2628,6 +2730,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) } up_write(&nm_i->nat_tree_lock); + kvfree(nm_i->nat_block_bitmap); + kvfree(nm_i->free_nid_bitmap); + kfree(nm_i->nat_bitmap); kfree(nm_i->nat_bits); #ifdef CONFIG_F2FS_CHECK_FS diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 1c92ace2e8f8..e2d239ed4c60 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -279,6 +279,7 @@ struct f2fs_node { * For NAT entries */ #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ -- GitLab From f0cdbfe6ef0b2f3d42e1cf87bf962e730c2420cb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sun, 26 Feb 2017 20:47:16 +0800 Subject: [PATCH 0633/1084] f2fs: use MAX_FREE_NIDS for the free nids target F2FS has define MAX_FREE_NIDS for maximum of cached free nids target. 
Signed-off-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index fd12a32f7e87..b3aead4c5a0f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1871,7 +1871,6 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_journal *journal = curseg->journal; unsigned int i, idx; - unsigned int target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK; down_read(&nm_i->nat_tree_lock); @@ -1887,7 +1886,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) nid = i * NAT_ENTRY_PER_BLOCK + idx; add_free_nid(sbi, nid, true); - if (nm_i->nid_cnt[FREE_NID_LIST] >= target) + if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) goto out; } } @@ -1913,7 +1912,6 @@ static int scan_nat_bits(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct page *page; unsigned int i = 0; - nid_t target = FREE_NID_PAGES * NAT_ENTRY_PER_BLOCK; nid_t nid; if (!enabled_nat_bits(sbi, NULL)) @@ -1934,7 +1932,7 @@ static int scan_nat_bits(struct f2fs_sb_info *sbi) add_free_nid(sbi, nid, true); } - if (nm_i->nid_cnt[FREE_NID_LIST] >= target) + if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) goto out; i++; goto check_empty; @@ -1952,7 +1950,7 @@ static int scan_nat_bits(struct f2fs_sb_info *sbi) scan_nat_page(sbi, page, nid); f2fs_put_page(page, 1); - if (nm_i->nid_cnt[FREE_NID_LIST] < target) { + if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) { i++; goto check_partial; } -- GitLab From e3b56cdd4351f0e227d4d847eeadff4c82aef1b9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 7 Feb 2017 15:49:50 +0800 Subject: [PATCH 0634/1084] vhost: try avoiding avail index access when getting descriptor If last avail idx is not equal to cached avail idx, we're sure there's still available buffers in the virtqueue so there's no need to re-read avail idx. So let's skip this to avoid unnecessary userspace memory access and memory barrier. Pktgen test show about 3% improvement on rx pps. Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 8f99fe08de02..1f7e4e4e6f8e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1930,25 +1930,32 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Check it isn't doing very strange things with descriptor numbers. */ last_avail_idx = vq->last_avail_idx; - if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { - vq_err(vq, "Failed to access avail idx at %p\n", - &vq->avail->idx); - return -EFAULT; - } - vq->avail_idx = vhost16_to_cpu(vq, avail_idx); - if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { - vq_err(vq, "Guest moved used index from %u to %u", - last_avail_idx, vq->avail_idx); - return -EFAULT; - } + if (vq->avail_idx == vq->last_avail_idx) { + if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { + vq_err(vq, "Failed to access avail idx at %p\n", + &vq->avail->idx); + return -EFAULT; + } + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); - /* If there's nothing new since last we looked, return invalid. 
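The in-memory structure the free-nid patches rely on can be pictured with a small standalone sketch: one bitmap per NAT block records which nids in that block are free once the block has been scanned, and later lookups walk those bitmaps instead of re-reading NAT pages, stopping once a target number of cached free nids is reached. All sizes and names below are illustrative, not the on-disk constants:

#include <stdio.h>

#define NIDS_PER_BLOCK	16	/* illustrative, not NAT_ENTRY_PER_BLOCK */
#define NR_BLOCKS	4
#define TARGET_FREE	5	/* stand-in for the MAX_FREE_NIDS cap */

static unsigned char block_scanned[NR_BLOCKS];		      /* NAT page seen once */
static unsigned char free_bitmap[NR_BLOCKS][NIDS_PER_BLOCK]; /* 1 = nid is free */

/* called while a NAT block is being scanned: remember free/used state */
static void update_free_nid_bitmap(unsigned int nid, int is_free)
{
	unsigned int blk = nid / NIDS_PER_BLOCK;

	if (!block_scanned[blk])
		return;			/* only track blocks we have read */
	free_bitmap[blk][nid % NIDS_PER_BLOCK] = is_free;
}

/* later: collect free nids from memory only, no NAT page IO needed */
static unsigned int scan_free_nid_bits(unsigned int *out, unsigned int target)
{
	unsigned int found = 0;

	for (unsigned int blk = 0; blk < NR_BLOCKS && found < target; blk++) {
		if (!block_scanned[blk])
			continue;
		for (unsigned int off = 0; off < NIDS_PER_BLOCK; off++) {
			if (free_bitmap[blk][off])
				out[found++] = blk * NIDS_PER_BLOCK + off;
			if (found == target)
				break;
		}
	}
	return found;
}

int main(void)
{
	unsigned int free_nids[TARGET_FREE];

	block_scanned[0] = block_scanned[2] = 1;	/* blocks 0 and 2 were scanned */
	for (unsigned int nid = 0; nid < NR_BLOCKS * NIDS_PER_BLOCK; nid++)
		update_free_nid_bitmap(nid, nid % 3 == 0); /* pretend every 3rd nid is free */

	unsigned int n = scan_free_nid_bits(free_nids, TARGET_FREE);
	for (unsigned int i = 0; i < n; i++)
		printf("free nid %u\n", free_nids[i]);
	return 0;
}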
*/ - if (vq->avail_idx == last_avail_idx) - return vq->num; + if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { + vq_err(vq, "Guest moved used index from %u to %u", + last_avail_idx, vq->avail_idx); + return -EFAULT; + } + + /* If there's nothing new since last we looked, return + * invalid. + */ + if (vq->avail_idx == last_avail_idx) + return vq->num; - /* Only get avail ring entries after they have been exposed by guest. */ - smp_rmb(); + /* Only get avail ring entries after they have been + * exposed by guest. + */ + smp_rmb(); + } /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ -- GitLab From 540faedb0036aca14ff90a7f679b008442bc9dd7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 27 Feb 2017 17:10:45 +0800 Subject: [PATCH 0635/1084] f2fs: fix to update F2FS_{CP_}WB_DATA count correctly We should only account F2FS_{CP_}WB_DATA IOs for write path, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b0a2e3faabb2..1375fef11146 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -398,7 +398,8 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio) if ((fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { err = -EAGAIN; - dec_page_count(sbi, WB_DATA_TYPE(bio_page)); + if (!is_read) + dec_page_count(sbi, WB_DATA_TYPE(bio_page)); goto out_fail; } io->bio = __bio_alloc(sbi, fio->new_blkaddr, -- GitLab From b6895e8f99cd8e98056346c42732236b958aab83 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 27 Feb 2017 18:43:12 +0800 Subject: [PATCH 0636/1084] f2fs: fix memory leak of write_io_dummy mempool during umount Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 751e1068db17..53305880c455 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -818,7 +818,7 @@ static void f2fs_put_super(struct super_block *sb) kfree(sbi->raw_super); destroy_device_list(sbi); - + mempool_destroy(sbi->write_io_dummy); destroy_percpu_info(sbi); kfree(sbi); } -- GitLab From a3ebfe4fd89794df8b2f357ac5f665052e74b4f9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 27 Feb 2017 18:43:13 +0800 Subject: [PATCH 0637/1084] f2fs: fix to enlarge size of write_io_dummy mempool It needs to double cache size of write_io_dummy mempool, otherwise we may run out of cache in scenraio of Data/Node IOs were issued concurrently. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 53305880c455..7571eb297b21 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1929,7 +1929,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (F2FS_IO_SIZE(sbi) > 1) { sbi->write_io_dummy = - mempool_create_page_pool(F2FS_IO_SIZE(sbi) - 1, 0); + mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0); if (!sbi->write_io_dummy) goto free_options; } -- GitLab From 4fcf589ad44e50cbe60dea19320d9e71d1a10f22 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 27 Feb 2017 18:59:53 +0800 Subject: [PATCH 0638/1084] f2fs: remove redundant set_page_dirty() This patch remove redundant set_page_dirty in truncate_blocks Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 36c156557bb1..c6ca00c87265 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -567,8 +567,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) } if (f2fs_has_inline_data(inode)) { - if (truncate_inline_inode(ipage, from)) - set_page_dirty(ipage); + truncate_inline_inode(ipage, from); if (from == 0) clear_inode_flag(inode, FI_DATA_EXIST); f2fs_put_page(ipage, 1); -- GitLab From 7bb3a371d199156cb6a863feab377146b80942c5 Mon Sep 17 00:00:00 2001 From: Masato Suzuki Date: Mon, 27 Feb 2017 20:52:49 +0900 Subject: [PATCH 0639/1084] f2fs: Fix zoned block device support The introduction of the multi-device feature partially broke the support for zoned block devices. In the function f2fs_scan_devices, sbi->devs allocation and initialization is skipped in the case of a single device mount. This result in no device information structure being allocated for the device. This is fine if the device is a regular device, but in the case of a zoned block device, the device zone type array is not initialized, which causes the function __f2fs_issue_discard_zone to fail as get_blkz_type is unable to determine the zone type of a section. Fix this by always allocating and initializing the sbi->devs device information array even in the case of a single device if that device is zoned. For this particular case, make sure to obtain a reference on the single device so that the call to blkdev_put() in destroy_device_list operates as expected. Fixes: 3c62be17d4f562f4 ("f2fs: support multiple devices") Cc: # v4.10 Signed-off-by: Masato Suzuki Acked-by: Damien Le Moal Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 67 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7571eb297b21..fe714d549a6a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1743,36 +1743,55 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) static int f2fs_scan_devices(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); + unsigned int max_devices = MAX_DEVICES; int i; - for (i = 0; i < MAX_DEVICES; i++) { - if (!RDEV(i).path[0]) + /* Initialize single device information */ + if (!RDEV(0).path[0]) { + if (!bdev_is_zoned(sbi->sb->s_bdev)) return 0; + max_devices = 1; + } - if (i == 0) { - sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) * - MAX_DEVICES, GFP_KERNEL); - if (!sbi->devs) - return -ENOMEM; - } + /* + * Initialize multiple devices information, or single + * zoned block device information. 
+ */ + sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info), + GFP_KERNEL); + if (!sbi->devs) + return -ENOMEM; - memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); - FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments); - if (i == 0) { - FDEV(i).start_blk = 0; - FDEV(i).end_blk = FDEV(i).start_blk + - (FDEV(i).total_segments << - sbi->log_blocks_per_seg) - 1 + - le32_to_cpu(raw_super->segment0_blkaddr); - } else { - FDEV(i).start_blk = FDEV(i - 1).end_blk + 1; - FDEV(i).end_blk = FDEV(i).start_blk + - (FDEV(i).total_segments << - sbi->log_blocks_per_seg) - 1; - } + for (i = 0; i < max_devices; i++) { - FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, + if (i > 0 && !RDEV(i).path[0]) + break; + + if (max_devices == 1) { + /* Single zoned block device mount */ + FDEV(0).bdev = + blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, sbi->sb->s_mode, sbi->sb->s_type); + } else { + /* Multi-device mount */ + memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN); + FDEV(i).total_segments = + le32_to_cpu(RDEV(i).total_segments); + if (i == 0) { + FDEV(i).start_blk = 0; + FDEV(i).end_blk = FDEV(i).start_blk + + (FDEV(i).total_segments << + sbi->log_blocks_per_seg) - 1 + + le32_to_cpu(raw_super->segment0_blkaddr); + } else { + FDEV(i).start_blk = FDEV(i - 1).end_blk + 1; + FDEV(i).end_blk = FDEV(i).start_blk + + (FDEV(i).total_segments << + sbi->log_blocks_per_seg) - 1; + } + FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, + sbi->sb->s_mode, sbi->sb->s_type); + } if (IS_ERR(FDEV(i).bdev)) return PTR_ERR(FDEV(i).bdev); @@ -1792,6 +1811,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) "Failed to initialize F2FS blkzone information"); return -EINVAL; } + if (max_devices == 1) + break; f2fs_msg(sbi->sb, KERN_INFO, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", i, FDEV(i).path, -- GitLab From b8d96a30b6f1c29e1ab716b3727a63fe7268f661 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Mon, 27 Feb 2017 13:02:58 +0000 Subject: [PATCH 0640/1084] f2fs: add f2fs_drop_inode tracepoint Signed-off-by: Hou Pengyang Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +++++-- include/trace/events/f2fs.h | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fe714d549a6a..2d494d5239c5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -624,6 +624,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) static int f2fs_drop_inode(struct inode *inode) { + int ret; /* * This is to avoid a deadlock condition like below. 
* writeback_single_inode(inode) @@ -656,10 +657,12 @@ static int f2fs_drop_inode(struct inode *inode) spin_lock(&inode->i_lock); atomic_dec(&inode->i_count); } + trace_f2fs_drop_inode(inode, 0); return 0; } - - return generic_drop_inode(inode); + ret = generic_drop_inode(inode); + trace_f2fs_drop_inode(inode, ret); + return ret; } int f2fs_inode_dirtied(struct inode *inode, bool sync) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ff31ccfb8111..c80fcad0a6c9 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -315,6 +315,13 @@ DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit, TP_ARGS(inode, ret) ); +DEFINE_EVENT(f2fs__inode_exit, f2fs_drop_inode, + + TP_PROTO(struct inode *inode, int ret), + + TP_ARGS(inode, ret) +); + DEFINE_EVENT(f2fs__inode, f2fs_truncate, TP_PROTO(struct inode *inode), -- GitLab From 37e79cd31c2c4dcd7e149958ca3cae9d180c63c7 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Mon, 27 Feb 2017 13:02:59 +0000 Subject: [PATCH 0641/1084] f2fs: fix a pclint compile warning Fix the following pclint warning: ... Loss of precision (arg. no. 2) (unsigned long long to unsigned int)) Signed-off-by: Hou Pengyang Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 023f043edefd..418fd9881646 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1009,6 +1009,6 @@ void build_gc_manager(struct f2fs_sb_info *sbi) ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec; - sbi->fggc_threshold = div_u64((main_count - ovp_count) * blocks_per_sec, + sbi->fggc_threshold = div64_u64((main_count - ovp_count) * blocks_per_sec, (main_count - resv_count)); } -- GitLab From 5c34d002dcc7a6dd665a19d098b4f4cd5501ba1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:18 +0100 Subject: [PATCH 0642/1084] virtio_pci: remove struct virtio_pci_vq_info We don't really need struct virtio_pci_vq_info, as most fields in there are redundant: - the vq backpointer is not strictly needed to start with - the entry in the vqs list is not needed - the generic virtqueue already has a list, we only need to check if it has a callback to get the same semantics - we can use a simple array to look up the MSI-X vec if needed. - That simple array now also double-serves to replace the per_vq_vectors flag Signed-off-by: Christoph Hellwig Signed-off-by: Michael S.
Tsirkin --- drivers/virtio/virtio_pci_common.c | 117 +++++++++-------------------- drivers/virtio/virtio_pci_common.h | 25 +----- drivers/virtio/virtio_pci_legacy.c | 6 +- drivers/virtio/virtio_pci_modern.c | 6 +- 4 files changed, 39 insertions(+), 115 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 186cbab327b8..a33767318cbf 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -62,16 +62,13 @@ static irqreturn_t vp_config_changed(int irq, void *opaque) static irqreturn_t vp_vring_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; - struct virtio_pci_vq_info *info; irqreturn_t ret = IRQ_NONE; - unsigned long flags; + struct virtqueue *vq; - spin_lock_irqsave(&vp_dev->lock, flags); - list_for_each_entry(info, &vp_dev->virtqueues, node) { - if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) + list_for_each_entry(vq, &vp_dev->vdev.vqs, list) { + if (vq->callback && vring_interrupt(irq, vq) == IRQ_HANDLED) ret = IRQ_HANDLED; } - spin_unlock_irqrestore(&vp_dev->lock, flags); return ret; } @@ -167,55 +164,6 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, return err; } -static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index, - void (*callback)(struct virtqueue *vq), - const char *name, - u16 msix_vec) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); - struct virtqueue *vq; - unsigned long flags; - - /* fill out our structure that represents an active queue */ - if (!info) - return ERR_PTR(-ENOMEM); - - vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec); - if (IS_ERR(vq)) - goto out_info; - - info->vq = vq; - if (callback) { - spin_lock_irqsave(&vp_dev->lock, flags); - list_add(&info->node, &vp_dev->virtqueues); - spin_unlock_irqrestore(&vp_dev->lock, flags); - } else { - INIT_LIST_HEAD(&info->node); - } - - vp_dev->vqs[index] = info; - return vq; - -out_info: - kfree(info); - return vq; -} - -static void vp_del_vq(struct virtqueue *vq) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); - struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; - unsigned long flags; - - spin_lock_irqsave(&vp_dev->lock, flags); - list_del(&info->node); - spin_unlock_irqrestore(&vp_dev->lock, flags); - - vp_dev->del_vq(info); - kfree(info); -} - /* the config->del_vqs() implementation */ void vp_del_vqs(struct virtio_device *vdev) { @@ -224,16 +172,15 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - if (vp_dev->per_vq_vectors) { - int v = vp_dev->vqs[vq->index]->msix_vector; + if (vp_dev->msix_vector_map) { + int v = vp_dev->msix_vector_map[vq->index]; if (v != VIRTIO_MSI_NO_VECTOR) free_irq(pci_irq_vector(vp_dev->pci_dev, v), vq); } - vp_del_vq(vq); + vp_dev->del_vq(vq); } - vp_dev->per_vq_vectors = false; if (vp_dev->intx_enabled) { free_irq(vp_dev->pci_dev->irq, vp_dev); @@ -261,8 +208,8 @@ void vp_del_vqs(struct virtio_device *vdev) vp_dev->msix_names = NULL; kfree(vp_dev->msix_affinity_masks); vp_dev->msix_affinity_masks = NULL; - kfree(vp_dev->vqs); - vp_dev->vqs = NULL; + kfree(vp_dev->msix_vector_map); + vp_dev->msix_vector_map = NULL; } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, @@ -275,10 +222,6 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, u16 msix_vec; int i, err, nvectors, allocated_vectors; - 
vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); - if (!vp_dev->vqs) - return -ENOMEM; - if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; @@ -294,7 +237,13 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (err) goto error_find; - vp_dev->per_vq_vectors = per_vq_vectors; + if (per_vq_vectors) { + vp_dev->msix_vector_map = kmalloc_array(nvqs, + sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); + if (!vp_dev->msix_vector_map) + goto error_find; + } + allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { if (!names[i]) { @@ -304,19 +253,25 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (!callbacks[i]) msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) + else if (per_vq_vectors) msix_vec = allocated_vectors++; else msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec); + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], + msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) + if (!per_vq_vectors) continue; + if (msix_vec == VIRTIO_MSI_NO_VECTOR) { + vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; + continue; + } + /* allocate per-vq irq if available and necessary */ snprintf(vp_dev->msix_names[msix_vec], sizeof *vp_dev->msix_names, @@ -326,8 +281,12 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, vring_interrupt, 0, vp_dev->msix_names[msix_vec], vqs[i]); - if (err) + if (err) { + /* don't free this irq on error */ + vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; goto error_find; + } + vp_dev->msix_vector_map[i] = msix_vec; } return 0; @@ -343,23 +302,18 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i, err; - vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); - if (!vp_dev->vqs) - return -ENOMEM; - err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) goto out_del_vqs; vp_dev->intx_enabled = 1; - vp_dev->per_vq_vectors = false; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } - vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); @@ -409,16 +363,15 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) { struct virtio_device *vdev = vq->vdev; struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; - struct cpumask *mask; - unsigned int irq; if (!vq->callback) return -EINVAL; if (vp_dev->msix_enabled) { - mask = vp_dev->msix_affinity_masks[info->msix_vector]; - irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); + int vec = vp_dev->msix_vector_map[vq->index]; + struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; + unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); + if (cpu == -1) irq_set_affinity_hint(irq, NULL); else { @@ -498,8 +451,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.dev.parent = &pci_dev->dev; vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; - INIT_LIST_HEAD(&vp_dev->virtqueues); - spin_lock_init(&vp_dev->lock); /* enable the device */ rc = pci_enable_device(pci_dev); diff --git a/drivers/virtio/virtio_pci_common.h 
b/drivers/virtio/virtio_pci_common.h index b2f666250ae0..2038887bdf23 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -31,17 +31,6 @@ #include #include -struct virtio_pci_vq_info { - /* the actual virtqueue */ - struct virtqueue *vq; - - /* the list node for the virtqueues list */ - struct list_head node; - - /* MSI-X vector (or none) */ - unsigned msix_vector; -}; - /* Our device structure */ struct virtio_pci_device { struct virtio_device vdev; @@ -75,13 +64,6 @@ struct virtio_pci_device { /* the IO mapping for the PCI config space */ void __iomem *ioaddr; - /* a list of queues so we can dispatch IRQs */ - spinlock_t lock; - struct list_head virtqueues; - - /* array of all queues for house-keeping */ - struct virtio_pci_vq_info **vqs; - /* MSI-X support */ int msix_enabled; int intx_enabled; @@ -94,16 +76,15 @@ struct virtio_pci_device { /* Vectors allocated, excluding per-vq vectors if any */ unsigned msix_used_vectors; - /* Whether we have vector per vq */ - bool per_vq_vectors; + /* Map of per-VQ MSI-X vectors, may be NULL */ + unsigned *msix_vector_map; struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned idx, void (*callback)(struct virtqueue *vq), const char *name, u16 msix_vec); - void (*del_vq)(struct virtio_pci_vq_info *info); + void (*del_vq)(struct virtqueue *vq); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); }; diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 6d9e5173d5fa..47292dad0ff9 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -112,7 +112,6 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, @@ -130,8 +129,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) return ERR_PTR(-ENOENT); - info->msix_vector = msix_vec; - /* create the vring */ vq = vring_create_virtqueue(index, num, VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev, @@ -162,9 +159,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, return ERR_PTR(err); } -static void del_vq(struct virtio_pci_vq_info *info) +static void del_vq(struct virtqueue *vq) { - struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 4bf7ab375894..00e6fc1df407 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -293,7 +293,6 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) } static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, - struct virtio_pci_vq_info *info, unsigned index, void (*callback)(struct virtqueue *vq), const char *name, @@ -323,8 +322,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, /* get offset of notification word for this vq */ off = vp_ioread16(&cfg->queue_notify_off); - info->msix_vector = msix_vec; - /* create the vring */ vq = vring_create_virtqueue(index, num, SMP_CACHE_BYTES, &vp_dev->vdev, @@ -409,9 +406,8 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, return 0; } -static void del_vq(struct 
virtio_pci_vq_info *info) +static void del_vq(struct virtqueue *vq) { - struct virtqueue *vq = info->vq; struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); vp_iowrite16(vq->index, &vp_dev->common->queue_select); -- GitLab From 07ec51480b5eb1233f8c1b0f5d7a7c8d1247c507 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:19 +0100 Subject: [PATCH 0643/1084] virtio_pci: use shared interrupts for virtqueues This lets IRQ layer handle dispatching IRQs to separate handlers for the case where we don't have per-VQ MSI-X vectors, and allows us to greatly simplify the code based on the assumption that we always have interrupt vector 0 (legacy INTx or config interrupt for MSI-X) available, and any other interrupt is request/freed throught the VQ, even if the actual interrupt line might be shared in some cases. This allows removing a great deal of variables keeping track of the interrupt state in struct virtio_pci_device, as we can now simply walk the list of VQs and deal with per-VQ interrupt handlers there, and only treat vector 0 special. Additionally clean up the VQ allocation code to properly unwind on error instead of having a single global cleanup label, which is error prone, and in this case also leads to more code. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 235 +++++++++++++---------------- drivers/virtio/virtio_pci_common.h | 16 +- 2 files changed, 106 insertions(+), 145 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index a33767318cbf..274dc1ff09c0 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -33,10 +33,8 @@ void vp_synchronize_vectors(struct virtio_device *vdev) struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) - synchronize_irq(vp_dev->pci_dev->irq); - - for (i = 0; i < vp_dev->msix_vectors; ++i) + synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0)); + for (i = 1; i < vp_dev->msix_vectors; i++) synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } @@ -99,77 +97,10 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) return vp_vring_interrupt(irq, opaque); } -static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, - bool per_vq_vectors) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - const char *name = dev_name(&vp_dev->vdev.dev); - unsigned i, v; - int err = -ENOMEM; - - vp_dev->msix_vectors = nvectors; - - vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names, - GFP_KERNEL); - if (!vp_dev->msix_names) - goto error; - vp_dev->msix_affinity_masks - = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks, - GFP_KERNEL); - if (!vp_dev->msix_affinity_masks) - goto error; - for (i = 0; i < nvectors; ++i) - if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], - GFP_KERNEL)) - goto error; - - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); - if (err < 0) - goto error; - vp_dev->msix_enabled = 1; - - /* Set the vector used for configuration */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-config", name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_config_changed, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - - v = vp_dev->config_vector(vp_dev, v); - /* Verify we had enough resources to assign the vector */ - if (v == VIRTIO_MSI_NO_VECTOR) { - err = -EBUSY; - 
goto error; - } - - if (!per_vq_vectors) { - /* Shared vector for all VQs */ - v = vp_dev->msix_used_vectors; - snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, - "%s-virtqueues", name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_vring_interrupt, 0, vp_dev->msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev->msix_used_vectors; - } - return 0; -error: - return err; -} - -/* the config->del_vqs() implementation */ -void vp_del_vqs(struct virtio_device *vdev) +static void vp_remove_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq, *n; - int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { if (vp_dev->msix_vector_map) { @@ -181,35 +112,33 @@ void vp_del_vqs(struct virtio_device *vdev) } vp_dev->del_vq(vq); } +} - if (vp_dev->intx_enabled) { - free_irq(vp_dev->pci_dev->irq, vp_dev); - vp_dev->intx_enabled = 0; - } +/* the config->del_vqs() implementation */ +void vp_del_vqs(struct virtio_device *vdev) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + int i; - for (i = 0; i < vp_dev->msix_used_vectors; ++i) - free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); + if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs))) + return; - for (i = 0; i < vp_dev->msix_vectors; i++) - if (vp_dev->msix_affinity_masks[i]) - free_cpumask_var(vp_dev->msix_affinity_masks[i]); + vp_remove_vqs(vdev); if (vp_dev->msix_enabled) { + for (i = 0; i < vp_dev->msix_vectors; i++) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + /* Disable the vector used for configuration */ vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); - pci_free_irq_vectors(vp_dev->pci_dev); - vp_dev->msix_enabled = 0; + kfree(vp_dev->msix_affinity_masks); + kfree(vp_dev->msix_names); + kfree(vp_dev->msix_vector_map); } - vp_dev->msix_vectors = 0; - vp_dev->msix_used_vectors = 0; - kfree(vp_dev->msix_names); - vp_dev->msix_names = NULL; - kfree(vp_dev->msix_affinity_masks); - vp_dev->msix_affinity_masks = NULL; - kfree(vp_dev->msix_vector_map); - vp_dev->msix_vector_map = NULL; + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); + pci_free_irq_vectors(vp_dev->pci_dev); } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, @@ -219,79 +148,122 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, bool per_vq_vectors) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + const char *name = dev_name(&vp_dev->vdev.dev); + int i, err = -ENOMEM, allocated_vectors, nvectors; u16 msix_vec; - int i, err, nvectors, allocated_vectors; + + nvectors = 1; + for (i = 0; i < nvqs; i++) + if (callbacks[i]) + nvectors++; if (per_vq_vectors) { - /* Best option: one for change interrupt, one per vq. */ - nvectors = 1; - for (i = 0; i < nvqs; ++i) - if (callbacks[i]) - ++nvectors; + err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, + PCI_IRQ_MSIX); } else { - /* Second best: one for change, shared for all vqs. 
*/ - nvectors = 2; + err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, + PCI_IRQ_MSIX); } + if (err < 0) + return err; - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors); + vp_dev->msix_vectors = nvectors; + vp_dev->msix_names = kmalloc_array(nvectors, + sizeof(*vp_dev->msix_names), GFP_KERNEL); + if (!vp_dev->msix_names) + goto out_free_irq_vectors; + + vp_dev->msix_affinity_masks = kcalloc(nvectors, + sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); + if (!vp_dev->msix_affinity_masks) + goto out_free_msix_names; + + for (i = 0; i < nvectors; ++i) { + if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], + GFP_KERNEL)) + goto out_free_msix_affinity_masks; + } + + /* Set the vector used for configuration */ + snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names), + "%s-config", name); + err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, + 0, vp_dev->msix_names[0], vp_dev); if (err) - goto error_find; + goto out_free_irq_vectors; - if (per_vq_vectors) { - vp_dev->msix_vector_map = kmalloc_array(nvqs, - sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); - if (!vp_dev->msix_vector_map) - goto error_find; + /* Verify we had enough resources to assign the vector */ + if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { + err = -EBUSY; + goto out_free_config_irq; } - allocated_vectors = vp_dev->msix_used_vectors; + vp_dev->msix_vector_map = kmalloc_array(nvqs, + sizeof(*vp_dev->msix_vector_map), GFP_KERNEL); + if (!vp_dev->msix_vector_map) + goto out_disable_config_irq; + + allocated_vectors = 1; /* vector 0 is the config interrupt */ for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } - if (!callbacks[i]) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (per_vq_vectors) - msix_vec = allocated_vectors++; + if (callbacks[i]) + msix_vec = allocated_vectors; else - msix_vec = VP_MSIX_VQ_VECTOR; + msix_vec = VIRTIO_MSI_NO_VECTOR; + vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i], msix_vec); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); - goto error_find; + goto out_remove_vqs; } - if (!per_vq_vectors) - continue; - if (msix_vec == VIRTIO_MSI_NO_VECTOR) { vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; continue; } - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", + snprintf(vp_dev->msix_names[i + 1], + sizeof(*vp_dev->msix_names), "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); + vring_interrupt, IRQF_SHARED, + vp_dev->msix_names[i + 1], vqs[i]); if (err) { /* don't free this irq on error */ vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; - goto error_find; + goto out_remove_vqs; } vp_dev->msix_vector_map[i] = msix_vec; + + if (per_vq_vectors) + allocated_vectors++; } + + vp_dev->msix_enabled = 1; return 0; -error_find: - vp_del_vqs(vdev); +out_remove_vqs: + vp_remove_vqs(vdev); + kfree(vp_dev->msix_vector_map); +out_disable_config_irq: + vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); +out_free_config_irq: + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); +out_free_msix_affinity_masks: + for (i = 0; i < nvectors; i++) { + if (vp_dev->msix_affinity_masks[i]) + free_cpumask_var(vp_dev->msix_affinity_masks[i]); + } + kfree(vp_dev->msix_affinity_masks); +out_free_msix_names: + kfree(vp_dev->msix_names); +out_free_irq_vectors: + pci_free_irq_vectors(vp_dev->pci_dev); return err; } @@ -305,9 +277,8 
@@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) - goto out_del_vqs; + return err; - vp_dev->intx_enabled = 1; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; @@ -317,13 +288,15 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, VIRTIO_MSI_NO_VECTOR); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); - goto out_del_vqs; + goto out_remove_vqs; } } return 0; -out_del_vqs: - vp_del_vqs(vdev); + +out_remove_vqs: + vp_remove_vqs(vdev); + free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev); return err; } diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 2038887bdf23..85593867e712 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -66,16 +66,12 @@ struct virtio_pci_device { /* MSI-X support */ int msix_enabled; - int intx_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ char (*msix_names)[256]; - /* Number of available vectors */ - unsigned msix_vectors; - /* Vectors allocated, excluding per-vq vectors if any */ - unsigned msix_used_vectors; - + /* Total Number of MSI-X vectors (including per-VQ ones). */ + int msix_vectors; /* Map of per-VQ MSI-X vectors, may be NULL */ unsigned *msix_vector_map; @@ -89,14 +85,6 @@ struct virtio_pci_device { u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); }; -/* Constants for MSI-X */ -/* Use first vector for configuration changes, second and the rest for - * virtqueues Thus, we need at least 2 vectors for MSI. */ -enum { - VP_MSIX_CONFIG_VECTOR = 0, - VP_MSIX_VQ_VECTOR = 1, -}; - /* Convert a generic virtio device to our structure */ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) { -- GitLab From 53a020c661741f3b87ad3ac6fa545088aaebac9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:20 +0100 Subject: [PATCH 0644/1084] virtio_pci: don't duplicate the msix_enable flag in struct pci_dev Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_pci_common.c | 5 ++--- drivers/virtio/virtio_pci_common.h | 2 -- drivers/virtio/virtio_pci_legacy.c | 2 +- drivers/virtio/virtio_pci_modern.c | 2 +- include/uapi/linux/virtio_pci.h | 2 +- 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 274dc1ff09c0..b83053082875 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -125,7 +125,7 @@ void vp_del_vqs(struct virtio_device *vdev) vp_remove_vqs(vdev); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { for (i = 0; i < vp_dev->msix_vectors; i++) free_cpumask_var(vp_dev->msix_affinity_masks[i]); @@ -244,7 +244,6 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, allocated_vectors++; } - vp_dev->msix_enabled = 1; return 0; out_remove_vqs: @@ -340,7 +339,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) if (!vq->callback) return -EINVAL; - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { int vec = vp_dev->msix_vector_map[vq->index]; struct cpumask *mask = vp_dev->msix_affinity_masks[vec]; unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 85593867e712..217ca876eed7 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -64,8 +64,6 @@ struct virtio_pci_device { /* the IO mapping for the PCI config space */ void __iomem *ioaddr; - /* MSI-X support */ - int msix_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 47292dad0ff9..2ab6aee51bf6 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -165,7 +165,7 @@ static void del_vq(struct virtqueue *vq) iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { iowrite16(VIRTIO_MSI_NO_VECTOR, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); /* Flush the write out to device */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 00e6fc1df407..e5ce31091953 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -412,7 +412,7 @@ static void del_vq(struct virtqueue *vq) vp_iowrite16(vq->index, &vp_dev->common->queue_select); - if (vp_dev->msix_enabled) { + if (vp_dev->pci_dev->msix_enabled) { vp_iowrite16(VIRTIO_MSI_NO_VECTOR, &vp_dev->common->queue_msix_vector); /* Flush the write out to device */ diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 90007a1abcab..15b4385a2be1 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -79,7 +79,7 @@ * configuration space */ #define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 
24 : 20) /* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */ -#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled) +#define VIRTIO_PCI_CONFIG(dev) VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled) /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 -- GitLab From 52a61516125fa9a21b3bdf4f90928308e2e5573f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:21 +0100 Subject: [PATCH 0645/1084] virtio_pci: simplify MSI-X setup Try to grab the MSI-X vectors early and fall back to the shared one before doing lots of allocations. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 35 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index b83053082875..822f8e5dcee4 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -142,14 +142,13 @@ void vp_del_vqs(struct virtio_device *vdev) } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[], - bool per_vq_vectors) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); int i, err = -ENOMEM, allocated_vectors, nvectors; + bool shared = false; u16 msix_vec; nvectors = 1; @@ -157,12 +156,16 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (callbacks[i]) nvectors++; - if (per_vq_vectors) { - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); - } else { + /* Try one vector per queue first. */ + err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, + PCI_IRQ_MSIX); + if (err < 0) { + /* Fallback to one vector for config, one shared for queues. */ + shared = true; err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2, PCI_IRQ_MSIX); + if (err < 0) + return err; } if (err < 0) return err; @@ -190,7 +193,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed, 0, vp_dev->msix_names[0], vp_dev); if (err) - goto out_free_irq_vectors; + goto out_free_msix_affinity_masks; /* Verify we had enough resources to assign the vector */ if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) { @@ -240,7 +243,11 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, } vp_dev->msix_vector_map[i] = msix_vec; - if (per_vq_vectors) + /* + * Use a different vector for each queue if they are available, + * else share the same vector for all VQs. + */ + if (!shared) allocated_vectors++; } @@ -307,15 +314,9 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, { int err; - /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true); - if (!err) - return 0; - /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names); if (!err) return 0; - /* Finally fall back to regular interrupts. 
*/ return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); } -- GitLab From fb5e31d970ce8b4941f03ed765d7dbefc39f22d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:22 +0100 Subject: [PATCH 0646/1084] virtio: allow drivers to request IRQ affinity when creating VQs Add a struct irq_affinity pointer to the find_vqs methods, which if set is used to tell the PCI layer to create the MSI-X vectors for our I/O virtqueues with the proper affinity from the start. Compared to after the fact affinity hints this gives us an instantly working setup and allows to allocate the irq descritors node-local and avoid interconnect traffic. Last but not least this will allow blk-mq queues are created based on the interrupt affinity for storage drivers. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 3 ++- drivers/char/virtio_console.c | 2 +- drivers/crypto/virtio/virtio_crypto_core.c | 2 +- drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +- drivers/misc/mic/vop/vop_main.c | 2 +- drivers/net/caif/caif_virtio.c | 3 ++- drivers/net/virtio_net.c | 2 +- drivers/remoteproc/remoteproc_virtio.c | 3 ++- drivers/rpmsg/virtio_rpmsg_bus.c | 2 +- drivers/s390/virtio/kvm_virtio.c | 3 ++- drivers/s390/virtio/virtio_ccw.c | 3 ++- drivers/scsi/virtio_scsi.c | 3 ++- drivers/virtio/virtio_balloon.c | 3 ++- drivers/virtio/virtio_input.c | 3 ++- drivers/virtio/virtio_mmio.c | 3 ++- drivers/virtio/virtio_pci_common.c | 19 ++++++++++++------- drivers/virtio/virtio_pci_common.h | 5 ++--- drivers/virtio/virtio_pci_modern.c | 7 +++---- include/linux/virtio_config.h | 9 +++++---- net/vmw_vsock/virtio_transport.c | 3 ++- 20 files changed, 48 insertions(+), 34 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 10332c24f961..c54118bdc67d 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -411,7 +411,8 @@ static int init_vq(struct virtio_blk *vblk) } /* Discover virtqueues and write information to configuration. */ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, + NULL); if (err) goto out; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 17857beb4892..6266c0568e1d 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1939,7 +1939,7 @@ static int init_vqs(struct ports_device *portdev) /* Find the queues. */ err = portdev->vdev->config->find_vqs(portdev->vdev, nr_queues, vqs, io_callbacks, - (const char **)io_names); + (const char **)io_names, NULL); if (err) goto free; diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index fe70ec823b27..0aa2f045543b 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -119,7 +119,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) } ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, - names); + names, NULL); if (ret) goto err_find; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 1235519853f4..e975fa5b0a32 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -172,7 +172,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) vgdev->has_virgl_3d ? 
"enabled" : "not available"); ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs, - callbacks, names); + callbacks, names, NULL); if (ret) { DRM_ERROR("failed to find virt queues\n"); goto err_vqs; diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c index 1a2b67f3183d..c2e29d7f0de8 100644 --- a/drivers/misc/mic/vop/vop_main.c +++ b/drivers/misc/mic/vop/vop_main.c @@ -374,7 +374,7 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev, static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], struct irq_affinity *desc) { struct _vop_vdev *vdev = to_vopvdev(dev); struct vop_device *vpdev = vdev->vpdev; diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index b306210b02b7..bc0eb47eccee 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -679,7 +679,8 @@ static int cfv_probe(struct virtio_device *vdev) goto err; /* Get the TX virtio ring. This is a "guest side vring". */ - err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names); + err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names, + NULL); if (err) goto err; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 765c2d6358da..9be74c2dfb22 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2003,7 +2003,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi) } ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, - names); + names, NULL); if (ret) goto err_find; diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index 364411fb7734..0142cc3f0c91 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -137,7 +137,8 @@ static void rproc_virtio_del_vqs(struct virtio_device *vdev) static int rproc_virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { int i, ret; diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 3090b0d3072f..5e66e081027e 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -869,7 +869,7 @@ static int rpmsg_probe(struct virtio_device *vdev) init_waitqueue_head(&vrp->sendq); /* We expect two virtqueues, rx and tx (and in this order) */ - err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names); + err = vdev->config->find_vqs(vdev, 2, vqs, vq_cbs, names, NULL); if (err) goto free_vrp; diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c index 5e5c11f37b24..2ce0b3eb2efe 100644 --- a/drivers/s390/virtio/kvm_virtio.c +++ b/drivers/s390/virtio/kvm_virtio.c @@ -255,7 +255,8 @@ static void kvm_del_vqs(struct virtio_device *vdev) static int kvm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct kvm_device *kdev = to_kvmdev(vdev); int i; diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 070c4da95f48..304d3b3cbfd3 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -628,7 +628,8 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev, static int 
virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct virtio_ccw_device *vcdev = to_vc_device(vdev); unsigned long *indicatorp = NULL; diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c680d7641311..c9c5ea0611e9 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -941,7 +941,8 @@ static int virtscsi_init(struct virtio_device *vdev, } /* Discover virtqueues and write information to configuration. */ - err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, + NULL); if (err) goto out; diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 181793f07852..36c9c8fcb7f8 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -413,7 +413,8 @@ static int init_vqs(struct virtio_balloon *vb) * optionally stat. */ nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; - err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names); + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, + NULL); if (err) return err; diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index 350a2a5a49db..79f1293cda93 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -173,7 +173,8 @@ static int virtinput_init_vqs(struct virtio_input *vi) static const char * const names[] = { "events", "status" }; int err; - err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names); + err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names, + NULL); if (err) return err; vi->evt = vqs[0]; diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 08357d70a891..78343b8f9034 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -446,7 +446,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], + struct irq_affinity *desc) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); unsigned int irq = platform_get_irq(vm_dev->pdev, 0); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 822f8e5dcee4..7902e920fc73 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -143,22 +143,28 @@ void vp_del_vqs(struct virtio_device *vdev) static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[]) + const char * const names[], struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); int i, err = -ENOMEM, allocated_vectors, nvectors; + unsigned flags = PCI_IRQ_MSIX; bool shared = false; u16 msix_vec; + if (desc) { + flags |= PCI_IRQ_AFFINITY; + desc->pre_vectors++; /* virtio config vector */ + } + nvectors = 1; for (i = 0; i < nvqs; i++) if (callbacks[i]) nvectors++; /* Try one vector per queue first. 
*/ - err = pci_alloc_irq_vectors(vp_dev->pci_dev, nvectors, nvectors, - PCI_IRQ_MSIX); + err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, + nvectors, flags, desc); if (err < 0) { /* Fallback to one vector for config, one shared for queues. */ shared = true; @@ -308,13 +314,12 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs, /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc) { int err; - err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names); + err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, desc); if (!err) return 0; return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 217ca876eed7..a6ad9ec6baef 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -97,9 +97,8 @@ bool vp_notify(struct virtqueue *vq); void vp_del_vqs(struct virtio_device *vdev); /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); const char *vp_bus_name(struct virtio_device *vdev); /* Setup the affinity for a virtqueue: diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index e5ce31091953..a7a0981e441c 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -384,13 +384,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, } static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]) + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; - int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names); + int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, desc); if (rc) return rc; diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 26c155bb639b..2ebe506fe41a 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -7,6 +7,8 @@ #include #include +struct irq_affinity; + /** * virtio_config_ops - operations for configuring a virtio device * @get: read the value of a configuration field @@ -68,9 +70,8 @@ struct virtio_config_ops { void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], - vq_callback_t *callbacks[], - const char * const names[]); + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); @@ -169,7 +170,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *callbacks[] = { c }; const char *names[] = { n }; struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names); + int err = 
vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); if (err < 0) return ERR_PTR(err); return vq; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 6788264acc63..9d24c0e958b1 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -532,7 +532,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names); + vsock->vqs, callbacks, names, + NULL); if (ret < 0) goto out; -- GitLab From bbaba479563910aaa51e59bb9027a09e396d3a3c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:23 +0100 Subject: [PATCH 0647/1084] virtio: provide a method to get the IRQ affinity mask for a virtqueue This basically passed up the pci_irq_get_affinity information through virtio through an optional get_vq_affinity method. It is only implemented by the PCI backend for now, and only when we use per-virtqueue IRQs. Signed-off-by: Christoph Hellwig Reviewed-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 11 +++++++++++ drivers/virtio/virtio_pci_common.h | 2 ++ drivers/virtio/virtio_pci_legacy.c | 1 + drivers/virtio/virtio_pci_modern.c | 2 ++ include/linux/virtio_config.h | 3 +++ 5 files changed, 19 insertions(+) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 7902e920fc73..df548a6fb844 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -361,6 +361,17 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) return 0; } +const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + unsigned int *map = vp_dev->msix_vector_map; + + if (!map || map[index] == VIRTIO_MSI_NO_VECTOR) + return NULL; + + return pci_irq_get_affinity(vp_dev->pci_dev, map[index]); +} + #ifdef CONFIG_PM_SLEEP static int virtio_pci_freeze(struct device *dev) { diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index a6ad9ec6baef..ac8c9d788964 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -108,6 +108,8 @@ const char *vp_bus_name(struct virtio_device *vdev); */ int vp_set_vq_affinity(struct virtqueue *vq, int cpu); +const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index); + #if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY) int virtio_pci_legacy_probe(struct virtio_pci_device *); void virtio_pci_legacy_remove(struct virtio_pci_device *); diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 2ab6aee51bf6..f7362c5fe18a 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -190,6 +190,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; /* the PCI probing function */ diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index a7a0981e441c..7bc3004b840e 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -437,6 +437,7 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; 
static const struct virtio_config_ops virtio_pci_config_ops = { @@ -452,6 +453,7 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .finalize_features = vp_finalize_features, .bus_name = vp_bus_name, .set_vq_affinity = vp_set_vq_affinity, + .get_vq_affinity = vp_get_vq_affinity, }; /** diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 2ebe506fe41a..8355bab175e1 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -58,6 +58,7 @@ struct irq_affinity; * This returns a pointer to the bus name a la pci_name from which * the caller can then copy. * @set_vq_affinity: set the affinity for a virtqueue. + * @get_vq_affinity: get the affinity for a virtqueue (optional). */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { @@ -77,6 +78,8 @@ struct virtio_config_ops { int (*finalize_features)(struct virtio_device *vdev); const char *(*bus_name)(struct virtio_device *vdev); int (*set_vq_affinity)(struct virtqueue *vq, int cpu); + const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, + int index); }; /* If driver didn't advertise the feature, it will never appear. */ -- GitLab From 73473427bb551686e4b68ecd99bfd27e6635286a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:24 +0100 Subject: [PATCH 0648/1084] blk-mq: provide a default queue mapping for virtio device Similar to the PCI version, just calling into virtio instead. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- block/Kconfig | 5 ++++ block/Makefile | 1 + block/blk-mq-virtio.c | 54 +++++++++++++++++++++++++++++++++++ include/linux/blk-mq-virtio.h | 10 +++++++ 4 files changed, 70 insertions(+) create mode 100644 block/blk-mq-virtio.c create mode 100644 include/linux/blk-mq-virtio.h diff --git a/block/Kconfig b/block/Kconfig index 8bf114a3858a..3523b4f0cd8b 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -165,4 +165,9 @@ config BLK_MQ_PCI depends on BLOCK && PCI default y +config BLK_MQ_VIRTIO + bool + depends on BLOCK && VIRTIO + default y + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index a827f988c4e6..60691949d28d 100644 --- a/block/Makefile +++ b/block/Makefile @@ -23,5 +23,6 @@ obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o +obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c new file mode 100644 index 000000000000..c3afbca11299 --- /dev/null +++ b/block/blk-mq-virtio.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016 Christoph Hellwig. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include +#include +#include +#include +#include +#include "blk-mq.h" + +/** + * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device + * @set: tagset to provide the mapping for + * @vdev: virtio device associated with @set. 
+ * @first_vec: first interrupt vector to use for queues (usually 0) + * + * This function assumes the virtio device @vdev has at least as many available + * interrupt vectors as @set has queues. It will then query the vector + * corresponding to each queue for its affinity mask and build a queue mapping + * that maps a queue to the CPUs that have irq affinity for the corresponding + * vector. + */ +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + if (!vdev->config->get_vq_affinity) + goto fallback; + + for (queue = 0; queue < set->nr_hw_queues; queue++) { + mask = vdev->config->get_vq_affinity(vdev, first_vec + queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + set->mq_map[cpu] = queue; + } + + return 0; +fallback: + return blk_mq_map_queues(set); +} +EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues); diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h new file mode 100644 index 000000000000..b1ef6e14744f --- /dev/null +++ b/include/linux/blk-mq-virtio.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BLK_MQ_VIRTIO_H +#define _LINUX_BLK_MQ_VIRTIO_H + +struct blk_mq_tag_set; +struct virtio_device; + +int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, + struct virtio_device *vdev, int first_vec); + +#endif /* _LINUX_BLK_MQ_VIRTIO_H */ -- GitLab From ad71473d9c43725c917fc5a86d54ceb7001ee28c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:25 +0100 Subject: [PATCH 0649/1084] virtio_blk: use virtio IRQ affinity Use automatic IRQ affinity assignment in the virtio layer if available, and build the blk-mq queues based on it. Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index c54118bdc67d..1028dfeb5a7f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include #include #define PART_BITS 4 @@ -385,6 +387,7 @@ static int init_vq(struct virtio_blk *vblk) struct virtqueue **vqs; unsigned short num_vqs; struct virtio_device *vdev = vblk->vdev; + struct irq_affinity desc = { 0, }; err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, struct virtio_blk_config, num_queues, @@ -412,7 +415,7 @@ static int init_vq(struct virtio_blk *vblk) /* Discover virtqueues and write information to configuration. */ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - NULL); + &desc); if (err) goto out; @@ -543,10 +546,18 @@ static int virtblk_init_request(void *data, struct request *rq, return 0; } +static int virtblk_map_queues(struct blk_mq_tag_set *set) +{ + struct virtio_blk *vblk = set->driver_data; + + return blk_mq_virtio_map_queues(set, vblk->vdev, 0); +} + static struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .complete = virtblk_request_done, .init_request = virtblk_init_request, + .map_queues = virtblk_map_queues, }; static unsigned int virtblk_queue_depth; -- GitLab From 0d9f0a52c8b9f7a003fe1650b7d5fb8518efabe0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Feb 2017 18:15:26 +0100 Subject: [PATCH 0650/1084] virtio_scsi: use virtio IRQ affinity Use automatic IRQ affinity assignment in the virtio layer if available, and build the blk-mq queues based on it.
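For illustration, the way the affinity descriptor handed to find_vqs() lines up with the blk-mq queue map amounts to the following sketch (abridged; the real code is in the diff below). The value 2 reflects the two non-request virtqueues (control and event) that stay on unmanaged vectors:

	/* keep the first two vectors (control/event vqs) out of the automatic spreading */
	struct irq_affinity desc = { .pre_vectors = 2 };

	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);

	/* when blk-mq asks for the queue map, start past those two vectors */
	blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2);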
Signed-off-by: Christoph Hellwig Signed-off-by: Michael S. Tsirkin --- drivers/scsi/virtio_scsi.c | 126 ++++--------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 12 insertions(+), 115 deletions(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index c9c5ea0611e9..939c47df73fa 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #define VIRTIO_SCSI_MEMPOOL_SZ 64 #define VIRTIO_SCSI_EVENT_LEN 8 @@ -108,7 +110,6 @@ struct virtio_scsi { bool affinity_hint_set; struct hlist_node node; - struct hlist_node node_dead; /* Protected by event_vq lock */ bool stop_events; @@ -118,7 +119,6 @@ struct virtio_scsi { struct virtio_scsi_vq req_vqs[]; }; -static enum cpuhp_state virtioscsi_online; static struct kmem_cache *virtscsi_cmd_cache; static mempool_t *virtscsi_cmd_pool; @@ -766,6 +766,13 @@ static void virtscsi_target_destroy(struct scsi_target *starget) kfree(tgt); } +static int virtscsi_map_queues(struct Scsi_Host *shost) +{ + struct virtio_scsi *vscsi = shost_priv(shost); + + return blk_mq_virtio_map_queues(&shost->tag_set, vscsi->vdev, 2); +} + static struct scsi_host_template virtscsi_host_template_single = { .module = THIS_MODULE, .name = "Virtio SCSI HBA", @@ -801,6 +808,7 @@ static struct scsi_host_template virtscsi_host_template_multi = { .use_clustering = ENABLE_CLUSTERING, .target_alloc = virtscsi_target_alloc, .target_destroy = virtscsi_target_destroy, + .map_queues = virtscsi_map_queues, .track_queue_depth = 1, }; @@ -817,80 +825,6 @@ static struct scsi_host_template virtscsi_host_template_multi = { virtio_cwrite(vdev, struct virtio_scsi_config, fld, &__val); \ } while(0) -static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) -{ - int i; - int cpu; - - /* In multiqueue mode, when the number of cpu is equal - * to the number of request queues, we let the qeueues - * to be private to one cpu by setting the affinity hint - * to eliminate the contention. 
- */ - if ((vscsi->num_queues == 1 || - vscsi->num_queues != num_online_cpus()) && affinity) { - if (vscsi->affinity_hint_set) - affinity = false; - else - return; - } - - if (affinity) { - i = 0; - for_each_online_cpu(cpu) { - virtqueue_set_affinity(vscsi->req_vqs[i].vq, cpu); - i++; - } - - vscsi->affinity_hint_set = true; - } else { - for (i = 0; i < vscsi->num_queues; i++) { - if (!vscsi->req_vqs[i].vq) - continue; - - virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1); - } - - vscsi->affinity_hint_set = false; - } -} - -static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) -{ - get_online_cpus(); - __virtscsi_set_affinity(vscsi, affinity); - put_online_cpus(); -} - -static int virtscsi_cpu_online(unsigned int cpu, struct hlist_node *node) -{ - struct virtio_scsi *vscsi = hlist_entry_safe(node, struct virtio_scsi, - node); - __virtscsi_set_affinity(vscsi, true); - return 0; -} - -static int virtscsi_cpu_notif_add(struct virtio_scsi *vi) -{ - int ret; - - ret = cpuhp_state_add_instance(virtioscsi_online, &vi->node); - if (ret) - return ret; - - ret = cpuhp_state_add_instance(CPUHP_VIRT_SCSI_DEAD, &vi->node_dead); - if (ret) - cpuhp_state_remove_instance(virtioscsi_online, &vi->node); - return ret; -} - -static void virtscsi_cpu_notif_remove(struct virtio_scsi *vi) -{ - cpuhp_state_remove_instance_nocalls(virtioscsi_online, &vi->node); - cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_SCSI_DEAD, - &vi->node_dead); -} - static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, struct virtqueue *vq) { @@ -900,14 +834,8 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq, static void virtscsi_remove_vqs(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - - virtscsi_set_affinity(vscsi, false); - /* Stop all the virtqueues. */ vdev->config->reset(vdev); - vdev->config->del_vqs(vdev); } @@ -920,6 +848,7 @@ static int virtscsi_init(struct virtio_device *vdev, vq_callback_t **callbacks; const char **names; struct virtqueue **vqs; + struct irq_affinity desc = { .pre_vectors = 2 }; num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE; vqs = kmalloc(num_vqs * sizeof(struct virtqueue *), GFP_KERNEL); @@ -942,7 +871,7 @@ static int virtscsi_init(struct virtio_device *vdev, /* Discover virtqueues and write information to configuration. 
*/ err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names, - NULL); + &desc); if (err) goto out; @@ -1008,10 +937,6 @@ static int virtscsi_probe(struct virtio_device *vdev) if (err) goto virtscsi_init_failed; - err = virtscsi_cpu_notif_add(vscsi); - if (err) - goto scsi_add_host_failed; - cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1; shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue); shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF; @@ -1066,9 +991,6 @@ static void virtscsi_remove(struct virtio_device *vdev) virtscsi_cancel_event_work(vscsi); scsi_remove_host(shost); - - virtscsi_cpu_notif_remove(vscsi); - virtscsi_remove_vqs(vdev); scsi_host_put(shost); } @@ -1076,10 +998,6 @@ static void virtscsi_remove(struct virtio_device *vdev) #ifdef CONFIG_PM_SLEEP static int virtscsi_freeze(struct virtio_device *vdev) { - struct Scsi_Host *sh = virtio_scsi_host(vdev); - struct virtio_scsi *vscsi = shost_priv(sh); - - virtscsi_cpu_notif_remove(vscsi); virtscsi_remove_vqs(vdev); return 0; } @@ -1094,11 +1012,6 @@ static int virtscsi_restore(struct virtio_device *vdev) if (err) return err; - err = virtscsi_cpu_notif_add(vscsi); - if (err) { - vdev->config->del_vqs(vdev); - return err; - } virtio_device_ready(vdev); if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) @@ -1153,16 +1066,6 @@ static int __init init(void) pr_err("mempool_create() for virtscsi_cmd_pool failed\n"); goto error; } - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, - "scsi/virtio:online", - virtscsi_cpu_online, NULL); - if (ret < 0) - goto error; - virtioscsi_online = ret; - ret = cpuhp_setup_state_multi(CPUHP_VIRT_SCSI_DEAD, "scsi/virtio:dead", - NULL, virtscsi_cpu_online); - if (ret) - goto error; ret = register_virtio_driver(&virtio_scsi_driver); if (ret < 0) goto error; @@ -1178,17 +1081,12 @@ static int __init init(void) kmem_cache_destroy(virtscsi_cmd_cache); virtscsi_cmd_cache = NULL; } - if (virtioscsi_online) - cpuhp_remove_multi_state(virtioscsi_online); - cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD); return ret; } static void __exit fini(void) { unregister_virtio_driver(&virtio_scsi_driver); - cpuhp_remove_multi_state(virtioscsi_online); - cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD); mempool_destroy(virtscsi_cmd_pool); kmem_cache_destroy(virtscsi_cmd_cache); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 921acaaa1601..01aea80a503e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -26,7 +26,6 @@ enum cpuhp_state { CPUHP_ARM_OMAP_WAKE_DEAD, CPUHP_IRQ_POLL_DEAD, CPUHP_BLOCK_SOFTIRQ_DEAD, - CPUHP_VIRT_SCSI_DEAD, CPUHP_ACPI_CPUDRV_DEAD, CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, -- GitLab From 8b107f5b97772c7c0c218302e9a4d15b4edf50b4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Feb 2017 11:57:11 -0800 Subject: [PATCH 0651/1084] f2fs: avoid to issue redundant discard commands If segs_per_sec is over 1 like under SMR, previously f2fs issues discard commands redundantly on the same section, since we didn't move end position for the previous discard command. 
E.g., start end | | prefree_bitmap = [01111100111100] And, after issue discard for this section, end start | | prefree_bitmap = [01111100111100] Select this section again by searching from (end + 1), start end | | prefree_bitmap = [01111100111100] Fixes: 36abef4e796d38 ("f2fs: introduce mode=lfs mount option") Cc: Cc: Damien Le Moal Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9006d8ed6f52..4bd7a8b19332 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1036,6 +1036,8 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) start = start_segno + sbi->segs_per_sec; if (start < end) goto next; + else + end = start - 1; } mutex_unlock(&dirty_i->seglist_lock); -- GitLab From 41139ac3cd76c6dff8286102a8ac62933c680463 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 20 Feb 2017 09:12:58 +0800 Subject: [PATCH 0652/1084] ACPI: APEI: Fix BERT resources conflict with ACPI NVS area It was reported that on some machines, there is overlap between ACPI NVS area and BERT address range. This appears reasonable because BERT contents need to be non-volatile across reboot. But this will cause resources conflict in current Linux kernel implementation because the ACPI NVS area is marked as busy. The resource conflict is fixed via excluding the ACPI NVS area when requesting IO resources for BERT. When accessing the BERT contents, the whole BERT address range will be ioremapped and accessed. Reported-and-tested-by: Hans Kristian Rosbach Signed-off-by: Ying Huang Acked-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/bert.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index a05b5c0cf181..12771fcf0417 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ -97,6 +97,7 @@ static int __init bert_check_table(struct acpi_table_bert *bert_tab) static int __init bert_init(void) { + struct apei_resources bert_resources; struct acpi_bert_region *boot_error_region; struct acpi_table_bert *bert_tab; unsigned int region_len; @@ -127,13 +128,14 @@ static int __init bert_init(void) } region_len = bert_tab->region_length; - if (!request_mem_region(bert_tab->address, region_len, "APEI BERT")) { - pr_err("Can't request iomem region <%016llx-%016llx>.\n", - (unsigned long long)bert_tab->address, - (unsigned long long)bert_tab->address + region_len - 1); - return -EIO; - } - + apei_resources_init(&bert_resources); + rc = apei_resources_add(&bert_resources, bert_tab->address, + region_len, true); + if (rc) + return rc; + rc = apei_resources_request(&bert_resources, "APEI BERT"); + if (rc) + goto out_fini; boot_error_region = ioremap_cache(bert_tab->address, region_len); if (boot_error_region) { bert_print_all(boot_error_region, region_len); @@ -142,7 +144,9 @@ static int __init bert_init(void) rc = -ENOMEM; } - release_mem_region(bert_tab->address, region_len); + apei_resources_release(&bert_resources); +out_fini: + apei_resources_fini(&bert_resources); return rc; } -- GitLab From d3635ff07e8ca598d44f72bbf5d6c65b8ebeeb46 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Feb 2017 18:35:32 -0500 Subject: [PATCH 0653/1084] nfsd: fix configuration of supported minor versions When the user turns off all minor versions of NFSv4, that should be equivalent to turning off NFSv4 support, so a mount attempt using NFSv4 should get RPC_PROG_MISMATCH, not 
NFSERR_MINOR_VERS_MISMATCH. Allow the user to use either '4.0' or '4' to enable or disable minor version 0. Other minor versions are still enabled or disabled using the '4.x' format. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 16 ++++++++-------- fs/nfsd/nfssvc.c | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d54fb0e3f30e..4bbba88416dc 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -561,6 +561,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) len = qword_get(&mesg, vers, size); if (len <= 0) return -EINVAL; do { + enum vers_op cmd; sign = *vers; if (sign == '+' || sign == '-') num = simple_strtol((vers+1), &minorp, 0); @@ -571,21 +572,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) return -EINVAL; if (kstrtouint(minorp+1, 0, &minor) < 0) return -EINVAL; - if (nfsd_minorversion(minor, sign == '-' ? - NFSD_CLEAR : NFSD_SET) < 0) - return -EINVAL; - goto next; - } + } else + minor = 0; + cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { case 2: case 3: - case 4: - nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET); + nfsd_vers(num, cmd); break; + case 4: + if (nfsd_minorversion(minor, cmd) >= 0) + break; default: return -EINVAL; } - next: vers += len + 1; } while ((len = qword_get(&mesg, vers, size)) > 0); /* If all get turned off, turn them back on, as diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 2e378d0479ad..efd66da99201 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -153,6 +153,18 @@ int nfsd_vers(int vers, enum vers_op change) return 0; } +static void +nfsd_adjust_nfsd_versions4(void) +{ + unsigned i; + + for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) { + if (nfsd_supported_minorversions[i]) + return; + } + nfsd_vers(4, NFSD_CLEAR); +} + int nfsd_minorversion(u32 minorversion, enum vers_op change) { if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) @@ -160,9 +172,11 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change) switch(change) { case NFSD_SET: nfsd_supported_minorversions[minorversion] = true; + nfsd_vers(4, NFSD_SET); break; case NFSD_CLEAR: nfsd_supported_minorversions[minorversion] = false; + nfsd_adjust_nfsd_versions4(); break; case NFSD_TEST: return nfsd_supported_minorversions[minorversion]; -- GitLab From ff7d11797e728efbd1da8cfaa6f7c37585784ca5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Feb 2017 18:35:33 -0500 Subject: [PATCH 0654/1084] nfsd: Fix display of the version string The current display code assumes that v4 minor version 0 is tracked by the call to nfsd_vers(). Now it is tracked by nfsd_minorversion(), and so we need to adjust the display code. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 56 ++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 4bbba88416dc..73e75ac90525 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -536,6 +536,19 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) return rv; } +static ssize_t +nfsd_print_version_support(char *buf, int remaining, const char *sep, + unsigned vers, unsigned minor) +{ + const char *format = (minor == 0) ? 
"%s%c%u" : "%s%c%u.%u"; + bool supported = !!nfsd_vers(vers, NFSD_TEST); + + if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST)) + supported = false; + return snprintf(buf, remaining, format, sep, + supported ? '+' : '-', vers, minor); +} + static ssize_t __write_versions(struct file *file, char *buf, size_t size) { char *mesg = buf; @@ -598,40 +611,23 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) len = 0; sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; - for (num=2 ; num <= 4 ; num++) - if (nfsd_vers(num, NFSD_AVAIL)) { - len = snprintf(buf, remaining, "%s%c%d", sep, - nfsd_vers(num, NFSD_TEST)?'+':'-', - num); - sep = " "; - - if (len >= remaining) - break; - remaining -= len; - buf += len; - tlen += len; - } - if (nfsd_vers(4, NFSD_AVAIL)) - for (minor = 0; minor <= NFSD_SUPPORTED_MINOR_VERSION; - minor++) { - if (minor == 0 && nfsd_minorversion(minor, NFSD_TEST)) - /* for backward compatibility, don't report - * +4.0 - */ - continue; - len = snprintf(buf, remaining, " %c4.%u", - (nfsd_vers(4, NFSD_TEST) && - nfsd_minorversion(minor, NFSD_TEST)) ? - '+' : '-', - minor); - + for (num=2 ; num <= 4 ; num++) { + if (!nfsd_vers(num, NFSD_AVAIL)) + continue; + minor = 0; + do { + len = nfsd_print_version_support(buf, remaining, + sep, num, minor); if (len >= remaining) - break; + goto out; remaining -= len; buf += len; tlen += len; - } - + minor++; + sep = " "; + } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION); + } +out: len = snprintf(buf, remaining, "\n"); if (len >= remaining) return -EINVAL; -- GitLab From 90ec7c9dff07d676c0b9b499286b931005c6b051 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2017 21:31:43 +0100 Subject: [PATCH 0655/1084] scsi: lpfc: use div_u64 for 64-bit division The new debugfs output causes a link error on 32-bit architectures: ERROR: "__aeabi_uldivmod" [drivers/scsi/lpfc/lpfc.ko] undefined! This code is not performance critical, so we can simply use div_u64(). [mkp: fixed up whitespace] Fixes: bd2cdd5e400f ("scsi: lpfc: NVME Initiator: Add debugfs support") Fixes: 2b65e18202fd ("scsi: lpfc: NVME Target: Add debugfs support") Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. 
Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 64 ++++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 599fde4ea8b1..9f4798e9d938 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -873,8 +873,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg1_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg1_total, + phba->ktime_data_samples), phba->ktime_seg1_min, phba->ktime_seg1_max); len += snprintf( @@ -884,8 +884,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg2_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg2_total, + phba->ktime_data_samples), phba->ktime_seg2_min, phba->ktime_seg2_max); len += snprintf( @@ -895,8 +895,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg3_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg3_total, + phba->ktime_data_samples), phba->ktime_seg3_min, phba->ktime_seg3_max); len += snprintf( @@ -906,17 +906,17 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf( buf + len, PAGE_SIZE - len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg4_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg4_total, + phba->ktime_data_samples), phba->ktime_seg4_min, phba->ktime_seg4_max); len += snprintf( buf + len, PAGE_SIZE - len, "Total IO avg time: %08lld\n", - ((phba->ktime_seg1_total + + div_u64(phba->ktime_seg1_total + phba->ktime_seg2_total + phba->ktime_seg3_total + - phba->ktime_seg4_total) / + phba->ktime_seg4_total, phba->ktime_data_samples)); return len; } @@ -935,8 +935,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "cmd pass to NVME Layer\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg1_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg1_total, + phba->ktime_data_samples), phba->ktime_seg1_min, phba->ktime_seg1_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -944,8 +944,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- Driver rcv cmd OP (action)\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg2_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg2_total, + phba->ktime_data_samples), phba->ktime_seg2_min, phba->ktime_seg2_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -953,8 +953,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "Firmware WQ doorbell: cmd\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg3_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg3_total, + phba->ktime_data_samples), phba->ktime_seg3_min, phba->ktime_seg3_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -962,8 +962,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- MSI-X ISR for cmd cmpl\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg4_total / - phba->ktime_data_samples, + 
div_u64(phba->ktime_seg4_total, + phba->ktime_data_samples), phba->ktime_seg4_min, phba->ktime_seg4_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -971,8 +971,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- NVME layer passed cmd done\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg5_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg5_total, + phba->ktime_data_samples), phba->ktime_seg5_min, phba->ktime_seg5_max); @@ -983,8 +983,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld " "max %08lld\n", - phba->ktime_seg10_total / - phba->ktime_data_samples, + div_u64(phba->ktime_seg10_total, + phba->ktime_data_samples), phba->ktime_seg10_min, phba->ktime_seg10_max); return len; @@ -995,8 +995,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- Driver rcv rsp status OP\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg6_total / - phba->ktime_status_samples, + div_u64(phba->ktime_seg6_total, + phba->ktime_status_samples), phba->ktime_seg6_min, phba->ktime_seg6_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -1004,8 +1004,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- Firmware WQ doorbell: status\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg7_total / - phba->ktime_status_samples, + div_u64(phba->ktime_seg7_total, + phba->ktime_status_samples), phba->ktime_seg7_min, phba->ktime_seg7_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -1013,8 +1013,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) " -to- MSI-X ISR for status cmpl\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg8_total / - phba->ktime_status_samples, + div_u64(phba->ktime_seg8_total, + phba->ktime_status_samples), phba->ktime_seg8_min, phba->ktime_seg8_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -1022,8 +1022,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "-to- NVME layer passed status done\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg9_total / - phba->ktime_status_samples, + div_u64(phba->ktime_seg9_total, + phba->ktime_status_samples), phba->ktime_seg9_min, phba->ktime_seg9_max); len += snprintf(buf + len, PAGE_SIZE-len, @@ -1031,8 +1031,8 @@ lpfc_debugfs_nvmektime_data(struct lpfc_vport *vport, char *buf, int size) "cmd completed on wire\n"); len += snprintf(buf + len, PAGE_SIZE-len, "avg:%08lld min:%08lld max %08lld\n", - phba->ktime_seg10_total / - phba->ktime_status_samples, + div_u64(phba->ktime_seg10_total, + phba->ktime_status_samples), phba->ktime_seg10_min, phba->ktime_seg10_max); return len; -- GitLab From 825c6abbc316f496cd2b66e1fa72892cf4b49a9f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2017 21:37:12 +0100 Subject: [PATCH 0656/1084] scsi: lpfc: use proper format string for dma_addr_t dma_addr_t may be either u32 or u64, depending on the kernel configuration, and we get a warning for the 32-bit case: drivers/scsi/lpfc/lpfc_nvme.c: In function 'lpfc_nvme_ls_req': drivers/scsi/lpfc/lpfc_logmsg.h:52:52: error: format '%llu' expects argument of type 'long long unsigned int', but argument 11 has type 'dma_addr_t {aka unsigned int}' [-Werror=format=] 
drivers/scsi/lpfc/lpfc_logmsg.h:52:52: error: format '%llu' expects argument of type 'long long unsigned int', but argument 12 has type 'dma_addr_t {aka unsigned int}' [-Werror=format=] drivers/scsi/lpfc/lpfc_nvme.c: In function 'lpfc_nvme_ls_abort': drivers/scsi/lpfc/lpfc_logmsg.h:52:52: error: format '%llu' expects argument of type 'long long unsigned int', but argument 11 has type 'dma_addr_t {aka unsigned int}' [-Werror=format=] drivers/scsi/lpfc/lpfc_logmsg.h:52:52: error: format '%llu' expects argument of type 'long long unsigned int', but argument 12 has type 'dma_addr_t {aka unsigned int}' [-Werror=format=] printk has a special "%pad" format string that passes the dma address by reference to solve this problem. Fixes: 01649561a8b4 ("scsi: lpfc: NVME Initiator: bind to nvme_fc api") Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 625b6589a34d..609a908ea9db 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -457,11 +457,11 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, /* Expand print to include key fields. */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, "6051 ENTER. lport %p, rport %p lsreq%p rqstlen:%d " - "rsplen:%d %llux %llux\n", + "rsplen:%d %pad %pad\n", pnvme_lport, pnvme_rport, pnvme_lsreq, pnvme_lsreq->rqstlen, - pnvme_lsreq->rsplen, pnvme_lsreq->rqstdma, - pnvme_lsreq->rspdma); + pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, + &pnvme_lsreq->rspdma); vport->phba->fc4NvmeLsRequests++; @@ -527,11 +527,11 @@ lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport, /* Expand print to include key fields. */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, "6040 ENTER. lport %p, rport %p lsreq %p rqstlen:%d " - "rsplen:%d %llux %llux\n", + "rsplen:%d %pad %pad\n", pnvme_lport, pnvme_rport, pnvme_lsreq, pnvme_lsreq->rqstlen, - pnvme_lsreq->rsplen, pnvme_lsreq->rqstdma, - pnvme_lsreq->rspdma); + pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, + &pnvme_lsreq->rspdma); /* * Lock the ELS ring txcmplq and build a local list of all ELS IOs -- GitLab From 3856081eede297b617560b85e948cfb00bb395ec Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Thu, 23 Feb 2017 15:52:33 +0800 Subject: [PATCH 0657/1084] drm/ast: Fix AST2400 POST failure without BMC FW or VBIOS The current POST code for the AST2300/2400 family doesn't work properly if the chip hasn't been initialized previously by either the BMC own FW or the VBIOS. This fixes it. Signed-off-by: Y.C. Chen Signed-off-by: Benjamin Herrenschmidt Tested-by: Y.C. 
Chen Acked-by: Joel Stanley Cc: Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_post.c | 38 +++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 5331ee1df086..6c5391cb90eb 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -1638,12 +1638,44 @@ static void ast_init_dram_2300(struct drm_device *dev) temp |= 0x73; ast_write32(ast, 0x12008, temp); + param.dram_freq = 396; param.dram_type = AST_DDR3; + temp = ast_mindwm(ast, 0x1e6e2070); if (temp & 0x01000000) param.dram_type = AST_DDR2; - param.dram_chipid = ast->dram_type; - param.dram_freq = ast->mclk; - param.vram_size = ast->vram_size; + switch (temp & 0x18000000) { + case 0: + param.dram_chipid = AST_DRAM_512Mx16; + break; + default: + case 0x08000000: + param.dram_chipid = AST_DRAM_1Gx16; + break; + case 0x10000000: + param.dram_chipid = AST_DRAM_2Gx16; + break; + case 0x18000000: + param.dram_chipid = AST_DRAM_4Gx16; + break; + } + switch (temp & 0x0c) { + default: + case 0x00: + param.vram_size = AST_VIDMEM_SIZE_8M; + break; + + case 0x04: + param.vram_size = AST_VIDMEM_SIZE_16M; + break; + + case 0x08: + param.vram_size = AST_VIDMEM_SIZE_32M; + break; + + case 0x0c: + param.vram_size = AST_VIDMEM_SIZE_64M; + break; + } if (param.dram_type == AST_DDR3) { get_ddr3_info(ast, ¶m); -- GitLab From 71f677a91046599ece96ebab21df956ce909c456 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 17 Feb 2017 14:33:01 +1100 Subject: [PATCH 0658/1084] drm/ast: Handle configuration without P2A bridge The ast driver configures a window to enable access into BMC memory space in order to read some configuration registers. If this window is disabled, which it can be from the BMC side, the ast driver can't function. Closing this window is a necessity for security if a machine's host side and BMC side are controlled by different parties; i.e. a cloud provider offering machines "bare metal". A recent patch went in to try to check if that window is open but it does so by trying to access the registers in question and testing if the result is 0xffffffff. This method will trigger a PCIe error when the window is closed which on some systems will be fatal (it will trigger an EEH for example on POWER which will take out the device). This patch improves this in two ways: - First, if the firmware has put properties in the device-tree containing the relevant configuration information, we use these. - Otherwise, a bit in one of the SCU scratch registers (which are readable via the VGA register space and writeable by the BMC) will indicate if the BMC has closed the window. This bit has been defined by Y.C Chen from Aspeed. If the window is closed and the configuration isn't available from the device-tree, some sane defaults are used. Those defaults are hopefully sufficient for standard video modes used on a server. 
Signed-off-by: Russell Currey Acked-by: Joel Stanley Cc: Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_drv.h | 6 +- drivers/gpu/drm/ast/ast_main.c | 264 ++++++++++++++++++++------------- drivers/gpu/drm/ast/ast_post.c | 7 +- 3 files changed, 168 insertions(+), 109 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 5a8fa1c85229..3b0be9ea4191 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -114,7 +114,11 @@ struct ast_private { struct ttm_bo_kmap_obj cache_kmap; int next_cursor; bool support_wide_screen; - bool DisableP2A; + enum { + ast_use_p2a, + ast_use_dt, + ast_use_defaults + } config_mode; enum ast_tx_chip tx_chip_type; u8 dp501_maxclk; diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 993909430736..1f9fa69d4504 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -62,13 +62,84 @@ uint8_t ast_get_index_reg_mask(struct ast_private *ast, return ret; } +static void ast_detect_config_mode(struct drm_device *dev, u32 *scu_rev) +{ + struct device_node *np = dev->pdev->dev.of_node; + struct ast_private *ast = dev->dev_private; + uint32_t data, jregd0, jregd1; + + /* Defaults */ + ast->config_mode = ast_use_defaults; + *scu_rev = 0xffffffff; + + /* Check if we have device-tree properties */ + if (np && !of_property_read_u32(np, "aspeed,scu-revision-id", + scu_rev)) { + /* We do, disable P2A access */ + ast->config_mode = ast_use_dt; + DRM_INFO("Using device-tree for configuration\n"); + return; + } + + /* Not all families have a P2A bridge */ + if (dev->pdev->device != PCI_CHIP_AST2000) + return; + + /* + * The BMC will set SCU 0x40 D[12] to 1 if the P2 bridge + * is disabled. We force using P2A if VGA only mode bit + * is set D[7] + */ + jregd0 = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff); + jregd1 = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd1, 0xff); + if (!(jregd0 & 0x80) || !(jregd1 & 0x10)) { + /* Double check it's actually working */ + data = ast_read32(ast, 0xf004); + if (data != 0xFFFFFFFF) { + /* P2A works, grab silicon revision */ + ast->config_mode = ast_use_p2a; + + DRM_INFO("Using P2A bridge for configuration\n"); + + /* Read SCU7c (silicon revision register) */ + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + *scu_rev = ast_read32(ast, 0x1207c); + return; + } + } + + /* We have a P2A bridge but it's disabled */ + DRM_INFO("P2A bridge disabled, using default configuration\n"); +} static int ast_detect_chip(struct drm_device *dev, bool *need_post) { struct ast_private *ast = dev->dev_private; - uint32_t data, jreg; + uint32_t jreg, scu_rev; + + /* + * If VGA isn't enabled, we need to enable now or subsequent + * access to the scratch registers will fail. 
We also inform + * our caller that it needs to POST the chip + * (Assumption: VGA not enabled -> need to POST) + */ + if (!ast_is_vga_enabled(dev)) { + ast_enable_vga(dev); + DRM_INFO("VGA not enabled on entry, requesting chip POST\n"); + *need_post = true; + } else + *need_post = false; + + + /* Enable extended register access */ + ast_enable_mmio(dev); ast_open_key(ast); + /* Find out whether P2A works or whether to use device-tree */ + ast_detect_config_mode(dev, &scu_rev); + + /* Identify chipset */ if (dev->pdev->device == PCI_CHIP_AST1180) { ast->chip = AST1100; DRM_INFO("AST 1180 detected\n"); @@ -80,12 +151,7 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) ast->chip = AST2300; DRM_INFO("AST 2300 detected\n"); } else if (dev->pdev->revision >= 0x10) { - uint32_t data; - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - - data = ast_read32(ast, 0x1207c); - switch (data & 0x0300) { + switch (scu_rev & 0x0300) { case 0x0200: ast->chip = AST1100; DRM_INFO("AST 1100 detected\n"); @@ -110,26 +176,6 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) } } - /* - * If VGA isn't enabled, we need to enable now or subsequent - * access to the scratch registers will fail. We also inform - * our caller that it needs to POST the chip - * (Assumption: VGA not enabled -> need to POST) - */ - if (!ast_is_vga_enabled(dev)) { - ast_enable_vga(dev); - ast_enable_mmio(dev); - DRM_INFO("VGA not enabled on entry, requesting chip POST\n"); - *need_post = true; - } else - *need_post = false; - - /* Check P2A Access */ - ast->DisableP2A = true; - data = ast_read32(ast, 0xf004); - if (data != 0xFFFFFFFF) - ast->DisableP2A = false; - /* Check if we support wide screen */ switch (ast->chip) { case AST1180: @@ -146,17 +192,12 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) ast->support_wide_screen = true; else { ast->support_wide_screen = false; - if (ast->DisableP2A == false) { - /* Read SCU7c (silicon revision register) */ - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - data = ast_read32(ast, 0x1207c); - data &= 0x300; - if (ast->chip == AST2300 && data == 0x0) /* ast1300 */ - ast->support_wide_screen = true; - if (ast->chip == AST2400 && data == 0x100) /* ast1400 */ - ast->support_wide_screen = true; - } + if (ast->chip == AST2300 && + (scu_rev & 0x300) == 0x0) /* ast1300 */ + ast->support_wide_screen = true; + if (ast->chip == AST2400 && + (scu_rev & 0x300) == 0x100) /* ast1400 */ + ast->support_wide_screen = true; } break; } @@ -220,85 +261,102 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) static int ast_get_dram_info(struct drm_device *dev) { + struct device_node *np = dev->pdev->dev.of_node; struct ast_private *ast = dev->dev_private; - uint32_t data, data2; - uint32_t denum, num, div, ref_pll; + uint32_t mcr_cfg, mcr_scu_mpll, mcr_scu_strap; + uint32_t denum, num, div, ref_pll, dsel; - if (ast->DisableP2A) - { + switch (ast->config_mode) { + case ast_use_dt: + /* + * If some properties are missing, use reasonable + * defaults for AST2400 + */ + if (of_property_read_u32(np, "aspeed,mcr-configuration", + &mcr_cfg)) + mcr_cfg = 0x00000577; + if (of_property_read_u32(np, "aspeed,mcr-scu-mpll", + &mcr_scu_mpll)) + mcr_scu_mpll = 0x000050C0; + if (of_property_read_u32(np, "aspeed,mcr-scu-strap", + &mcr_scu_strap)) + mcr_scu_strap = 0; + break; + case ast_use_p2a: + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + mcr_cfg = ast_read32(ast, 0x10004); + 
mcr_scu_mpll = ast_read32(ast, 0x10120); + mcr_scu_strap = ast_read32(ast, 0x10170); + break; + case ast_use_defaults: + default: ast->dram_bus_width = 16; ast->dram_type = AST_DRAM_1Gx16; ast->mclk = 396; + return 0; } - else - { - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - data = ast_read32(ast, 0x10004); - - if (data & 0x40) - ast->dram_bus_width = 16; - else - ast->dram_bus_width = 32; - if (ast->chip == AST2300 || ast->chip == AST2400) { - switch (data & 0x03) { - case 0: - ast->dram_type = AST_DRAM_512Mx16; - break; - default: - case 1: - ast->dram_type = AST_DRAM_1Gx16; - break; - case 2: - ast->dram_type = AST_DRAM_2Gx16; - break; - case 3: - ast->dram_type = AST_DRAM_4Gx16; - break; - } - } else { - switch (data & 0x0c) { - case 0: - case 4: - ast->dram_type = AST_DRAM_512Mx16; - break; - case 8: - if (data & 0x40) - ast->dram_type = AST_DRAM_1Gx16; - else - ast->dram_type = AST_DRAM_512Mx32; - break; - case 0xc: - ast->dram_type = AST_DRAM_1Gx32; - break; - } - } + if (mcr_cfg & 0x40) + ast->dram_bus_width = 16; + else + ast->dram_bus_width = 32; - data = ast_read32(ast, 0x10120); - data2 = ast_read32(ast, 0x10170); - if (data2 & 0x2000) - ref_pll = 14318; - else - ref_pll = 12000; - - denum = data & 0x1f; - num = (data & 0x3fe0) >> 5; - data = (data & 0xc000) >> 14; - switch (data) { - case 3: - div = 0x4; + if (ast->chip == AST2300 || ast->chip == AST2400) { + switch (mcr_cfg & 0x03) { + case 0: + ast->dram_type = AST_DRAM_512Mx16; break; - case 2: + default: case 1: - div = 0x2; + ast->dram_type = AST_DRAM_1Gx16; break; - default: - div = 0x1; + case 2: + ast->dram_type = AST_DRAM_2Gx16; + break; + case 3: + ast->dram_type = AST_DRAM_4Gx16; + break; + } + } else { + switch (mcr_cfg & 0x0c) { + case 0: + case 4: + ast->dram_type = AST_DRAM_512Mx16; + break; + case 8: + if (mcr_cfg & 0x40) + ast->dram_type = AST_DRAM_1Gx16; + else + ast->dram_type = AST_DRAM_512Mx32; + break; + case 0xc: + ast->dram_type = AST_DRAM_1Gx32; break; } - ast->mclk = ref_pll * (num + 2) / (denum + 2) * (div * 1000); } + + if (mcr_scu_strap & 0x2000) + ref_pll = 14318; + else + ref_pll = 12000; + + denum = mcr_scu_mpll & 0x1f; + num = (mcr_scu_mpll & 0x3fe0) >> 5; + dsel = (mcr_scu_mpll & 0xc000) >> 14; + switch (dsel) { + case 3: + div = 0x4; + break; + case 2: + case 1: + div = 0x2; + break; + default: + div = 0x1; + break; + } + ast->mclk = ref_pll * (num + 2) / (denum + 2) * (div * 1000); return 0; } diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 6c5391cb90eb..64549cebcc5b 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -379,17 +379,14 @@ void ast_post_gpu(struct drm_device *dev) ast_open_key(ast); ast_set_def_ext_reg(dev); - if (ast->DisableP2A == false) - { + if (ast->config_mode == ast_use_p2a) { if (ast->chip == AST2300 || ast->chip == AST2400) ast_init_dram_2300(dev); else ast_init_dram_reg(dev); ast_init_3rdtx(dev); - } - else - { + } else { if (ast->tx_chip_type != AST_TX_NONE) ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xcf, 0x80); /* Enable DVO */ } -- GitLab From 22acdbb1bdbd56cc9939e18516dfcf214a9d835b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 17 Feb 2017 13:45:24 +1100 Subject: [PATCH 0659/1084] drm/ast: const'ify mode setting tables And fix some comment alignment & space/tabs while at it Signed-off-by: Benjamin Herrenschmidt Acked-by: Joel Stanley Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_drv.h | 4 +- drivers/gpu/drm/ast/ast_mode.c | 8 +-- 
drivers/gpu/drm/ast/ast_tables.h | 106 +++++++++++++++---------------- 3 files changed, 59 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 3b0be9ea4191..8fcd55c0e63d 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -305,8 +305,8 @@ struct ast_vbios_dclk_info { }; struct ast_vbios_mode_info { - struct ast_vbios_stdtable *std_table; - struct ast_vbios_enhtable *enh_table; + const struct ast_vbios_stdtable *std_table; + const struct ast_vbios_enhtable *enh_table; }; extern int ast_mode_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 606cb40f6c7c..c25b8b06d55a 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -81,9 +81,9 @@ static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mo struct ast_private *ast = crtc->dev->dev_private; const struct drm_framebuffer *fb = crtc->primary->fb; u32 refresh_rate_index = 0, mode_id, color_index, refresh_rate; + const struct ast_vbios_enhtable *best = NULL; u32 hborder, vborder; bool check_sync; - struct ast_vbios_enhtable *best = NULL; switch (fb->format->cpp[0] * 8) { case 8: @@ -147,7 +147,7 @@ static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mo refresh_rate = drm_mode_vrefresh(mode); check_sync = vbios_mode->enh_table->flags & WideScreenMode; do { - struct ast_vbios_enhtable *loop = vbios_mode->enh_table; + const struct ast_vbios_enhtable *loop = vbios_mode->enh_table; while (loop->refresh_rate != 0xff) { if ((check_sync) && @@ -227,7 +227,7 @@ static void ast_set_std_reg(struct drm_crtc *crtc, struct drm_display_mode *mode struct ast_vbios_mode_info *vbios_mode) { struct ast_private *ast = crtc->dev->dev_private; - struct ast_vbios_stdtable *stdtable; + const struct ast_vbios_stdtable *stdtable; u32 i; u8 jreg; @@ -384,7 +384,7 @@ static void ast_set_dclk_reg(struct drm_device *dev, struct drm_display_mode *mo struct ast_vbios_mode_info *vbios_mode) { struct ast_private *ast = dev->dev_private; - struct ast_vbios_dclk_info *clk_info; + const struct ast_vbios_dclk_info *clk_info; clk_info = &dclk_table[vbios_mode->enh_table->dclk_index]; diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h index 3608d5aa7451..a4ddf901a54f 100644 --- a/drivers/gpu/drm/ast/ast_tables.h +++ b/drivers/gpu/drm/ast/ast_tables.h @@ -78,37 +78,37 @@ #define VCLK97_75 0x19 #define VCLK118_25 0x1A -static struct ast_vbios_dclk_info dclk_table[] = { - {0x2C, 0xE7, 0x03}, /* 00: VCLK25_175 */ - {0x95, 0x62, 0x03}, /* 01: VCLK28_322 */ - {0x67, 0x63, 0x01}, /* 02: VCLK31_5 */ - {0x76, 0x63, 0x01}, /* 03: VCLK36 */ - {0xEE, 0x67, 0x01}, /* 04: VCLK40 */ - {0x82, 0x62, 0x01}, /* 05: VCLK49_5 */ - {0xC6, 0x64, 0x01}, /* 06: VCLK50 */ - {0x94, 0x62, 0x01}, /* 07: VCLK56_25 */ - {0x80, 0x64, 0x00}, /* 08: VCLK65 */ - {0x7B, 0x63, 0x00}, /* 09: VCLK75 */ - {0x67, 0x62, 0x00}, /* 0A: VCLK78_75 */ - {0x7C, 0x62, 0x00}, /* 0B: VCLK94_5 */ - {0x8E, 0x62, 0x00}, /* 0C: VCLK108 */ - {0x85, 0x24, 0x00}, /* 0D: VCLK135 */ - {0x67, 0x22, 0x00}, /* 0E: VCLK157_5 */ - {0x6A, 0x22, 0x00}, /* 0F: VCLK162 */ - {0x4d, 0x4c, 0x80}, /* 10: VCLK154 */ - {0xa7, 0x78, 0x80}, /* 11: VCLK83.5 */ - {0x28, 0x49, 0x80}, /* 12: VCLK106.5 */ - {0x37, 0x49, 0x80}, /* 13: VCLK146.25 */ - {0x1f, 0x45, 0x80}, /* 14: VCLK148.5 */ - {0x47, 0x6c, 0x80}, /* 15: VCLK71 */ - {0x25, 0x65, 0x80}, /* 16: VCLK88.75 */ - {0x77, 0x58, 0x80}, /* 17: VCLK119 */ - 
{0x32, 0x67, 0x80}, /* 18: VCLK85_5 */ - {0x6a, 0x6d, 0x80}, /* 19: VCLK97_75 */ - {0x3b, 0x2c, 0x81}, /* 1A: VCLK118_25 */ +static const struct ast_vbios_dclk_info dclk_table[] = { + {0x2C, 0xE7, 0x03}, /* 00: VCLK25_175 */ + {0x95, 0x62, 0x03}, /* 01: VCLK28_322 */ + {0x67, 0x63, 0x01}, /* 02: VCLK31_5 */ + {0x76, 0x63, 0x01}, /* 03: VCLK36 */ + {0xEE, 0x67, 0x01}, /* 04: VCLK40 */ + {0x82, 0x62, 0x01}, /* 05: VCLK49_5 */ + {0xC6, 0x64, 0x01}, /* 06: VCLK50 */ + {0x94, 0x62, 0x01}, /* 07: VCLK56_25 */ + {0x80, 0x64, 0x00}, /* 08: VCLK65 */ + {0x7B, 0x63, 0x00}, /* 09: VCLK75 */ + {0x67, 0x62, 0x00}, /* 0A: VCLK78_75 */ + {0x7C, 0x62, 0x00}, /* 0B: VCLK94_5 */ + {0x8E, 0x62, 0x00}, /* 0C: VCLK108 */ + {0x85, 0x24, 0x00}, /* 0D: VCLK135 */ + {0x67, 0x22, 0x00}, /* 0E: VCLK157_5 */ + {0x6A, 0x22, 0x00}, /* 0F: VCLK162 */ + {0x4d, 0x4c, 0x80}, /* 10: VCLK154 */ + {0xa7, 0x78, 0x80}, /* 11: VCLK83.5 */ + {0x28, 0x49, 0x80}, /* 12: VCLK106.5 */ + {0x37, 0x49, 0x80}, /* 13: VCLK146.25 */ + {0x1f, 0x45, 0x80}, /* 14: VCLK148.5 */ + {0x47, 0x6c, 0x80}, /* 15: VCLK71 */ + {0x25, 0x65, 0x80}, /* 16: VCLK88.75 */ + {0x77, 0x58, 0x80}, /* 17: VCLK119 */ + {0x32, 0x67, 0x80}, /* 18: VCLK85_5 */ + {0x6a, 0x6d, 0x80}, /* 19: VCLK97_75 */ + {0x3b, 0x2c, 0x81}, /* 1A: VCLK118_25 */ }; -static struct ast_vbios_stdtable vbios_stdtable[] = { +static const struct ast_vbios_stdtable vbios_stdtable[] = { /* MD_2_3_400 */ { 0x67, @@ -181,21 +181,21 @@ static struct ast_vbios_stdtable vbios_stdtable[] = { }, }; -static struct ast_vbios_enhtable res_640x480[] = { +static const struct ast_vbios_enhtable res_640x480[] = { { 800, 640, 8, 96, 525, 480, 2, 2, VCLK25_175, /* 60Hz */ (SyncNN | HBorder | VBorder | Charx8Dot), 60, 1, 0x2E }, { 832, 640, 16, 40, 520, 480, 1, 3, VCLK31_5, /* 72Hz */ (SyncNN | HBorder | VBorder | Charx8Dot), 72, 2, 0x2E }, { 840, 640, 16, 64, 500, 480, 1, 3, VCLK31_5, /* 75Hz */ (SyncNN | Charx8Dot) , 75, 3, 0x2E }, - { 832, 640, 56, 56, 509, 480, 1, 3, VCLK36, /* 85Hz */ + { 832, 640, 56, 56, 509, 480, 1, 3, VCLK36, /* 85Hz */ (SyncNN | Charx8Dot) , 85, 4, 0x2E }, - { 832, 640, 56, 56, 509, 480, 1, 3, VCLK36, /* end */ + { 832, 640, 56, 56, 509, 480, 1, 3, VCLK36, /* end */ (SyncNN | Charx8Dot) , 0xFF, 4, 0x2E }, }; -static struct ast_vbios_enhtable res_800x600[] = { - {1024, 800, 24, 72, 625, 600, 1, 2, VCLK36, /* 56Hz */ +static const struct ast_vbios_enhtable res_800x600[] = { + {1024, 800, 24, 72, 625, 600, 1, 2, VCLK36, /* 56Hz */ (SyncPP | Charx8Dot), 56, 1, 0x30 }, {1056, 800, 40, 128, 628, 600, 1, 4, VCLK40, /* 60Hz */ (SyncPP | Charx8Dot), 60, 2, 0x30 }, @@ -210,7 +210,7 @@ static struct ast_vbios_enhtable res_800x600[] = { }; -static struct ast_vbios_enhtable res_1024x768[] = { +static const struct ast_vbios_enhtable res_1024x768[] = { {1344, 1024, 24, 136, 806, 768, 3, 6, VCLK65, /* 60Hz */ (SyncNN | Charx8Dot), 60, 1, 0x31 }, {1328, 1024, 24, 136, 806, 768, 3, 6, VCLK75, /* 70Hz */ @@ -223,7 +223,7 @@ static struct ast_vbios_enhtable res_1024x768[] = { (SyncPP | Charx8Dot), 0xFF, 4, 0x31 }, }; -static struct ast_vbios_enhtable res_1280x1024[] = { +static const struct ast_vbios_enhtable res_1280x1024[] = { {1688, 1280, 48, 112, 1066, 1024, 1, 3, VCLK108, /* 60Hz */ (SyncPP | Charx8Dot), 60, 1, 0x32 }, {1688, 1280, 16, 144, 1066, 1024, 1, 3, VCLK135, /* 75Hz */ @@ -234,7 +234,7 @@ static struct ast_vbios_enhtable res_1280x1024[] = { (SyncPP | Charx8Dot), 0xFF, 3, 0x32 }, }; -static struct ast_vbios_enhtable res_1600x1200[] = { +static const struct ast_vbios_enhtable res_1600x1200[] 
= { {2160, 1600, 64, 192, 1250, 1200, 1, 3, VCLK162, /* 60Hz */ (SyncPP | Charx8Dot), 60, 1, 0x33 }, {2160, 1600, 64, 192, 1250, 1200, 1, 3, VCLK162, /* end */ @@ -242,23 +242,23 @@ static struct ast_vbios_enhtable res_1600x1200[] = { }; /* 16:9 */ -static struct ast_vbios_enhtable res_1360x768[] = { - {1792, 1360, 64,112, 795, 768, 3, 6, VCLK85_5, /* 60Hz */ +static const struct ast_vbios_enhtable res_1360x768[] = { + {1792, 1360, 64, 112, 795, 768, 3, 6, VCLK85_5, /* 60Hz */ (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x39 }, - {1792, 1360, 64,112, 795, 768, 3, 6, VCLK85_5, /* end */ + {1792, 1360, 64, 112, 795, 768, 3, 6, VCLK85_5, /* end */ (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x39 }, }; -static struct ast_vbios_enhtable res_1600x900[] = { - {1760, 1600, 48, 32, 926, 900, 3, 5, VCLK97_75, /* 60Hz CVT RB */ +static const struct ast_vbios_enhtable res_1600x900[] = { + {1760, 1600, 48, 32, 926, 900, 3, 5, VCLK97_75, /* 60Hz CVT RB */ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x3A }, - {2112, 1600, 88,168, 934, 900, 3, 5, VCLK118_25, /* 60Hz CVT */ + {2112, 1600, 88, 168, 934, 900, 3, 5, VCLK118_25, /* 60Hz CVT */ (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x3A }, - {2112, 1600, 88,168, 934, 900, 3, 5, VCLK118_25, /* 60Hz CVT */ + {2112, 1600, 88, 168, 934, 900, 3, 5, VCLK118_25, /* 60Hz CVT */ (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 2, 0x3A }, }; -static struct ast_vbios_enhtable res_1920x1080[] = { +static const struct ast_vbios_enhtable res_1920x1080[] = { {2200, 1920, 88, 44, 1125, 1080, 4, 5, VCLK148_5, /* 60Hz */ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x38 }, {2200, 1920, 88, 44, 1125, 1080, 4, 5, VCLK148_5, /* 60Hz */ @@ -267,8 +267,8 @@ static struct ast_vbios_enhtable res_1920x1080[] = { /* 16:10 */ -static struct ast_vbios_enhtable res_1280x800[] = { - {1440, 1280, 48, 32, 823, 800, 3, 6, VCLK71, /* 60Hz RB */ +static const struct ast_vbios_enhtable res_1280x800[] = { + {1440, 1280, 48, 32, 823, 800, 3, 6, VCLK71, /* 60Hz RB */ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x35 }, {1680, 1280, 72,128, 831, 800, 3, 6, VCLK83_5, /* 60Hz */ (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x35 }, @@ -277,7 +277,7 @@ static struct ast_vbios_enhtable res_1280x800[] = { }; -static struct ast_vbios_enhtable res_1440x900[] = { +static const struct ast_vbios_enhtable res_1440x900[] = { {1600, 1440, 48, 32, 926, 900, 3, 6, VCLK88_75, /* 60Hz RB */ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x36 }, {1904, 1440, 80,152, 934, 900, 3, 6, VCLK106_5, /* 60Hz */ @@ -286,8 +286,8 @@ static struct ast_vbios_enhtable res_1440x900[] = { (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 2, 0x36 }, }; -static struct ast_vbios_enhtable res_1680x1050[] = { - {1840, 1680, 48, 32, 1080, 1050, 3, 6, VCLK119, /* 60Hz RB */ +static const struct ast_vbios_enhtable res_1680x1050[] = { + {1840, 1680, 48, 32, 1080, 1050, 3, 6, VCLK119, /* 60Hz RB */ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x37 }, {2240, 1680,104,176, 1089, 1050, 3, 6, VCLK146_25, /* 60Hz */ (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x37 }, @@ -295,10 +295,10 @@ static struct ast_vbios_enhtable res_1680x1050[] = { (SyncPN | Charx8Dot | 
LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 2, 0x37 }, }; -static struct ast_vbios_enhtable res_1920x1200[] = { - {2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154, /* 60Hz RB*/ +static const struct ast_vbios_enhtable res_1920x1200[] = { + {2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154, /* 60Hz RB*/ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x34 }, - {2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154, /* 60Hz RB */ + {2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154, /* 60Hz RB */ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x34 }, }; -- GitLab From cf2f6bd402470caed4e4d75b783d2d48277a3568 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 17 Feb 2017 13:55:58 +1100 Subject: [PATCH 0660/1084] drm/ast: Remove spurious include Signed-off-by: Benjamin Herrenschmidt Acked-by: Joel Stanley Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 1f9fa69d4504..29ab2c11db41 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -32,8 +32,6 @@ #include #include -#include "ast_dram_tables.h" - void ast_set_index_reg_mask(struct ast_private *ast, uint32_t base, uint8_t index, uint8_t mask, uint8_t val) -- GitLab From 6475a7cce61967fca4dd793b60acf5a7dc70bc9a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 17 Feb 2017 13:57:30 +1100 Subject: [PATCH 0661/1084] drm/ast: Fix calculation of MCLK Some braces were missing causing an incorrect calculation. Y.C. Chen from Aspeed provided me with the right formula which I tested on AST2400 and 2500. The MCLK isn't currently used by the driver (it will eventually to filter modes) so the issue isn't catastrophic. Also make the printed value a bit more meaningful Signed-off-by: Benjamin Herrenschmidt Acked-by: Joel Stanley Tested-by: Y.C. Chen Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 29ab2c11db41..fd167008be31 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -354,7 +354,7 @@ static int ast_get_dram_info(struct drm_device *dev) div = 0x1; break; } - ast->mclk = ref_pll * (num + 2) / (denum + 2) * (div * 1000); + ast->mclk = ref_pll * (num + 2) / ((denum + 2) * (div * 1000)); return 0; } @@ -498,7 +498,9 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags) if (ret) goto out_free; ast->vram_size = ast_get_vram_info(dev); - DRM_INFO("dram %d %d %d %08x\n", ast->mclk, ast->dram_type, ast->dram_bus_width, ast->vram_size); + DRM_INFO("dram MCLK=%u Mhz type=%d bus_width=%d size=%08x\n", + ast->mclk, ast->dram_type, + ast->dram_bus_width, ast->vram_size); } if (need_post) -- GitLab From 9f93c8b3c08f8c456aad86fd05caa6a1688320ff Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Fri, 17 Feb 2017 14:36:46 +1100 Subject: [PATCH 0662/1084] drm/ast: Base support for AST2500 Add detection and mode setting updates for AST2500 generation chip, code originally from Aspeed and slightly reworked for coding style mostly by Ben. This doesn't contain the BMC DRAM POST code which is in a separate patch. Signed-off-by: Y.C. 
Chen Signed-off-by: Benjamin Herrenschmidt Acked-by: Joel Stanley Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_drv.h | 2 ++ drivers/gpu/drm/ast/ast_main.c | 32 ++++++++++++++++-- drivers/gpu/drm/ast/ast_mode.c | 30 +++++++++++++---- drivers/gpu/drm/ast/ast_tables.h | 58 +++++++++++++++++++++++++++----- 4 files changed, 103 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 8fcd55c0e63d..8880f0b62e9c 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -65,6 +65,7 @@ enum ast_chip { AST2150, AST2300, AST2400, + AST2500, AST1180, }; @@ -81,6 +82,7 @@ enum ast_tx_chip { #define AST_DRAM_1Gx32 3 #define AST_DRAM_2Gx16 6 #define AST_DRAM_4Gx16 7 +#define AST_DRAM_8Gx16 8 struct ast_fbdev; diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index fd167008be31..8e8c0310245f 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -142,7 +142,10 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) ast->chip = AST1100; DRM_INFO("AST 1180 detected\n"); } else { - if (dev->pdev->revision >= 0x30) { + if (dev->pdev->revision >= 0x40) { + ast->chip = AST2500; + DRM_INFO("AST 2500 detected\n"); + } else if (dev->pdev->revision >= 0x30) { ast->chip = AST2400; DRM_INFO("AST 2400 detected\n"); } else if (dev->pdev->revision >= 0x20) { @@ -196,6 +199,9 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) if (ast->chip == AST2400 && (scu_rev & 0x300) == 0x100) /* ast1400 */ ast->support_wide_screen = true; + if (ast->chip == AST2500 && + scu_rev == 0x100) /* ast2510 */ + ast->support_wide_screen = true; } break; } @@ -291,7 +297,10 @@ static int ast_get_dram_info(struct drm_device *dev) default: ast->dram_bus_width = 16; ast->dram_type = AST_DRAM_1Gx16; - ast->mclk = 396; + if (ast->chip == AST2500) + ast->mclk = 800; + else + ast->mclk = 396; return 0; } @@ -300,7 +309,23 @@ static int ast_get_dram_info(struct drm_device *dev) else ast->dram_bus_width = 32; - if (ast->chip == AST2300 || ast->chip == AST2400) { + if (ast->chip == AST2500) { + switch (mcr_cfg & 0x03) { + case 0: + ast->dram_type = AST_DRAM_1Gx16; + break; + default: + case 1: + ast->dram_type = AST_DRAM_2Gx16; + break; + case 2: + ast->dram_type = AST_DRAM_4Gx16; + break; + case 3: + ast->dram_type = AST_DRAM_8Gx16; + break; + } + } else if (ast->chip == AST2300 || ast->chip == AST2400) { switch (mcr_cfg & 0x03) { case 0: ast->dram_type = AST_DRAM_512Mx16; @@ -523,6 +548,7 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags) ast->chip == AST2200 || ast->chip == AST2300 || ast->chip == AST2400 || + ast->chip == AST2500 || ast->chip == AST1180) { dev->mode_config.max_width = 1920; dev->mode_config.max_height = 2048; diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index c25b8b06d55a..47b78e52691c 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -273,7 +273,11 @@ static void ast_set_crtc_reg(struct drm_crtc *crtc, struct drm_display_mode *mod { struct ast_private *ast = crtc->dev->dev_private; u8 jreg05 = 0, jreg07 = 0, jreg09 = 0, jregAC = 0, jregAD = 0, jregAE = 0; - u16 temp; + u16 temp, precache = 0; + + if ((ast->chip == AST2500) && + (vbios_mode->enh_table->flags & AST2500PreCatchCRT)) + precache = 40; ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x11, 0x7f, 0x00); @@ -299,12 +303,12 @@ static void ast_set_crtc_reg(struct drm_crtc *crtc, struct drm_display_mode *mod jregAD |= 
0x01; /* HBE D[5] */ ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x03, 0xE0, (temp & 0x1f)); - temp = (mode->crtc_hsync_start >> 3) - 1; + temp = ((mode->crtc_hsync_start-precache) >> 3) - 1; if (temp & 0x100) jregAC |= 0x40; /* HRS D[5] */ ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x04, 0x00, temp); - temp = ((mode->crtc_hsync_end >> 3) - 1) & 0x3f; + temp = (((mode->crtc_hsync_end-precache) >> 3) - 1) & 0x3f; if (temp & 0x20) jregAD |= 0x04; /* HRE D[5] */ ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x05, 0x60, (u8)((temp & 0x1f) | jreg05)); @@ -365,6 +369,11 @@ static void ast_set_crtc_reg(struct drm_crtc *crtc, struct drm_display_mode *mod ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x09, 0xdf, jreg09); ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xAE, 0x00, (jregAE | 0x80)); + if (precache) + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0x3f, 0x80); + else + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0x3f, 0x00); + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x11, 0x7f, 0x80); } @@ -386,12 +395,16 @@ static void ast_set_dclk_reg(struct drm_device *dev, struct drm_display_mode *mo struct ast_private *ast = dev->dev_private; const struct ast_vbios_dclk_info *clk_info; - clk_info = &dclk_table[vbios_mode->enh_table->dclk_index]; + if (ast->chip == AST2500) + clk_info = &dclk_table_ast2500[vbios_mode->enh_table->dclk_index]; + else + clk_info = &dclk_table[vbios_mode->enh_table->dclk_index]; ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xc0, 0x00, clk_info->param1); ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xc1, 0x00, clk_info->param2); ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xbb, 0x0f, - (clk_info->param3 & 0x80) | ((clk_info->param3 & 0x3) << 4)); + (clk_info->param3 & 0xc0) | + ((clk_info->param3 & 0x3) << 4)); } static void ast_set_ext_reg(struct drm_crtc *crtc, struct drm_display_mode *mode, @@ -425,7 +438,8 @@ static void ast_set_ext_reg(struct drm_crtc *crtc, struct drm_display_mode *mode ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa8, 0xfd, jregA8); /* Set Threshold */ - if (ast->chip == AST2300 || ast->chip == AST2400) { + if (ast->chip == AST2300 || ast->chip == AST2400 || + ast->chip == AST2500) { ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa7, 0x78); ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa6, 0x60); } else if (ast->chip == AST2100 || @@ -800,7 +814,9 @@ static int ast_mode_valid(struct drm_connector *connector, if ((mode->hdisplay == 1600) && (mode->vdisplay == 900)) return MODE_OK; - if ((ast->chip == AST2100) || (ast->chip == AST2200) || (ast->chip == AST2300) || (ast->chip == AST2400) || (ast->chip == AST1180)) { + if ((ast->chip == AST2100) || (ast->chip == AST2200) || + (ast->chip == AST2300) || (ast->chip == AST2400) || + (ast->chip == AST2500) || (ast->chip == AST1180)) { if ((mode->hdisplay == 1920) && (mode->vdisplay == 1080)) return MODE_OK; diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h index a4ddf901a54f..5f4c2e833a65 100644 --- a/drivers/gpu/drm/ast/ast_tables.h +++ b/drivers/gpu/drm/ast/ast_tables.h @@ -47,6 +47,7 @@ #define SyncPN (PVSync | NHSync) #define SyncNP (NVSync | PHSync) #define SyncNN (NVSync | NHSync) +#define AST2500PreCatchCRT 0x00004000 /* DCLK Index */ #define VCLK25_175 0x00 @@ -108,6 +109,36 @@ static const struct ast_vbios_dclk_info dclk_table[] = { {0x3b, 0x2c, 0x81}, /* 1A: VCLK118_25 */ }; +static const struct ast_vbios_dclk_info dclk_table_ast2500[] = { + {0x2C, 0xE7, 0x03}, /* 00: VCLK25_175 */ + {0x95, 0x62, 0x03}, /* 01: VCLK28_322 */ + {0x67, 0x63, 
0x01}, /* 02: VCLK31_5 */ + {0x76, 0x63, 0x01}, /* 03: VCLK36 */ + {0xEE, 0x67, 0x01}, /* 04: VCLK40 */ + {0x82, 0x62, 0x01}, /* 05: VCLK49_5 */ + {0xC6, 0x64, 0x01}, /* 06: VCLK50 */ + {0x94, 0x62, 0x01}, /* 07: VCLK56_25 */ + {0x80, 0x64, 0x00}, /* 08: VCLK65 */ + {0x7B, 0x63, 0x00}, /* 09: VCLK75 */ + {0x67, 0x62, 0x00}, /* 0A: VCLK78_75 */ + {0x7C, 0x62, 0x00}, /* 0B: VCLK94_5 */ + {0x8E, 0x62, 0x00}, /* 0C: VCLK108 */ + {0x85, 0x24, 0x00}, /* 0D: VCLK135 */ + {0x67, 0x22, 0x00}, /* 0E: VCLK157_5 */ + {0x6A, 0x22, 0x00}, /* 0F: VCLK162 */ + {0x4d, 0x4c, 0x80}, /* 10: VCLK154 */ + {0xa7, 0x78, 0x80}, /* 11: VCLK83.5 */ + {0x28, 0x49, 0x80}, /* 12: VCLK106.5 */ + {0x37, 0x49, 0x80}, /* 13: VCLK146.25 */ + {0x1f, 0x45, 0x80}, /* 14: VCLK148.5 */ + {0x47, 0x6c, 0x80}, /* 15: VCLK71 */ + {0x25, 0x65, 0x80}, /* 16: VCLK88.75 */ + {0x58, 0x01, 0x42}, /* 17: VCLK119 */ + {0x32, 0x67, 0x80}, /* 18: VCLK85_5 */ + {0x6a, 0x6d, 0x80}, /* 19: VCLK97_75 */ + {0x44, 0x20, 0x43}, /* 1A: VCLK118_25 */ +}; + static const struct ast_vbios_stdtable vbios_stdtable[] = { /* MD_2_3_400 */ { @@ -246,12 +277,14 @@ static const struct ast_vbios_enhtable res_1360x768[] = { {1792, 1360, 64, 112, 795, 768, 3, 6, VCLK85_5, /* 60Hz */ (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x39 }, {1792, 1360, 64, 112, 795, 768, 3, 6, VCLK85_5, /* end */ - (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x39 }, + (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | + AST2500PreCatchCRT), 0xFF, 1, 0x39 }, }; static const struct ast_vbios_enhtable res_1600x900[] = { {1760, 1600, 48, 32, 926, 900, 3, 5, VCLK97_75, /* 60Hz CVT RB */ - (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x3A }, + (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | + AST2500PreCatchCRT), 60, 1, 0x3A }, {2112, 1600, 88, 168, 934, 900, 3, 5, VCLK118_25, /* 60Hz CVT */ (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x3A }, {2112, 1600, 88, 168, 934, 900, 3, 5, VCLK118_25, /* 60Hz CVT */ @@ -260,16 +293,19 @@ static const struct ast_vbios_enhtable res_1600x900[] = { static const struct ast_vbios_enhtable res_1920x1080[] = { {2200, 1920, 88, 44, 1125, 1080, 4, 5, VCLK148_5, /* 60Hz */ - (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x38 }, + (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | + AST2500PreCatchCRT), 60, 1, 0x38 }, {2200, 1920, 88, 44, 1125, 1080, 4, 5, VCLK148_5, /* 60Hz */ - (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x38 }, + (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | + AST2500PreCatchCRT), 0xFF, 1, 0x38 }, }; /* 16:10 */ static const struct ast_vbios_enhtable res_1280x800[] = { {1440, 1280, 48, 32, 823, 800, 3, 6, VCLK71, /* 60Hz RB */ - (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x35 }, + (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | + AST2500PreCatchCRT), 60, 1, 0x35 }, {1680, 1280, 72,128, 831, 800, 3, 6, VCLK83_5, /* 60Hz */ (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x35 }, {1680, 1280, 72,128, 831, 800, 3, 6, VCLK83_5, /* 60Hz */ @@ -279,7 +315,8 @@ static const struct ast_vbios_enhtable res_1280x800[] = { static const struct ast_vbios_enhtable res_1440x900[] = { {1600, 1440, 48, 32, 926, 900, 3, 6, VCLK88_75, /* 60Hz RB */ - (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode 
| NewModeInfo), 60, 1, 0x36 }, + (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | + AST2500PreCatchCRT), 60, 1, 0x36 }, {1904, 1440, 80,152, 934, 900, 3, 6, VCLK106_5, /* 60Hz */ (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x36 }, {1904, 1440, 80,152, 934, 900, 3, 6, VCLK106_5, /* 60Hz */ @@ -288,7 +325,8 @@ static const struct ast_vbios_enhtable res_1440x900[] = { static const struct ast_vbios_enhtable res_1680x1050[] = { {1840, 1680, 48, 32, 1080, 1050, 3, 6, VCLK119, /* 60Hz RB */ - (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x37 }, + (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | + AST2500PreCatchCRT), 60, 1, 0x37 }, {2240, 1680,104,176, 1089, 1050, 3, 6, VCLK146_25, /* 60Hz */ (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x37 }, {2240, 1680,104,176, 1089, 1050, 3, 6, VCLK146_25, /* 60Hz */ @@ -297,9 +335,11 @@ static const struct ast_vbios_enhtable res_1680x1050[] = { static const struct ast_vbios_enhtable res_1920x1200[] = { {2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154, /* 60Hz RB*/ - (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x34 }, + (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | + AST2500PreCatchCRT), 60, 1, 0x34 }, {2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154, /* 60Hz RB */ - (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x34 }, + (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | + AST2500PreCatchCRT), 0xFF, 1, 0x34 }, }; #endif -- GitLab From bad09da6deab157440a3f0dd5e6c03cdc60c0a91 Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Fri, 17 Feb 2017 10:56:47 +0800 Subject: [PATCH 0663/1084] drm/ast: Fixed vram size incorrect issue on POWER The default value of the VGA scratch registers may be incorrect. Initialize the hardware before reading the VRAM info. Acked-by: Joel Stanley Tested-by: Y.C. Chen Signed-off-by: Y.C.
Chen Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_main.c | 6 +++--- drivers/gpu/drm/ast/ast_post.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 8e8c0310245f..262c2c0e43b4 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -518,6 +518,9 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags) ast_detect_chip(dev, &need_post); + if (need_post) + ast_post_gpu(dev); + if (ast->chip != AST1180) { ret = ast_get_dram_info(dev); if (ret) @@ -528,9 +531,6 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags) ast->dram_bus_width, ast->vram_size); } - if (need_post) - ast_post_gpu(dev); - ret = ast_mm_init(ast); if (ret) goto out_free; diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 64549cebcc5b..e8024502e749 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -79,7 +79,7 @@ ast_set_def_ext_reg(struct drm_device *dev) const u8 *ext_reg_info; /* reset scratch */ - for (i = 0x81; i <= 0x8f; i++) + for (i = 0x81; i <= 0x9f; i++) ast_set_index_reg(ast, AST_IO_CRTC_PORT, i, 0x00); if (ast->chip == AST2300 || ast->chip == AST2400) { -- GitLab From b368e53aae7694e0fb2bdec9667c9acc5108b629 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 18 Feb 2017 14:19:10 +1100 Subject: [PATCH 0664/1084] drm/ast: Factor mmc_test code in POST code There's some duplication for what's essentially copies of two loops, so factor it. The upcoming AST2500 POST code adds more of them. Also clean up the return types of the test functions: most of them return a boolean, some return a u32. Signed-off-by: Benjamin Herrenschmidt Acked-by: Joel Stanley Tested-by: Y.C.
Chen Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_post.c | 82 +++++++++++++--------------------- 1 file changed, 31 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index e8024502e749..c55067ccb0b1 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -445,85 +445,65 @@ static const u32 pattern[8] = { 0x7C61D253 }; -static int mmc_test_burst(struct ast_private *ast, u32 datagen) +static bool mmc_test(struct ast_private *ast, u32 datagen, u8 test_ctl) { u32 data, timeout; ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - ast_moutdwm(ast, 0x1e6e0070, 0x000000c1 | (datagen << 3)); + ast_moutdwm(ast, 0x1e6e0070, (datagen << 3) | test_ctl); timeout = 0; do { data = ast_mindwm(ast, 0x1e6e0070) & 0x3000; - if (data & 0x2000) { - return 0; - } + if (data & 0x2000) + return false; if (++timeout > TIMEOUT) { ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - return 0; + return false; } } while (!data); - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - return 1; + ast_moutdwm(ast, 0x1e6e0070, 0x0); + return true; } -static int mmc_test_burst2(struct ast_private *ast, u32 datagen) +static u32 mmc_test2(struct ast_private *ast, u32 datagen, u8 test_ctl) { u32 data, timeout; ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - ast_moutdwm(ast, 0x1e6e0070, 0x00000041 | (datagen << 3)); + ast_moutdwm(ast, 0x1e6e0070, (datagen << 3) | test_ctl); timeout = 0; do { data = ast_mindwm(ast, 0x1e6e0070) & 0x1000; if (++timeout > TIMEOUT) { ast_moutdwm(ast, 0x1e6e0070, 0x0); - return -1; + return 0xffffffff; } } while (!data); data = ast_mindwm(ast, 0x1e6e0078); data = (data | (data >> 16)) & 0xffff; - ast_moutdwm(ast, 0x1e6e0070, 0x0); + ast_moutdwm(ast, 0x1e6e0070, 0x00000000); return data; } -static int mmc_test_single(struct ast_private *ast, u32 datagen) + +static bool mmc_test_burst(struct ast_private *ast, u32 datagen) { - u32 data, timeout; + return mmc_test(ast, datagen, 0xc1); +} - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - ast_moutdwm(ast, 0x1e6e0070, 0x000000c5 | (datagen << 3)); - timeout = 0; - do { - data = ast_mindwm(ast, 0x1e6e0070) & 0x3000; - if (data & 0x2000) - return 0; - if (++timeout > TIMEOUT) { - ast_moutdwm(ast, 0x1e6e0070, 0x0); - return 0; - } - } while (!data); - ast_moutdwm(ast, 0x1e6e0070, 0x0); - return 1; +static u32 mmc_test_burst2(struct ast_private *ast, u32 datagen) +{ + return mmc_test2(ast, datagen, 0x41); } -static int mmc_test_single2(struct ast_private *ast, u32 datagen) +static bool mmc_test_single(struct ast_private *ast, u32 datagen) { - u32 data, timeout; + return mmc_test(ast, datagen, 0xc5); +} - ast_moutdwm(ast, 0x1e6e0070, 0x00000000); - ast_moutdwm(ast, 0x1e6e0070, 0x00000005 | (datagen << 3)); - timeout = 0; - do { - data = ast_mindwm(ast, 0x1e6e0070) & 0x1000; - if (++timeout > TIMEOUT) { - ast_moutdwm(ast, 0x1e6e0070, 0x0); - return -1; - } - } while (!data); - data = ast_mindwm(ast, 0x1e6e0078); - data = (data | (data >> 16)) & 0xffff; - ast_moutdwm(ast, 0x1e6e0070, 0x0); - return data; +static u32 mmc_test_single2(struct ast_private *ast, u32 datagen) +{ + return mmc_test2(ast, datagen, 0x05); } static int cbr_test(struct ast_private *ast) @@ -601,16 +581,16 @@ static u32 cbr_scan2(struct ast_private *ast) return data2; } -static u32 cbr_test3(struct ast_private *ast) +static bool cbr_test3(struct ast_private *ast) { if (!mmc_test_burst(ast, 0)) - return 0; + return false; if (!mmc_test_single(ast, 0)) - return 0; - return 1; + return false; + return true; } -static u32 
cbr_scan3(struct ast_private *ast) +static bool cbr_scan3(struct ast_private *ast) { u32 patcnt, loop; @@ -621,9 +601,9 @@ static u32 cbr_scan3(struct ast_private *ast) break; } if (loop == 2) - return 0; + return false; } - return 1; + return true; } static bool finetuneDQI_L(struct ast_private *ast, struct ast2300_dram_param *param) -- GitLab From d95618ef0a05236a2642d1a829fffd90de88e5f0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 17 Feb 2017 13:51:34 +1100 Subject: [PATCH 0665/1084] drm/ast: Rename ast_init_dram_2300 to ast_post_chip_2300 The function does more than initialize the DRAM and in turn calls other functions to do the actual init. This keeps things more consistent with the upcoming AST2500 POST code. Signed-off-by: Benjamin Herrenschmidt Acked-by: Joel Stanley Tested-by: Y.C. Chen Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_post.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index c55067ccb0b1..561fd7d3a0cf 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -31,7 +31,7 @@ #include "ast_dram_tables.h" -static void ast_init_dram_2300(struct drm_device *dev); +static void ast_post_chip_2300(struct drm_device *dev); void ast_enable_vga(struct drm_device *dev) { @@ -381,7 +381,7 @@ void ast_post_gpu(struct drm_device *dev) if (ast->config_mode == ast_use_p2a) { if (ast->chip == AST2300 || ast->chip == AST2400) - ast_init_dram_2300(dev); + ast_post_chip_2300(dev); else ast_init_dram_reg(dev); @@ -1589,7 +1589,7 @@ static void ddr2_init(struct ast_private *ast, struct ast2300_dram_param *param) } -static void ast_init_dram_2300(struct drm_device *dev) +static void ast_post_chip_2300(struct drm_device *dev) { struct ast_private *ast = dev->dev_private; struct ast2300_dram_param param; -- GitLab From 94fdc2a86a927b409d9a09ba28bcb08e34f3ac8d Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Fri, 17 Feb 2017 14:45:07 +1100 Subject: [PATCH 0666/1084] drm/ast: POST code for the new AST2500 This is used when the BMC isn't running any code and thus has to be initialized by the host. The code originates from Aspeed (Y.C. Chen) and has been cleaned up for coding style purposes by BenH. Signed-off-by: Y.C.
Chen Acked-by: Joel Stanley Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_dram_tables.h | 62 ++++ drivers/gpu/drm/ast/ast_post.c | 417 +++++++++++++++++++++++++- 2 files changed, 476 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_dram_tables.h b/drivers/gpu/drm/ast/ast_dram_tables.h index cc04539c0ff3..1d9c4e75d303 100644 --- a/drivers/gpu/drm/ast/ast_dram_tables.h +++ b/drivers/gpu/drm/ast/ast_dram_tables.h @@ -141,4 +141,66 @@ static const struct ast_dramstruct ast2100_dram_table_data[] = { { 0xffff, 0xffffffff }, }; +/* + * AST2500 DRAM settings modules + */ +#define REGTBL_NUM 17 +#define REGIDX_010 0 +#define REGIDX_014 1 +#define REGIDX_018 2 +#define REGIDX_020 3 +#define REGIDX_024 4 +#define REGIDX_02C 5 +#define REGIDX_030 6 +#define REGIDX_214 7 +#define REGIDX_2E0 8 +#define REGIDX_2E4 9 +#define REGIDX_2E8 10 +#define REGIDX_2EC 11 +#define REGIDX_2F0 12 +#define REGIDX_2F4 13 +#define REGIDX_2F8 14 +#define REGIDX_RFC 15 +#define REGIDX_PLL 16 + +static const u32 ast2500_ddr3_1600_timing_table[REGTBL_NUM] = { + 0x64604D38, /* 0x010 */ + 0x29690599, /* 0x014 */ + 0x00000300, /* 0x018 */ + 0x00000000, /* 0x020 */ + 0x00000000, /* 0x024 */ + 0x02181E70, /* 0x02C */ + 0x00000040, /* 0x030 */ + 0x00000024, /* 0x214 */ + 0x02001300, /* 0x2E0 */ + 0x0E0000A0, /* 0x2E4 */ + 0x000E001B, /* 0x2E8 */ + 0x35B8C105, /* 0x2EC */ + 0x08090408, /* 0x2F0 */ + 0x9B000800, /* 0x2F4 */ + 0x0E400A00, /* 0x2F8 */ + 0x9971452F, /* tRFC */ + 0x000071C1 /* PLL */ +}; + +static const u32 ast2500_ddr4_1600_timing_table[REGTBL_NUM] = { + 0x63604E37, /* 0x010 */ + 0xE97AFA99, /* 0x014 */ + 0x00019000, /* 0x018 */ + 0x08000000, /* 0x020 */ + 0x00000400, /* 0x024 */ + 0x00000410, /* 0x02C */ + 0x00000101, /* 0x030 */ + 0x00000024, /* 0x214 */ + 0x03002900, /* 0x2E0 */ + 0x0E0000A0, /* 0x2E4 */ + 0x000E001C, /* 0x2E8 */ + 0x35B8C106, /* 0x2EC */ + 0x08080607, /* 0x2F0 */ + 0x9B000900, /* 0x2F4 */ + 0x0E400A00, /* 0x2F8 */ + 0x99714545, /* tRFC */ + 0x000071C1 /* PLL */ +}; + #endif diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 561fd7d3a0cf..c15f643915af 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -32,6 +32,7 @@ #include "ast_dram_tables.h" static void ast_post_chip_2300(struct drm_device *dev); +static void ast_post_chip_2500(struct drm_device *dev); void ast_enable_vga(struct drm_device *dev) { @@ -82,7 +83,8 @@ ast_set_def_ext_reg(struct drm_device *dev) for (i = 0x81; i <= 0x9f; i++) ast_set_index_reg(ast, AST_IO_CRTC_PORT, i, 0x00); - if (ast->chip == AST2300 || ast->chip == AST2400) { + if (ast->chip == AST2300 || ast->chip == AST2400 || + ast->chip == AST2500) { if (dev->pdev->revision >= 0x20) ext_reg_info = extreginfo_ast2300; else @@ -106,7 +108,8 @@ ast_set_def_ext_reg(struct drm_device *dev) /* Enable RAMDAC for A1 */ reg = 0x04; - if (ast->chip == AST2300 || ast->chip == AST2400) + if (ast->chip == AST2300 || ast->chip == AST2400 || + ast->chip == AST2500) reg |= 0x20; ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff, reg); } @@ -380,7 +383,9 @@ void ast_post_gpu(struct drm_device *dev) ast_set_def_ext_reg(dev); if (ast->config_mode == ast_use_p2a) { - if (ast->chip == AST2300 || ast->chip == AST2400) + if (ast->chip == AST2500) + ast_post_chip_2500(dev); + else if (ast->chip == AST2300 || ast->chip == AST2400) ast_post_chip_2300(dev); else ast_init_dram_reg(dev); @@ -506,6 +511,11 @@ static u32 mmc_test_single2(struct ast_private *ast, u32 
datagen) return mmc_test2(ast, datagen, 0x05); } +static bool mmc_test_single_2500(struct ast_private *ast, u32 datagen) +{ + return mmc_test(ast, datagen, 0x85); +} + static int cbr_test(struct ast_private *ast) { u32 data; @@ -1672,3 +1682,404 @@ static void ast_post_chip_2300(struct drm_device *dev) } while ((reg & 0x40) == 0); } +static bool cbr_test_2500(struct ast_private *ast) +{ + ast_moutdwm(ast, 0x1E6E0074, 0x0000FFFF); + ast_moutdwm(ast, 0x1E6E007C, 0xFF00FF00); + if (!mmc_test_burst(ast, 0)) + return false; + if (!mmc_test_single_2500(ast, 0)) + return false; + return true; +} + +static bool ddr_test_2500(struct ast_private *ast) +{ + ast_moutdwm(ast, 0x1E6E0074, 0x0000FFFF); + ast_moutdwm(ast, 0x1E6E007C, 0xFF00FF00); + if (!mmc_test_burst(ast, 0)) + return false; + if (!mmc_test_burst(ast, 1)) + return false; + if (!mmc_test_burst(ast, 2)) + return false; + if (!mmc_test_burst(ast, 3)) + return false; + if (!mmc_test_single_2500(ast, 0)) + return false; + return true; +} + +static void ddr_init_common_2500(struct ast_private *ast) +{ + ast_moutdwm(ast, 0x1E6E0034, 0x00020080); + ast_moutdwm(ast, 0x1E6E0008, 0x2003000F); + ast_moutdwm(ast, 0x1E6E0038, 0x00000FFF); + ast_moutdwm(ast, 0x1E6E0040, 0x88448844); + ast_moutdwm(ast, 0x1E6E0044, 0x24422288); + ast_moutdwm(ast, 0x1E6E0048, 0x22222222); + ast_moutdwm(ast, 0x1E6E004C, 0x22222222); + ast_moutdwm(ast, 0x1E6E0050, 0x80000000); + ast_moutdwm(ast, 0x1E6E0208, 0x00000000); + ast_moutdwm(ast, 0x1E6E0218, 0x00000000); + ast_moutdwm(ast, 0x1E6E0220, 0x00000000); + ast_moutdwm(ast, 0x1E6E0228, 0x00000000); + ast_moutdwm(ast, 0x1E6E0230, 0x00000000); + ast_moutdwm(ast, 0x1E6E02A8, 0x00000000); + ast_moutdwm(ast, 0x1E6E02B0, 0x00000000); + ast_moutdwm(ast, 0x1E6E0240, 0x86000000); + ast_moutdwm(ast, 0x1E6E0244, 0x00008600); + ast_moutdwm(ast, 0x1E6E0248, 0x80000000); + ast_moutdwm(ast, 0x1E6E024C, 0x80808080); +} + +static void ddr_phy_init_2500(struct ast_private *ast) +{ + u32 data, pass, timecnt; + + pass = 0; + ast_moutdwm(ast, 0x1E6E0060, 0x00000005); + while (!pass) { + for (timecnt = 0; timecnt < TIMEOUT; timecnt++) { + data = ast_mindwm(ast, 0x1E6E0060) & 0x1; + if (!data) + break; + } + if (timecnt != TIMEOUT) { + data = ast_mindwm(ast, 0x1E6E0300) & 0x000A0000; + if (!data) + pass = 1; + } + if (!pass) { + ast_moutdwm(ast, 0x1E6E0060, 0x00000000); + udelay(10); /* delay 10 us */ + ast_moutdwm(ast, 0x1E6E0060, 0x00000005); + } + } + + ast_moutdwm(ast, 0x1E6E0060, 0x00000006); +} + +/* + * Check DRAM Size + * 1Gb : 0x80000000 ~ 0x87FFFFFF + * 2Gb : 0x80000000 ~ 0x8FFFFFFF + * 4Gb : 0x80000000 ~ 0x9FFFFFFF + * 8Gb : 0x80000000 ~ 0xBFFFFFFF + */ +static void check_dram_size_2500(struct ast_private *ast, u32 tRFC) +{ + u32 reg_04, reg_14; + + reg_04 = ast_mindwm(ast, 0x1E6E0004) & 0xfffffffc; + reg_14 = ast_mindwm(ast, 0x1E6E0014) & 0xffffff00; + + ast_moutdwm(ast, 0xA0100000, 0x41424344); + ast_moutdwm(ast, 0x90100000, 0x35363738); + ast_moutdwm(ast, 0x88100000, 0x292A2B2C); + ast_moutdwm(ast, 0x80100000, 0x1D1E1F10); + + /* Check 8Gbit */ + if (ast_mindwm(ast, 0xA0100000) == 0x41424344) { + reg_04 |= 0x03; + reg_14 |= (tRFC >> 24) & 0xFF; + /* Check 4Gbit */ + } else if (ast_mindwm(ast, 0x90100000) == 0x35363738) { + reg_04 |= 0x02; + reg_14 |= (tRFC >> 16) & 0xFF; + /* Check 2Gbit */ + } else if (ast_mindwm(ast, 0x88100000) == 0x292A2B2C) { + reg_04 |= 0x01; + reg_14 |= (tRFC >> 8) & 0xFF; + } else { + reg_14 |= tRFC & 0xFF; + } + ast_moutdwm(ast, 0x1E6E0004, reg_04); + ast_moutdwm(ast, 0x1E6E0014, reg_14); +} + +static void 
enable_cache_2500(struct ast_private *ast) +{ + u32 reg_04, data; + + reg_04 = ast_mindwm(ast, 0x1E6E0004); + ast_moutdwm(ast, 0x1E6E0004, reg_04 | 0x1000); + + do + data = ast_mindwm(ast, 0x1E6E0004); + while (!(data & 0x80000)); + ast_moutdwm(ast, 0x1E6E0004, reg_04 | 0x400); +} + +static void set_mpll_2500(struct ast_private *ast) +{ + u32 addr, data, param; + + /* Reset MMC */ + ast_moutdwm(ast, 0x1E6E0000, 0xFC600309); + ast_moutdwm(ast, 0x1E6E0034, 0x00020080); + for (addr = 0x1e6e0004; addr < 0x1e6e0090;) { + ast_moutdwm(ast, addr, 0x0); + addr += 4; + } + ast_moutdwm(ast, 0x1E6E0034, 0x00020000); + + ast_moutdwm(ast, 0x1E6E2000, 0x1688A8A8); + data = ast_mindwm(ast, 0x1E6E2070) & 0x00800000; + if (data) { + /* CLKIN = 25MHz */ + param = 0x930023E0; + ast_moutdwm(ast, 0x1E6E2160, 0x00011320); + } else { + /* CLKIN = 24MHz */ + param = 0x93002400; + } + ast_moutdwm(ast, 0x1E6E2020, param); + udelay(100); +} + +static void reset_mmc_2500(struct ast_private *ast) +{ + ast_moutdwm(ast, 0x1E78505C, 0x00000004); + ast_moutdwm(ast, 0x1E785044, 0x00000001); + ast_moutdwm(ast, 0x1E785048, 0x00004755); + ast_moutdwm(ast, 0x1E78504C, 0x00000013); + mdelay(100); + ast_moutdwm(ast, 0x1E785054, 0x00000077); + ast_moutdwm(ast, 0x1E6E0000, 0xFC600309); +} + +static void ddr3_init_2500(struct ast_private *ast, const u32 *ddr_table) +{ + + ast_moutdwm(ast, 0x1E6E0004, 0x00000303); + ast_moutdwm(ast, 0x1E6E0010, ddr_table[REGIDX_010]); + ast_moutdwm(ast, 0x1E6E0014, ddr_table[REGIDX_014]); + ast_moutdwm(ast, 0x1E6E0018, ddr_table[REGIDX_018]); + ast_moutdwm(ast, 0x1E6E0020, ddr_table[REGIDX_020]); /* MODEREG4/6 */ + ast_moutdwm(ast, 0x1E6E0024, ddr_table[REGIDX_024]); /* MODEREG5 */ + ast_moutdwm(ast, 0x1E6E002C, ddr_table[REGIDX_02C] | 0x100); /* MODEREG0/2 */ + ast_moutdwm(ast, 0x1E6E0030, ddr_table[REGIDX_030]); /* MODEREG1/3 */ + + /* DDR PHY Setting */ + ast_moutdwm(ast, 0x1E6E0200, 0x02492AAE); + ast_moutdwm(ast, 0x1E6E0204, 0x00001001); + ast_moutdwm(ast, 0x1E6E020C, 0x55E00B0B); + ast_moutdwm(ast, 0x1E6E0210, 0x20000000); + ast_moutdwm(ast, 0x1E6E0214, ddr_table[REGIDX_214]); + ast_moutdwm(ast, 0x1E6E02E0, ddr_table[REGIDX_2E0]); + ast_moutdwm(ast, 0x1E6E02E4, ddr_table[REGIDX_2E4]); + ast_moutdwm(ast, 0x1E6E02E8, ddr_table[REGIDX_2E8]); + ast_moutdwm(ast, 0x1E6E02EC, ddr_table[REGIDX_2EC]); + ast_moutdwm(ast, 0x1E6E02F0, ddr_table[REGIDX_2F0]); + ast_moutdwm(ast, 0x1E6E02F4, ddr_table[REGIDX_2F4]); + ast_moutdwm(ast, 0x1E6E02F8, ddr_table[REGIDX_2F8]); + ast_moutdwm(ast, 0x1E6E0290, 0x00100008); + ast_moutdwm(ast, 0x1E6E02C0, 0x00000006); + + /* Controller Setting */ + ast_moutdwm(ast, 0x1E6E0034, 0x00020091); + + /* Wait DDR PHY init done */ + ddr_phy_init_2500(ast); + + ast_moutdwm(ast, 0x1E6E0120, ddr_table[REGIDX_PLL]); + ast_moutdwm(ast, 0x1E6E000C, 0x42AA5C81); + ast_moutdwm(ast, 0x1E6E0034, 0x0001AF93); + + check_dram_size_2500(ast, ddr_table[REGIDX_RFC]); + enable_cache_2500(ast); + ast_moutdwm(ast, 0x1E6E001C, 0x00000008); + ast_moutdwm(ast, 0x1E6E0038, 0xFFFFFF00); +} + +static void ddr4_init_2500(struct ast_private *ast, const u32 *ddr_table) +{ + u32 data, data2, pass, retrycnt; + u32 ddr_vref, phy_vref; + u32 min_ddr_vref = 0, min_phy_vref = 0; + u32 max_ddr_vref = 0, max_phy_vref = 0; + + ast_moutdwm(ast, 0x1E6E0004, 0x00000313); + ast_moutdwm(ast, 0x1E6E0010, ddr_table[REGIDX_010]); + ast_moutdwm(ast, 0x1E6E0014, ddr_table[REGIDX_014]); + ast_moutdwm(ast, 0x1E6E0018, ddr_table[REGIDX_018]); + ast_moutdwm(ast, 0x1E6E0020, ddr_table[REGIDX_020]); /* MODEREG4/6 */ + 
ast_moutdwm(ast, 0x1E6E0024, ddr_table[REGIDX_024]); /* MODEREG5 */ + ast_moutdwm(ast, 0x1E6E002C, ddr_table[REGIDX_02C] | 0x100); /* MODEREG0/2 */ + ast_moutdwm(ast, 0x1E6E0030, ddr_table[REGIDX_030]); /* MODEREG1/3 */ + + /* DDR PHY Setting */ + ast_moutdwm(ast, 0x1E6E0200, 0x42492AAE); + ast_moutdwm(ast, 0x1E6E0204, 0x09002000); + ast_moutdwm(ast, 0x1E6E020C, 0x55E00B0B); + ast_moutdwm(ast, 0x1E6E0210, 0x20000000); + ast_moutdwm(ast, 0x1E6E0214, ddr_table[REGIDX_214]); + ast_moutdwm(ast, 0x1E6E02E0, ddr_table[REGIDX_2E0]); + ast_moutdwm(ast, 0x1E6E02E4, ddr_table[REGIDX_2E4]); + ast_moutdwm(ast, 0x1E6E02E8, ddr_table[REGIDX_2E8]); + ast_moutdwm(ast, 0x1E6E02EC, ddr_table[REGIDX_2EC]); + ast_moutdwm(ast, 0x1E6E02F0, ddr_table[REGIDX_2F0]); + ast_moutdwm(ast, 0x1E6E02F4, ddr_table[REGIDX_2F4]); + ast_moutdwm(ast, 0x1E6E02F8, ddr_table[REGIDX_2F8]); + ast_moutdwm(ast, 0x1E6E0290, 0x00100008); + ast_moutdwm(ast, 0x1E6E02C4, 0x3C183C3C); + ast_moutdwm(ast, 0x1E6E02C8, 0x00631E0E); + + /* Controller Setting */ + ast_moutdwm(ast, 0x1E6E0034, 0x0001A991); + + /* Train PHY Vref first */ + pass = 0; + + for (retrycnt = 0; retrycnt < 4 && pass == 0; retrycnt++) { + max_phy_vref = 0x0; + pass = 0; + ast_moutdwm(ast, 0x1E6E02C0, 0x00001C06); + for (phy_vref = 0x40; phy_vref < 0x80; phy_vref++) { + ast_moutdwm(ast, 0x1E6E000C, 0x00000000); + ast_moutdwm(ast, 0x1E6E0060, 0x00000000); + ast_moutdwm(ast, 0x1E6E02CC, phy_vref | (phy_vref << 8)); + /* Fire DFI Init */ + ddr_phy_init_2500(ast); + ast_moutdwm(ast, 0x1E6E000C, 0x00005C01); + if (cbr_test_2500(ast)) { + pass++; + data = ast_mindwm(ast, 0x1E6E03D0); + data2 = data >> 8; + data = data & 0xff; + if (data > data2) + data = data2; + if (max_phy_vref < data) { + max_phy_vref = data; + min_phy_vref = phy_vref; + } + } else if (pass > 0) + break; + } + } + ast_moutdwm(ast, 0x1E6E02CC, min_phy_vref | (min_phy_vref << 8)); + + /* Train DDR Vref next */ + pass = 0; + + for (retrycnt = 0; retrycnt < 4 && pass == 0; retrycnt++) { + min_ddr_vref = 0xFF; + max_ddr_vref = 0x0; + pass = 0; + for (ddr_vref = 0x00; ddr_vref < 0x40; ddr_vref++) { + ast_moutdwm(ast, 0x1E6E000C, 0x00000000); + ast_moutdwm(ast, 0x1E6E0060, 0x00000000); + ast_moutdwm(ast, 0x1E6E02C0, 0x00000006 | (ddr_vref << 8)); + /* Fire DFI Init */ + ddr_phy_init_2500(ast); + ast_moutdwm(ast, 0x1E6E000C, 0x00005C01); + if (cbr_test_2500(ast)) { + pass++; + if (min_ddr_vref > ddr_vref) + min_ddr_vref = ddr_vref; + if (max_ddr_vref < ddr_vref) + max_ddr_vref = ddr_vref; + } else if (pass != 0) + break; + } + } + + ast_moutdwm(ast, 0x1E6E000C, 0x00000000); + ast_moutdwm(ast, 0x1E6E0060, 0x00000000); + ddr_vref = (min_ddr_vref + max_ddr_vref + 1) >> 1; + ast_moutdwm(ast, 0x1E6E02C0, 0x00000006 | (ddr_vref << 8)); + + /* Wait DDR PHY init done */ + ddr_phy_init_2500(ast); + + ast_moutdwm(ast, 0x1E6E0120, ddr_table[REGIDX_PLL]); + ast_moutdwm(ast, 0x1E6E000C, 0x42AA5C81); + ast_moutdwm(ast, 0x1E6E0034, 0x0001AF93); + + check_dram_size_2500(ast, ddr_table[REGIDX_RFC]); + enable_cache_2500(ast); + ast_moutdwm(ast, 0x1E6E001C, 0x00000008); + ast_moutdwm(ast, 0x1E6E0038, 0xFFFFFF00); +} + +static bool ast_dram_init_2500(struct ast_private *ast) +{ + u32 data; + u32 max_tries = 5; + + do { + if (max_tries-- == 0) + return false; + set_mpll_2500(ast); + reset_mmc_2500(ast); + ddr_init_common_2500(ast); + + data = ast_mindwm(ast, 0x1E6E2070); + if (data & 0x01000000) + ddr4_init_2500(ast, ast2500_ddr4_1600_timing_table); + else + ddr3_init_2500(ast, ast2500_ddr3_1600_timing_table); + } while 
(!ddr_test_2500(ast)); + + ast_moutdwm(ast, 0x1E6E2040, ast_mindwm(ast, 0x1E6E2040) | 0x41); + + /* Patch code */ + data = ast_mindwm(ast, 0x1E6E200C) & 0xF9FFFFFF; + ast_moutdwm(ast, 0x1E6E200C, data | 0x10000000); + + return true; +} + +void ast_post_chip_2500(struct drm_device *dev) +{ + struct ast_private *ast = dev->dev_private; + u32 temp; + u8 reg; + + reg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff); + if ((reg & 0x80) == 0) {/* vga only */ + /* Clear bus lock condition */ + ast_moutdwm(ast, 0x1e600000, 0xAEED1A03); + ast_moutdwm(ast, 0x1e600084, 0x00010000); + ast_moutdwm(ast, 0x1e600088, 0x00000000); + ast_moutdwm(ast, 0x1e6e2000, 0x1688A8A8); + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + ast_write32(ast, 0x12000, 0x1688a8a8); + while (ast_read32(ast, 0x12000) != 0x1) + ; + + ast_write32(ast, 0x10000, 0xfc600309); + while (ast_read32(ast, 0x10000) != 0x1) + ; + + /* Slow down CPU/AHB CLK in VGA only mode */ + temp = ast_read32(ast, 0x12008); + temp |= 0x73; + ast_write32(ast, 0x12008, temp); + + /* Reset USB port to patch USB unknown device issue */ + ast_moutdwm(ast, 0x1e6e2090, 0x20000000); + temp = ast_mindwm(ast, 0x1e6e2094); + temp |= 0x00004000; + ast_moutdwm(ast, 0x1e6e2094, temp); + temp = ast_mindwm(ast, 0x1e6e2070); + if (temp & 0x00800000) { + ast_moutdwm(ast, 0x1e6e207c, 0x00800000); + mdelay(100); + ast_moutdwm(ast, 0x1e6e2070, 0x00800000); + } + + if (!ast_dram_init_2500(ast)) + DRM_ERROR("DRAM init failed !\n"); + + temp = ast_mindwm(ast, 0x1e6e2040); + ast_moutdwm(ast, 0x1e6e2040, temp | 0x40); + } + + /* wait ready */ + do { + reg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff); + } while ((reg & 0x40) == 0); +} -- GitLab From fbdab3e7fd547e1ce558db1521659707bdf02cc6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Feb 2017 14:43:30 +0000 Subject: [PATCH 0667/1084] scsi: aacraid: remove redundant zero check on ret The check for ret being zero is redundant as a few statements earlier we break out of the while loop if ret is non-zero. Thus we can remove the zero check and also the dead-code non-zero case too. Detected by CoverityScan, CID#1411632 ("Logically Dead Code") Signed-off-by: Colin Ian King Acked-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 1994c7445b54..a3ad04293487 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2508,8 +2508,7 @@ int aac_command_thread(void *data) && (now.tv_usec > (1000000 / HZ))) difference = (((1000000 - now.tv_usec) * HZ) + 500000) / 1000000; - else if (ret == 0) { - + else { if (now.tv_usec > 500000) ++now.tv_sec; @@ -2520,9 +2519,6 @@ int aac_command_thread(void *data) ret = aac_send_hosttime(dev, &now); difference = (long)(unsigned)update_interval*HZ; - } else { - /* retry shortly */ - difference = 10 * HZ; } next_jiffies = jiffies + difference; if (time_before(next_check_jiffies,next_jiffies)) -- GitLab From 42a70abddd90374518057e989f5b7289b7b535d8 Mon Sep 17 00:00:00 2001 From: "Dupuis, Chad" Date: Fri, 24 Feb 2017 00:31:56 -0800 Subject: [PATCH 0668/1084] scsi: qedi: Fix memory leak in tmf response processing. Signed-off-by: Manish Rangankar Signed-off-by: Chad Dupuis Signed-off-by: Martin K. 
Petersen --- drivers/scsi/qedi/qedi_fw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index b1d3904ae8fd..c9f0ef4e11b3 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -165,10 +165,9 @@ static void qedi_tmf_resp_work(struct work_struct *work) iscsi_block_session(session->cls_session); rval = qedi_cleanup_all_io(qedi, qedi_conn, qedi_cmd->task, true); if (rval) { - clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); qedi_clear_task_idx(qedi, qedi_cmd->task_id); iscsi_unblock_session(session->cls_session); - return; + goto exit_tmf_resp; } iscsi_unblock_session(session->cls_session); @@ -177,6 +176,8 @@ static void qedi_tmf_resp_work(struct work_struct *work) spin_lock(&session->back_lock); __iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr_ptr, NULL, 0); spin_unlock(&session->back_lock); + +exit_tmf_resp: kfree(resp_hdr_ptr); clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); } -- GitLab From 905f21a49d388de3e99438235f3301cabf0c0ef4 Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Wed, 22 Feb 2017 15:10:50 +1100 Subject: [PATCH 0669/1084] drm/ast: Fix test for VGA enabled The test to see if VGA was already enabled is doing an unnecessary second test from a register that may or may not have been initialized to a valid value. Remove it. Signed-off-by: Y.C. Chen Signed-off-by: Benjamin Herrenschmidt Cc: Acked-by: Joel Stanley Tested-by: Y.C. Chen Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_post.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index c15f643915af..a5a780921553 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -59,13 +59,9 @@ bool ast_is_vga_enabled(struct drm_device *dev) /* TODO 1180 */ } else { ch = ast_io_read8(ast, AST_IO_VGA_ENABLE_PORT); - if (ch) { - ast_open_key(ast); - ch = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff); - return ch & 0x04; - } + return !!(ch & 0x01); } - return 0; + return false; } static const u8 extreginfo[] = { 0x0f, 0x04, 0x1c, 0xff }; -- GitLab From 9bb92f51558f2ef5f56c257bdcea0588f31d857e Mon Sep 17 00:00:00 2001 From: "Y.C. Chen" Date: Wed, 22 Feb 2017 15:14:19 +1100 Subject: [PATCH 0670/1084] drm/ast: Call open_key before enable_mmio in POST code open_key enables access to the registers used by enable_mmio. Signed-off-by: Y.C. Chen Signed-off-by: Benjamin Herrenschmidt Acked-by: Joel Stanley Tested-by: Y.C.
Chen Cc: Signed-off-by: Dave Airlie --- drivers/gpu/drm/ast/ast_post.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index a5a780921553..f7d421359d56 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -374,8 +374,8 @@ void ast_post_gpu(struct drm_device *dev) pci_write_config_dword(ast->dev->pdev, 0x04, reg); ast_enable_vga(dev); - ast_enable_mmio(dev); ast_open_key(ast); + ast_enable_mmio(dev); ast_set_def_ext_reg(dev); if (ast->config_mode == ast_use_p2a) { -- GitLab From 19d19e960598161be92a7e4828eb7706c6410ce6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Feb 2017 09:38:11 +0100 Subject: [PATCH 0671/1084] mac80211: use driver-indicated transmitter STA only for data frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When I originally introduced using the driver-indicated station as an optimisation to avoid the hashtable lookup/iteration, of course it wasn't intended to really functionally change anything. I neglected, however, to take into account VLAN interfaces, which have the property that management and data frames are handled differently: data frames go directly to the station and the VLAN while management frames continue to be processed over the underlying/associated AP-type interface. As a consequence, when a driver used this optimisation for management frames and the user enabled VLANs, my change broke things since any management frames, particularly disassoc/deauth, were missed by hostapd. Fix this by restoring the original code path for non-data frames, they aren't critical for performance to begin with. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=194713. Big thanks goes to Jarek who bisected the issue and provided a very detailed bug report, including the crucial information that he was using VLANs in his configuration. Cc: stable@vger.kernel.org Fixes: 771e846bea9e ("mac80211: allow passing transmitter station on RX") Reported-and-tested-by: Jarek Kamiński Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 28cc494a774d..e48724a6725e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4086,15 +4086,17 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, ieee80211_is_beacon(hdr->frame_control))) ieee80211_scan_rx(local, skb); - if (pubsta) { - rx.sta = container_of(pubsta, struct sta_info, sta); - rx.sdata = rx.sta->sdata; - if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) - return; - goto out; - } else if (ieee80211_is_data(fc)) { + if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; + if (pubsta) { + rx.sta = container_of(pubsta, struct sta_info, sta); + rx.sdata = rx.sta->sdata; + if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) + return; + goto out; + } + prev_sta = NULL; for_each_sta_info(local, hdr->addr2, sta, tmp) { -- GitLab From 4e4636cf981b5b629fbfb78aa9f232e015f7d521 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 27 Feb 2017 22:21:16 -0600 Subject: [PATCH 0672/1084] objtool: Enclose contents of unreachable() macro in a block Guenter Roeck reported a boot failure in mips64. It was bisected to the following commit: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") The unreachable() macro was formerly only composed of a single statement. 
The above commit added a second statement, but neglected to enclose the statements in a block. Suggested-by: Guenter Roeck Reported-by: Guenter Roeck Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/20170228042116.glmwmwiohcix7o4a@treble Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index de471346b365..f457b520ead6 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -215,7 +215,8 @@ * this in the preprocessor, but we can live with this because they're * unreleased. Really, we need to have autoconf for the kernel. */ -#define unreachable() annotate_unreachable(); __builtin_unreachable() +#define unreachable() \ + do { annotate_unreachable(); __builtin_unreachable(); } while (0) /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) -- GitLab From 3b30460c5b0ed762be75a004e924ec3f8711e032 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 27 Feb 2017 15:30:56 +0000 Subject: [PATCH 0673/1084] crypto: ccm - move cbcmac input off the stack Commit f15f05b0a5de ("crypto: ccm - switch to separate cbcmac driver") refactored the CCM driver to allow separate implementations of the underlying MAC to be provided by a platform. However, in doing so, it moved some data from the linear region to the stack, which violates the SG constraints when the stack is virtually mapped. So move idata/odata back to the request ctx struct, of which we can reasonably expect that it has been allocated using kmalloc() et al. 
Reported-by: Johannes Berg Fixes: f15f05b0a5de ("crypto: ccm - switch to separate cbcmac driver") Signed-off-by: Ard Biesheuvel Tested-by: Johannes Berg Signed-off-by: Herbert Xu --- crypto/ccm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/ccm.c b/crypto/ccm.c index 442848807a52..1ce37ae0ce56 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -45,6 +45,7 @@ struct crypto_rfc4309_req_ctx { struct crypto_ccm_req_priv_ctx { u8 odata[16]; + u8 idata[16]; u8 auth_tag[16]; u32 flags; struct scatterlist src[3]; @@ -183,8 +184,8 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain, AHASH_REQUEST_ON_STACK(ahreq, ctx->mac); unsigned int assoclen = req->assoclen; struct scatterlist sg[3]; - u8 odata[16]; - u8 idata[16]; + u8 *odata = pctx->odata; + u8 *idata = pctx->idata; int ilen, err; /* format control data for input */ -- GitLab From d0a0b78de4a641ff0924687bfd5a9698f1a97f7a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:30 +0200 Subject: [PATCH 0674/1084] btrfs: Make btrfs_log_all_parents take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3806853cde08..cf45b264cff3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5287,15 +5287,15 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, } static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, - struct inode *inode, + struct btrfs_inode *inode, struct btrfs_log_ctx *ctx) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); int ret; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_root *root = BTRFS_I(inode)->root; - const u64 ino = btrfs_ino(BTRFS_I(inode)); + struct btrfs_root *root = inode->root; + const u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); if (!path) @@ -5506,7 +5506,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, * and has a link count of 2. 
*/ if (BTRFS_I(inode)->last_unlink_trans > last_committed) { - ret = btrfs_log_all_parents(trans, orig_inode, ctx); + ret = btrfs_log_all_parents(trans, BTRFS_I(orig_inode), ctx); if (ret) goto end_trans; } -- GitLab From 8e7611cf38765f1bf1324ed1190f1f8e76ab9546 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:31 +0200 Subject: [PATCH 0675/1084] btrfs: Make btrfs_insert_dir_item take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/dir-item.c | 6 +++--- fs/btrfs/inode.c | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/transaction.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad23a73ac7e7..97f84a80b479 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2982,7 +2982,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, const char *name, int name_len); int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, - int name_len, struct inode *dir, + int name_len, struct btrfs_inode *dir, struct btrfs_key *location, u8 type, u64 index); struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 724504a2d7ac..b13d9536d4de 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -120,7 +120,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, - struct inode *dir, struct btrfs_key *location, + struct btrfs_inode *dir, struct btrfs_key *location, u8 type, u64 index) { int ret = 0; @@ -133,7 +133,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_disk_key disk_key; u32 data_size; - key.objectid = btrfs_ino(BTRFS_I(dir)); + key.objectid = btrfs_ino(dir); key.type = BTRFS_DIR_ITEM_KEY; key.offset = btrfs_name_hash(name, name_len); @@ -174,7 +174,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_release_path(path); ret2 = btrfs_insert_delayed_dir_index(trans, root->fs_info, name, - name_len, BTRFS_I(dir), &disk_key, type, index); + name_len, dir, &disk_key, type, index); out_free: btrfs_free_path(path); if (ret) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c38391e948d9..a23391b52ab1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6276,7 +6276,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, return ret; ret = btrfs_insert_dir_item(trans, root, name, name_len, - parent_inode, &key, + BTRFS_I(parent_inode), &key, btrfs_inode_type(inode), index); if (ret == -EEXIST || ret == -EOVERFLOW) goto fail_dir_item; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d8539979b44f..a55361d9554d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -587,7 +587,7 @@ static noinline int create_subvol(struct inode *dir, } ret = btrfs_insert_dir_item(trans, root, - name, namelen, dir, &key, + name, namelen, BTRFS_I(dir), &key, BTRFS_FT_DIR, index); if (ret) { btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6b3e0fc2fe7a..294563216dd3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1644,7 +1644,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_insert_dir_item(trans, parent_root, dentry->d_name.name, dentry->d_name.len, - parent_inode, &key, 
+ BTRFS_I(parent_inode), &key, BTRFS_FT_DIR, index); /* We have check then name at the beginning, so it is impossible. */ BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); -- GitLab From 4c570655f4f95609e9f682a36647f84b54b8a721 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:32 +0200 Subject: [PATCH 0676/1084] btrfs: make btrfs_set_inode_index_count take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a23391b52ab1..226b5a9fdf0d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5978,15 +5978,15 @@ static int btrfs_update_time(struct inode *inode, struct timespec *now, * and then set the in-memory index_cnt variable to reflect * free sequence numbers */ -static int btrfs_set_inode_index_count(struct inode *inode) +static int btrfs_set_inode_index_count(struct btrfs_inode *inode) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; struct btrfs_key key, found_key; struct btrfs_path *path; struct extent_buffer *leaf; int ret; - key.objectid = btrfs_ino(BTRFS_I(inode)); + key.objectid = btrfs_ino(inode); key.type = BTRFS_DIR_INDEX_KEY; key.offset = (u64)-1; @@ -6009,7 +6009,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) * else has to start at 2 */ if (path->slots[0] == 0) { - BTRFS_I(inode)->index_cnt = 2; + inode->index_cnt = 2; goto out; } @@ -6018,13 +6018,13 @@ static int btrfs_set_inode_index_count(struct inode *inode) leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) || + if (found_key.objectid != btrfs_ino(inode) || found_key.type != BTRFS_DIR_INDEX_KEY) { - BTRFS_I(inode)->index_cnt = 2; + inode->index_cnt = 2; goto out; } - BTRFS_I(inode)->index_cnt = found_key.offset + 1; + inode->index_cnt = found_key.offset + 1; out: btrfs_free_path(path); return ret; @@ -6041,7 +6041,7 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) if (BTRFS_I(dir)->index_cnt == (u64)-1) { ret = btrfs_inode_delayed_dir_index_count(BTRFS_I(dir)); if (ret) { - ret = btrfs_set_inode_index_count(dir); + ret = btrfs_set_inode_index_count(BTRFS_I(dir)); if (ret) return ret; } -- GitLab From 877574e2548bbfd792b0b1200d4b46eef54c05f5 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:33 +0200 Subject: [PATCH 0677/1084] btrfs: Make btrfs_set_inode_index take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 22 +++++++++++----------- fs/btrfs/ioctl.c | 2 +- fs/btrfs/transaction.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 226b5a9fdf0d..d4ba1c4235de 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6034,21 +6034,21 @@ static int btrfs_set_inode_index_count(struct btrfs_inode *inode) * helper to find a free sequence number in a given directory. 
This current * code is very simple, later versions will do smarter things in the btree */ -int btrfs_set_inode_index(struct inode *dir, u64 *index) +int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index) { int ret = 0; - if (BTRFS_I(dir)->index_cnt == (u64)-1) { - ret = btrfs_inode_delayed_dir_index_count(BTRFS_I(dir)); + if (dir->index_cnt == (u64)-1) { + ret = btrfs_inode_delayed_dir_index_count(dir); if (ret) { - ret = btrfs_set_inode_index_count(BTRFS_I(dir)); + ret = btrfs_set_inode_index_count(dir); if (ret) return ret; } } - *index = BTRFS_I(dir)->index_cnt; - BTRFS_I(dir)->index_cnt++; + *index = dir->index_cnt; + dir->index_cnt++; return ret; } @@ -6109,7 +6109,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (dir && name) { trace_btrfs_inode_request(dir); - ret = btrfs_set_inode_index(dir, index); + ret = btrfs_set_inode_index(BTRFS_I(dir), index); if (ret) { btrfs_free_path(path); iput(inode); @@ -6490,7 +6490,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink >= BTRFS_LINK_MAX) return -EMLINK; - err = btrfs_set_inode_index(dir, &index); + err = btrfs_set_inode_index(BTRFS_I(dir), &index); if (err) goto fail; @@ -9480,10 +9480,10 @@ static int btrfs_rename_exchange(struct inode *old_dir, * We need to find a free sequence number both in the source and * in the destination directory for the exchange. */ - ret = btrfs_set_inode_index(new_dir, &old_idx); + ret = btrfs_set_inode_index(BTRFS_I(new_dir), &old_idx); if (ret) goto out_fail; - ret = btrfs_set_inode_index(old_dir, &new_idx); + ret = btrfs_set_inode_index(BTRFS_I(old_dir), &new_idx); if (ret) goto out_fail; @@ -9791,7 +9791,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (dest != root) btrfs_record_root_in_trans(trans, dest); - ret = btrfs_set_inode_index(new_dir, &index); + ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index); if (ret) goto out_fail; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a55361d9554d..628d1b180cee 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -580,7 +580,7 @@ static noinline int create_subvol(struct inode *dir, /* * insert the directory item */ - ret = btrfs_set_inode_index(dir, &index); + ret = btrfs_set_inode_index(BTRFS_I(dir), &index); if (ret) { btrfs_abort_transaction(trans, ret); goto fail; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 294563216dd3..05c2bbff2a28 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1505,7 +1505,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, /* * insert the directory item */ - ret = btrfs_set_inode_index(parent_inode, &index); + ret = btrfs_set_inode_index(BTRFS_I(parent_inode), &index); BUG_ON(ret); /* -ENOMEM */ /* check if there is a file/dir which has the same name. 
*/ -- GitLab From 6ef06d27903d9c15505dc1a3ccf424f5018562f7 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:34 +0200 Subject: [PATCH 0678/1084] btrfs: Make btrfs_i_size_write take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 6 +++--- fs/btrfs/delayed-inode.c | 2 +- fs/btrfs/free-space-cache.c | 2 +- fs/btrfs/inode.c | 19 +++++++++---------- fs/btrfs/ioctl.c | 4 ++-- fs/btrfs/transaction.c | 2 +- fs/btrfs/tree-log.c | 2 +- 7 files changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 819a6d27218a..46d117b77bd2 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -237,10 +237,10 @@ static inline u64 btrfs_ino(struct btrfs_inode *inode) return ino; } -static inline void btrfs_i_size_write(struct inode *inode, u64 size) +static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size) { - i_size_write(inode, size); - BTRFS_I(inode)->disk_i_size = size; + i_size_write(&inode->vfs_inode, size); + inode->disk_i_size = size; } static inline bool btrfs_is_free_space_inode(struct inode *inode) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index f7a6ee5ccc80..1aff676f0e5b 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1790,7 +1790,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) i_uid_write(inode, btrfs_stack_inode_uid(inode_item)); i_gid_write(inode, btrfs_stack_inode_gid(inode_item)); - btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); + btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item)); inode->i_mode = btrfs_stack_inode_mode(inode_item); set_nlink(inode, btrfs_stack_inode_nlink(inode_item)); inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 1a131f7d6c1b..c0f313cbbbf2 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -260,7 +260,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans, btrfs_free_path(path); } - btrfs_i_size_write(inode, 0); + btrfs_i_size_write(BTRFS_I(inode), 0); truncate_pagecache(inode, 0); /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d4ba1c4235de..4e9407a014d9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3617,7 +3617,7 @@ static int btrfs_read_locked_inode(struct inode *inode) set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); i_uid_write(inode, btrfs_inode_uid(leaf, inode_item)); i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); - btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); + btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item)); inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime); inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime); @@ -3988,8 +3988,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, if (ret) goto out; - btrfs_i_size_write(&dir->vfs_inode, - dir->vfs_inode.i_size - name_len * 2); + btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2); inode_inc_iversion(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime = @@ -4137,7 +4136,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, goto out; } - btrfs_i_size_write(dir, dir->i_size - name_len * 2); + btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2); inode_inc_iversion(dir); dir->i_mtime = dir->i_ctime = current_time(dir); ret = 
btrfs_update_inode_fallback(trans, root, dir); @@ -4184,7 +4183,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) BTRFS_I(d_inode(dentry)), dentry->d_name.name, dentry->d_name.len); if (!err) { - btrfs_i_size_write(inode, 0); + btrfs_i_size_write(BTRFS_I(inode), 0); /* * Propagate the last_unlink_trans value of the deleted dir to * its parent directory. This is to prevent an unrecoverable @@ -5221,7 +5220,7 @@ void btrfs_evict_inode(struct inode *inode) rsv->failfast = 1; global_rsv = &fs_info->global_block_rsv; - btrfs_i_size_write(inode, 0); + btrfs_i_size_write(BTRFS_I(inode), 0); /* * This is a bit simpler than btrfs_truncate since we've already @@ -6285,7 +6284,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, return ret; } - btrfs_i_size_write(parent_inode, parent_inode->i_size + + btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + name_len * 2); inode_inc_iversion(parent_inode); parent_inode->i_mtime = parent_inode->i_ctime = @@ -6589,7 +6588,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) goto out_fail_inode; - btrfs_i_size_write(inode, 0); + btrfs_i_size_write(BTRFS_I(inode), 0); err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail_inode; @@ -9205,7 +9204,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, inode->i_fop = &btrfs_dir_file_operations; set_nlink(inode, 1); - btrfs_i_size_write(inode, 0); + btrfs_i_size_write(BTRFS_I(inode), 0); unlock_new_inode(inode); err = btrfs_subvol_inherit_props(trans, new_root, parent_root); @@ -10232,7 +10231,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode_nohighmem(inode); inode->i_mapping->a_ops = &btrfs_symlink_aops; inode_set_bytes(inode, name_len); - btrfs_i_size_write(inode, name_len); + btrfs_i_size_write(BTRFS_I(inode), name_len); err = btrfs_update_inode(trans, root, inode); /* * Last step, add directory indexes for our symlink inode. 
This is the diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 628d1b180cee..86f993c958ba 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -594,7 +594,7 @@ static noinline int create_subvol(struct inode *dir, goto fail; } - btrfs_i_size_write(dir, dir->i_size + namelen * 2); + btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2); ret = btrfs_update_inode(trans, root, dir); BUG_ON(ret); @@ -3298,7 +3298,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, if (endoff > destoff + olen) endoff = destoff + olen; if (endoff > inode->i_size) - btrfs_i_size_write(inode, endoff); + btrfs_i_size_write(BTRFS_I(inode), endoff); ret = btrfs_update_inode(trans, root, inode); if (ret) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 05c2bbff2a28..61b807de3e16 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1653,7 +1653,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } - btrfs_i_size_write(parent_inode, parent_inode->i_size + + btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + dentry->d_name.len * 2); parent_inode->i_mtime = parent_inode->i_ctime = current_time(parent_inode); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cf45b264cff3..071f64944711 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1780,7 +1780,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, out: btrfs_release_path(path); if (!ret && update_size) { - btrfs_i_size_write(dir, dir->i_size + name_len * 2); + btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2); ret = btrfs_update_inode(trans, root, dir); } kfree(name); -- GitLab From 70ddc553b5522b96e65a162be1cecba532630841 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:35 +0200 Subject: [PATCH 0679/1084] btrfs: make btrfs_is_free_space_inode take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 8 ++++---- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/file-item.c | 2 +- fs/btrfs/inode.c | 23 ++++++++++++----------- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 46d117b77bd2..36eca5464e1b 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -243,14 +243,14 @@ static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size) inode->disk_i_size = size; } -static inline bool btrfs_is_free_space_inode(struct inode *inode) +static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; if (root == root->fs_info->tree_root && - btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID) + btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID) return true; - if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) + if (inode->location.objectid == BTRFS_FREE_INO_OBJECTID) return true; return false; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 97f84a80b479..8ab0ce65a218 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3123,7 +3123,7 @@ static inline void btrfs_force_ra(struct address_space *mapping, } struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); -int btrfs_set_inode_index(struct inode *dir, u64 *index); +int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct 
btrfs_inode *dir, struct btrfs_inode *inode, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c35b96633554..b8a172e65619 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4148,7 +4148,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = ALIGN(bytes, fs_info->sectorsize); - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { need_commit = 0; ASSERT(current->journal_info); } @@ -5947,7 +5947,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * If we have a transaction open (can happen if we call truncate_block * from truncate), then we need FLUSH_LIMIT so we don't deadlock. */ - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { flush = BTRFS_RESERVE_NO_FLUSH; delalloc_lock = false; } else if (current->journal_info) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f7b9a92ad56d..a8a0dd217084 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -214,7 +214,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, * read from the commit root and sidestep a nasty deadlock * between reading the free space cache and updating the csum tree. */ - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { path->search_commit_root = 1; path->skip_locking = 1; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4e9407a014d9..a465a927395e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -933,7 +933,7 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map *em; int ret = 0; - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { WARN_ON_ONCE(1); ret = -EINVAL; goto out_unlock; @@ -1231,7 +1231,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, return -ENOMEM; } - nolock = btrfs_is_free_space_inode(inode); + nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); cow_start = (u64)-1; cur_offset = start; @@ -1670,7 +1670,7 @@ static void btrfs_set_bit_hook(struct inode *inode, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !btrfs_is_free_space_inode(inode); + bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1720,7 +1720,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, */ if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; - bool do_list = !btrfs_is_free_space_inode(inode); + bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; @@ -1854,7 +1854,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - if (btrfs_is_free_space_inode(inode)) + if (btrfs_is_free_space_inode(BTRFS_I(inode))) metadata = BTRFS_WQ_ENDIO_FREE_SPACE; if (bio_op(bio) != REQ_OP_WRITE) { @@ -2793,7 +2793,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) bool nolock; bool truncated = false; - nolock = btrfs_is_free_space_inode(inode); + nolock = btrfs_is_free_space_inode(BTRFS_I(inode)); if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { ret = -EIO; @@ -2993,7 +2993,7 @@ static int 
btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, end - start + 1, uptodate)) return 0; - if (btrfs_is_free_space_inode(inode)) { + if (btrfs_is_free_space_inode(BTRFS_I(inode))) { wq = fs_info->endio_freespace_worker; func = btrfs_freespace_write_helper; } else { @@ -3865,7 +3865,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, * The data relocation inode should also be directly updated * without delay */ - if (!btrfs_is_free_space_inode(inode) + if (!btrfs_is_free_space_inode(BTRFS_I(inode)) && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { btrfs_update_root_times(trans, root); @@ -4319,7 +4319,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, * for non-free space inodes and ref cows, we want to back off from * time to time */ - if (!btrfs_is_free_space_inode(inode) && + if (!btrfs_is_free_space_inode(BTRFS_I(inode)) && test_bit(BTRFS_ROOT_REF_COWS, &root->state)) be_nice = 1; @@ -5180,7 +5180,7 @@ void btrfs_evict_inode(struct inode *inode) if (inode->i_nlink && ((btrfs_root_refs(&root->root_item) != 0 && root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) || - btrfs_is_free_space_inode(inode))) + btrfs_is_free_space_inode(BTRFS_I(inode)))) goto no_delete; if (is_bad_inode(inode)) { @@ -5897,7 +5897,8 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) return 0; - if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode)) + if (btrfs_fs_closing(root->fs_info) && + btrfs_is_free_space_inode(BTRFS_I(inode))) nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { -- GitLab From 04f4f916531adc7d2ca6fdb16a68b6f2ff2a8a3b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:36 +0200 Subject: [PATCH 0680/1084] btrfs: make btrfs_alloc_data_chunk_ondemand take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 8 ++++---- fs/btrfs/file.c | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8ab0ce65a218..76e661d00822 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2687,7 +2687,7 @@ enum btrfs_flush_state { }; int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len); -int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes); +int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes); void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len); void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start, u64 len); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b8a172e65619..c17493ee2d9d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4135,10 +4135,10 @@ static u64 btrfs_space_info_used(struct btrfs_space_info *s_info, (may_use_included ? 
s_info->bytes_may_use : 0); } -int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes) +int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) { struct btrfs_space_info *data_sinfo; - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; u64 used; int ret = 0; @@ -4148,7 +4148,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = ALIGN(bytes, fs_info->sectorsize); - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (btrfs_is_free_space_inode(inode)) { need_commit = 0; ASSERT(current->journal_info); } @@ -4281,7 +4281,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len) round_down(start, fs_info->sectorsize); start = round_down(start, fs_info->sectorsize); - ret = btrfs_alloc_data_chunk_ondemand(inode, len); + ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len); if (ret < 0) return ret; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 18e5146df864..073bc975bbb6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2747,7 +2747,8 @@ static long btrfs_fallocate(struct file *file, int mode, * * For qgroup space, it will be checked later. */ - ret = btrfs_alloc_data_chunk_ondemand(inode, alloc_end - alloc_start); + ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), + alloc_end - alloc_start); if (ret < 0) return ret; -- GitLab From baa3ba39b91bdfa270b3f6db6fdc81a1a6b4ab34 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:37 +0200 Subject: [PATCH 0681/1084] btrfs: Make drop_outstanding_extent take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c17493ee2d9d..19c2a9b6b52b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5846,7 +5846,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, * reserved extents that need to be freed. This must be called with * BTRFS_I(inode)->lock held. */ -static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes) +static unsigned drop_outstanding_extent(struct btrfs_inode *inode, + u64 num_bytes) { unsigned drop_inode_space = 0; unsigned dropped_extents = 0; @@ -5854,25 +5855,23 @@ static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes) num_extents = count_max_extents(num_bytes); ASSERT(num_extents); - ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents); - BTRFS_I(inode)->outstanding_extents -= num_extents; + ASSERT(inode->outstanding_extents >= num_extents); + inode->outstanding_extents -= num_extents; - if (BTRFS_I(inode)->outstanding_extents == 0 && + if (inode->outstanding_extents == 0 && test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) + &inode->runtime_flags)) drop_inode_space = 1; /* * If we have more or the same amount of outstanding extents than we have * reserved then we need to leave the reserved extents count alone. 
*/ - if (BTRFS_I(inode)->outstanding_extents >= - BTRFS_I(inode)->reserved_extents) + if (inode->outstanding_extents >= inode->reserved_extents) return drop_inode_space; - dropped_extents = BTRFS_I(inode)->reserved_extents - - BTRFS_I(inode)->outstanding_extents; - BTRFS_I(inode)->reserved_extents -= dropped_extents; + dropped_extents = inode->reserved_extents - inode->outstanding_extents; + inode->reserved_extents -= dropped_extents; return dropped_extents + drop_inode_space; } @@ -6015,7 +6014,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) out_fail: spin_lock(&BTRFS_I(inode)->lock); - dropped = drop_outstanding_extent(inode, num_bytes); + dropped = drop_outstanding_extent(BTRFS_I(inode), num_bytes); /* * If the inodes csum_bytes is the same as the original * csum_bytes then we know we haven't raced with any free()ers @@ -6094,7 +6093,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&BTRFS_I(inode)->lock); - dropped = drop_outstanding_extent(inode, num_bytes); + dropped = drop_outstanding_extent(BTRFS_I(inode), num_bytes); if (num_bytes) to_free = calc_csum_metadata_size(inode, num_bytes, 0); -- GitLab From 0e6bf9b13caa41b34592369c47faab916233343b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:38 +0200 Subject: [PATCH 0682/1084] btrfs: Make calc_csum_metadata_size take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 19c2a9b6b52b..1fe062685253 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5893,24 +5893,21 @@ static unsigned drop_outstanding_extent(struct btrfs_inode *inode, * * This must be called with BTRFS_I(inode)->lock held. */ -static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, +static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes, int reserve) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); u64 old_csums, num_csums; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && - BTRFS_I(inode)->csum_bytes == 0) + if (inode->flags & BTRFS_INODE_NODATASUM && inode->csum_bytes == 0) return 0; - old_csums = btrfs_csum_bytes_to_leaves(fs_info, - BTRFS_I(inode)->csum_bytes); + old_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); if (reserve) - BTRFS_I(inode)->csum_bytes += num_bytes; + inode->csum_bytes += num_bytes; else - BTRFS_I(inode)->csum_bytes -= num_bytes; - num_csums = btrfs_csum_bytes_to_leaves(fs_info, - BTRFS_I(inode)->csum_bytes); + inode->csum_bytes -= num_bytes; + num_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); /* No change, no need to reserve more */ if (old_csums == num_csums) @@ -5974,7 +5971,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) /* We always want to reserve a slot for updating the inode. */ to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1); - to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); + to_reserve += calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 1); csum_bytes = BTRFS_I(inode)->csum_bytes; spin_unlock(&BTRFS_I(inode)->lock); @@ -6021,7 +6018,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * so we can just reduce our inodes csum bytes and carry on. 
*/ if (BTRFS_I(inode)->csum_bytes == csum_bytes) { - calc_csum_metadata_size(inode, num_bytes, 0); + calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 0); } else { u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; u64 bytes; @@ -6036,7 +6033,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) */ bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; BTRFS_I(inode)->csum_bytes = csum_bytes; - to_free = calc_csum_metadata_size(inode, bytes, 0); + to_free = calc_csum_metadata_size(BTRFS_I(inode), bytes, 0); /* @@ -6046,7 +6043,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) */ BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; bytes = csum_bytes - orig_csum_bytes; - bytes = calc_csum_metadata_size(inode, bytes, 0); + bytes = calc_csum_metadata_size(BTRFS_I(inode), bytes, 0); /* * Now reset ->csum_bytes to what it should be. If bytes is @@ -6096,7 +6093,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) dropped = drop_outstanding_extent(BTRFS_I(inode), num_bytes); if (num_bytes) - to_free = calc_csum_metadata_size(inode, num_bytes, 0); + to_free = calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 0); spin_unlock(&BTRFS_I(inode)->lock); if (dropped > 0) to_free += btrfs_calc_trans_metadata_size(fs_info, dropped); -- GitLab From 8ed7a2a0e0732b62d925041ff04a5e9621e0e58b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:39 +0200 Subject: [PATCH 0683/1084] btrfs: Make btrfs_orphan_reserve_metadata take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 10 +++++----- fs/btrfs/inode.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 76e661d00822..52af63e1d9a0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2695,7 +2695,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans); int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, - struct inode *inode); + struct btrfs_inode *inode); void btrfs_orphan_release_metadata(struct inode *inode); int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1fe062685253..8156964abc10 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5742,10 +5742,10 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) /* Can only return 0 or -ENOSPC */ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; /* * We always use trans->block_rsv here as we will have reserved space * for our orphan when starting the transaction, using get_block_rsv() @@ -5762,8 +5762,8 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, */ u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - trace_btrfs_space_reservation(fs_info, "orphan", - btrfs_ino(BTRFS_I(inode)), num_bytes, 1); + trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode), + num_bytes, 1); return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c 
index a465a927395e..12345d4f25a5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3215,7 +3215,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* grab metadata reservation from transaction handle */ if (reserve) { - ret = btrfs_orphan_reserve_metadata(trans, inode); + ret = btrfs_orphan_reserve_metadata(trans, BTRFS_I(inode)); ASSERT(!ret); if (ret) { atomic_dec(&root->orphan_inodes); -- GitLab From 703b391a0362e0ee35260dd16d7d0c7a12fef0e6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:40 +0200 Subject: [PATCH 0684/1084] btrfs: Make btrfs_orphan_release_metadata take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 10 +++++----- fs/btrfs/inode.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 52af63e1d9a0..8eb07a9dfa6a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2696,7 +2696,7 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans); int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, struct btrfs_inode *inode); -void btrfs_orphan_release_metadata(struct inode *inode); +void btrfs_orphan_release_metadata(struct btrfs_inode *inode); int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8156964abc10..2aa665674329 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5767,14 +5767,14 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); } -void btrfs_orphan_release_metadata(struct inode *inode) +void btrfs_orphan_release_metadata(struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); - trace_btrfs_space_reservation(fs_info, "orphan", - btrfs_ino(BTRFS_I(inode)), num_bytes, 0); + trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode), + num_bytes, 0); btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 12345d4f25a5..8a14b47b4dc0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3237,7 +3237,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (reserve) { clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &BTRFS_I(inode)->runtime_flags); - btrfs_orphan_release_metadata(inode); + btrfs_orphan_release_metadata(BTRFS_I(inode)); } if (ret != -EEXIST) { clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, @@ -3291,7 +3291,7 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, } if (release_rsv) - btrfs_orphan_release_metadata(inode); + btrfs_orphan_release_metadata(BTRFS_I(inode)); return ret; } -- GitLab From 9f3db423f98c5c6c53b47f4bb2729901500bc330 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:41 +0200 Subject: [PATCH 0685/1084] btrfs: Make btrfs_delalloc_reserve_metadata take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 69 +++++++++++++++++++++--------------------- fs/btrfs/file.c | 3 +- 
fs/btrfs/relocation.c | 3 +- 4 files changed, 39 insertions(+), 38 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8eb07a9dfa6a..23dcc42f479b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2703,7 +2703,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, u64 *qgroup_reserved, bool use_global_rsv); void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv); -int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); +int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len); void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2aa665674329..4db3cf4675a1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5920,10 +5920,10 @@ static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes, return btrfs_calc_trans_metadata_size(fs_info, old_csums - num_csums); } -int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) +int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv; u64 to_reserve = 0; u64 csum_bytes; @@ -5943,7 +5943,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * If we have a transaction open (can happen if we call truncate_block * from truncate), then we need FLUSH_LIMIT so we don't deadlock. */ - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (btrfs_is_free_space_inode(inode)) { flush = BTRFS_RESERVE_NO_FLUSH; delalloc_lock = false; } else if (current->journal_info) { @@ -5955,25 +5955,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) schedule_timeout(1); if (delalloc_lock) - mutex_lock(&BTRFS_I(inode)->delalloc_mutex); + mutex_lock(&inode->delalloc_mutex); num_bytes = ALIGN(num_bytes, fs_info->sectorsize); - spin_lock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); nr_extents = count_max_extents(num_bytes); - BTRFS_I(inode)->outstanding_extents += nr_extents; + inode->outstanding_extents += nr_extents; nr_extents = 0; - if (BTRFS_I(inode)->outstanding_extents > - BTRFS_I(inode)->reserved_extents) - nr_extents += BTRFS_I(inode)->outstanding_extents - - BTRFS_I(inode)->reserved_extents; + if (inode->outstanding_extents > inode->reserved_extents) + nr_extents += inode->outstanding_extents - + inode->reserved_extents; /* We always want to reserve a slot for updating the inode. 
*/ to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1); - to_reserve += calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 1); - csum_bytes = BTRFS_I(inode)->csum_bytes; - spin_unlock(&BTRFS_I(inode)->lock); + to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); + csum_bytes = inode->csum_bytes; + spin_unlock(&inode->lock); if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { ret = btrfs_qgroup_reserve_meta(root, @@ -5989,38 +5988,38 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) goto out_fail; } - spin_lock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) { + &inode->runtime_flags)) { to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1); release_extra = true; } - BTRFS_I(inode)->reserved_extents += nr_extents; - spin_unlock(&BTRFS_I(inode)->lock); + inode->reserved_extents += nr_extents; + spin_unlock(&inode->lock); if (delalloc_lock) - mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); + mutex_unlock(&inode->delalloc_mutex); if (to_reserve) trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(BTRFS_I(inode)), to_reserve, 1); + btrfs_ino(inode), to_reserve, 1); if (release_extra) btrfs_block_rsv_release(fs_info, block_rsv, btrfs_calc_trans_metadata_size(fs_info, 1)); return 0; out_fail: - spin_lock(&BTRFS_I(inode)->lock); - dropped = drop_outstanding_extent(BTRFS_I(inode), num_bytes); + spin_lock(&inode->lock); + dropped = drop_outstanding_extent(inode, num_bytes); /* * If the inodes csum_bytes is the same as the original * csum_bytes then we know we haven't raced with any free()ers * so we can just reduce our inodes csum bytes and carry on. */ - if (BTRFS_I(inode)->csum_bytes == csum_bytes) { - calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 0); + if (inode->csum_bytes == csum_bytes) { + calc_csum_metadata_size(inode, num_bytes, 0); } else { - u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; + u64 orig_csum_bytes = inode->csum_bytes; u64 bytes; /* @@ -6031,9 +6030,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * number of bytes that were freed while we were trying our * reservation. */ - bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; - BTRFS_I(inode)->csum_bytes = csum_bytes; - to_free = calc_csum_metadata_size(BTRFS_I(inode), bytes, 0); + bytes = csum_bytes - inode->csum_bytes; + inode->csum_bytes = csum_bytes; + to_free = calc_csum_metadata_size(inode, bytes, 0); /* @@ -6041,9 +6040,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * been making this reservation and our ->csum_bytes were not * artificially inflated. */ - BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; + inode->csum_bytes = csum_bytes - num_bytes; bytes = csum_bytes - orig_csum_bytes; - bytes = calc_csum_metadata_size(BTRFS_I(inode), bytes, 0); + bytes = calc_csum_metadata_size(inode, bytes, 0); /* * Now reset ->csum_bytes to what it should be. If bytes is @@ -6053,23 +6052,23 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * need to do anything, the other free-ers did the correct * thing. 
*/ - BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes; + inode->csum_bytes = orig_csum_bytes - num_bytes; if (bytes > to_free) to_free = bytes - to_free; else to_free = 0; } - spin_unlock(&BTRFS_I(inode)->lock); + spin_unlock(&inode->lock); if (dropped) to_free += btrfs_calc_trans_metadata_size(fs_info, dropped); if (to_free) { btrfs_block_rsv_release(fs_info, block_rsv, to_free); trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(BTRFS_I(inode)), to_free, 0); + btrfs_ino(inode), to_free, 0); } if (delalloc_lock) - mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); + mutex_unlock(&inode->delalloc_mutex); return ret; } @@ -6137,7 +6136,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len) ret = btrfs_check_data_free_space(inode, start, len); if (ret < 0) return ret; - ret = btrfs_delalloc_reserve_metadata(inode, len); + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len); if (ret < 0) btrfs_free_reserved_data_space(inode, start, len); return ret; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 073bc975bbb6..e32a92081547 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1599,7 +1599,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, } } - ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes); + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), + reserve_bytes); if (ret) { if (!only_release_metadata) btrfs_free_reserved_data_space(inode, pos, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ddbde0f08365..4ee1490f086e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3203,7 +3203,8 @@ static int relocate_file_extent_cluster(struct inode *inode, index = (cluster->start - offset) >> PAGE_SHIFT; last_index = (cluster->end - offset) >> PAGE_SHIFT; while (index <= last_index) { - ret = btrfs_delalloc_reserve_metadata(inode, PAGE_SIZE); + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), + PAGE_SIZE); if (ret) goto out; -- GitLab From 691fa059673b3b33c25d7925acb0a58e8204dbd6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:42 +0200 Subject: [PATCH 0686/1084] btrfs: all btrfs_delalloc_release_metadata take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 18 +++++++++--------- fs/btrfs/file.c | 5 +++-- fs/btrfs/free-space-cache.c | 3 ++- fs/btrfs/inode-map.c | 2 +- fs/btrfs/inode.c | 7 ++++--- fs/btrfs/relocation.c | 4 ++-- 7 files changed, 22 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 23dcc42f479b..7f7f3b60a73d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2704,7 +2704,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *rsv); int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); -void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); +void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes); int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len); void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len); void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4db3cf4675a1..7b2313a4441e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6081,27 +6081,27 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode 
*inode, u64 num_bytes) * once we complete IO for a given set of bytes to release their metadata * reservations. */ -void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) +void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); u64 to_free = 0; unsigned dropped; num_bytes = ALIGN(num_bytes, fs_info->sectorsize); - spin_lock(&BTRFS_I(inode)->lock); - dropped = drop_outstanding_extent(BTRFS_I(inode), num_bytes); + spin_lock(&inode->lock); + dropped = drop_outstanding_extent(inode, num_bytes); if (num_bytes) - to_free = calc_csum_metadata_size(BTRFS_I(inode), num_bytes, 0); - spin_unlock(&BTRFS_I(inode)->lock); + to_free = calc_csum_metadata_size(inode, num_bytes, 0); + spin_unlock(&inode->lock); if (dropped > 0) to_free += btrfs_calc_trans_metadata_size(fs_info, dropped); if (btrfs_is_testing(fs_info)) return; - trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(BTRFS_I(inode)), to_free, 0); + trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), + to_free, 0); btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free); } @@ -6159,7 +6159,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len) */ void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len) { - btrfs_delalloc_release_metadata(inode, len); + btrfs_delalloc_release_metadata(BTRFS_I(inode), len); btrfs_free_reserved_data_space(inode, start, len); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e32a92081547..0e30d14b4916 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1678,7 +1678,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, spin_unlock(&BTRFS_I(inode)->lock); } if (only_release_metadata) { - btrfs_delalloc_release_metadata(inode, + btrfs_delalloc_release_metadata(BTRFS_I(inode), release_bytes); } else { u64 __pos; @@ -1739,7 +1739,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, if (release_bytes) { if (only_release_metadata) { btrfs_end_write_no_snapshoting(root); - btrfs_delalloc_release_metadata(inode, release_bytes); + btrfs_delalloc_release_metadata(BTRFS_I(inode), + release_bytes); } else { btrfs_delalloc_release_space(inode, round_down(pos, fs_info->sectorsize), diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index c0f313cbbbf2..7dcf0b100dcd 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3545,7 +3545,8 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, if (ret) { if (release_metadata) - btrfs_delalloc_release_metadata(inode, inode->i_size); + btrfs_delalloc_release_metadata(BTRFS_I(inode), + inode->i_size); #ifdef DEBUG btrfs_err(fs_info, "failed to write free ino cache for root %llu", diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 3bbb8f095953..5c6c20ec64d8 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -499,7 +499,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root, ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, prealloc, prealloc, &alloc_hint); if (ret) { - btrfs_delalloc_release_metadata(inode, prealloc); + btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc); goto out_put; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8a14b47b4dc0..dd0f1abdacd2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -316,7 +316,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, } 
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); - btrfs_delalloc_release_metadata(inode, end + 1 - start); + btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); out: /* @@ -1737,7 +1737,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, */ if (*bits & EXTENT_DO_ACCOUNTING && root != fs_info->tree_root) - btrfs_delalloc_release_metadata(inode, len); + btrfs_delalloc_release_metadata(BTRFS_I(inode), len); /* For sanity tests. */ if (btrfs_is_testing(fs_info)) @@ -2914,7 +2914,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) ordered_extent->len - 1, &cached_state, GFP_NOFS); out: if (root != fs_info->tree_root) - btrfs_delalloc_release_metadata(inode, ordered_extent->len); + btrfs_delalloc_release_metadata(BTRFS_I(inode), + ordered_extent->len); if (trans) btrfs_end_transaction(trans); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4ee1490f086e..e6470890ce26 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3216,7 +3216,7 @@ static int relocate_file_extent_cluster(struct inode *inode, page = find_or_create_page(inode->i_mapping, index, mask); if (!page) { - btrfs_delalloc_release_metadata(inode, + btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE); ret = -ENOMEM; goto out; @@ -3235,7 +3235,7 @@ static int relocate_file_extent_cluster(struct inode *inode, if (!PageUptodate(page)) { unlock_page(page); put_page(page); - btrfs_delalloc_release_metadata(inode, + btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE); ret = -EIO; goto out; -- GitLab From 6158e1ce1cc620df650ebdcfb3cc08a3d86f5a4c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:43 +0200 Subject: [PATCH 0687/1084] btrfs: Make (__)btrfs_add_inode_defrag take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 22 +++++++++++----------- fs/btrfs/inode.c | 9 +++++---- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7f7f3b60a73d..5246cbe4c17f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3215,7 +3215,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, int btrfs_auto_defrag_init(void); void btrfs_auto_defrag_exit(void); int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, - struct inode *inode); + struct btrfs_inode *inode); int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0e30d14b4916..63645e86ad95 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -92,10 +92,10 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1, * If an existing record is found the defrag item you * pass in is freed */ -static int __btrfs_add_inode_defrag(struct inode *inode, +static int __btrfs_add_inode_defrag(struct btrfs_inode *inode, struct inode_defrag *defrag) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); struct inode_defrag *entry; struct rb_node **p; struct rb_node *parent = NULL; @@ -123,7 +123,7 @@ static int __btrfs_add_inode_defrag(struct inode *inode, return -EEXIST; } } - set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); + set_bit(BTRFS_INODE_IN_DEFRAG, 
&inode->runtime_flags); rb_link_node(&defrag->rb_node, parent, p); rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes); return 0; @@ -145,10 +145,10 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info) * enabled */ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct inode_defrag *defrag; u64 transid; int ret; @@ -156,24 +156,24 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (!__need_auto_defrag(fs_info)) return 0; - if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) + if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) return 0; if (trans) transid = trans->transid; else - transid = BTRFS_I(inode)->root->last_trans; + transid = inode->root->last_trans; defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS); if (!defrag) return -ENOMEM; - defrag->ino = btrfs_ino(BTRFS_I(inode)); + defrag->ino = btrfs_ino(inode); defrag->transid = transid; defrag->root = root->root_key.objectid; spin_lock(&fs_info->defrag_inodes_lock); - if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) { + if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) { /* * If we set IN_DEFRAG flag and evict the inode from memory, * and then re-read this inode, this new inode doesn't have @@ -208,7 +208,7 @@ static void btrfs_requeue_inode_defrag(struct inode *inode, * them together. */ spin_lock(&fs_info->defrag_inodes_lock); - ret = __btrfs_add_inode_defrag(inode, defrag); + ret = __btrfs_add_inode_defrag(BTRFS_I(inode), defrag); spin_unlock(&fs_info->defrag_inodes_lock); if (ret) goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dd0f1abdacd2..c396533fd3e5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -389,12 +389,12 @@ static inline int inode_need_compress(struct inode *inode) return 0; } -static inline void inode_should_defrag(struct inode *inode, +static inline void inode_should_defrag(struct btrfs_inode *inode, u64 start, u64 end, u64 num_bytes, u64 small_write) { /* If this is a small write inside eof, kick off a defrag */ if (num_bytes < small_write && - (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) + (start > 0 || end + 1 < inode->disk_i_size)) btrfs_add_inode_defrag(NULL, inode); } @@ -440,7 +440,8 @@ static noinline void compress_file_range(struct inode *inode, int compress_type = fs_info->compress_type; int redirty = 0; - inode_should_defrag(inode, start, end, end - start + 1, SZ_16K); + inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1, + SZ_16K); actual_end = min_t(u64, isize, end + 1); again: @@ -943,7 +944,7 @@ static noinline int cow_file_range(struct inode *inode, num_bytes = max(blocksize, num_bytes); disk_num_bytes = num_bytes; - inode_should_defrag(inode, start, end, num_bytes, SZ_64K); + inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K); if (start == 0) { /* lets try to make an inline extent */ -- GitLab From 46e59791836c75210bdf1f715592b49836fad848 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:44 +0200 Subject: [PATCH 0688/1084] btrfs: Make btrfs_requeue_inode_defrag take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff 
--git a/fs/btrfs/file.c b/fs/btrfs/file.c index 63645e86ad95..dd1b56504e10 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -194,10 +194,10 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, * the same inode in the tree, we will merge them together (by * __btrfs_add_inode_defrag()) and free the one that we want to requeue. */ -static void btrfs_requeue_inode_defrag(struct inode *inode, +static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode, struct inode_defrag *defrag) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); int ret; if (!__need_auto_defrag(fs_info)) @@ -208,7 +208,7 @@ static void btrfs_requeue_inode_defrag(struct inode *inode, * them together. */ spin_lock(&fs_info->defrag_inodes_lock); - ret = __btrfs_add_inode_defrag(BTRFS_I(inode), defrag); + ret = __btrfs_add_inode_defrag(inode, defrag); spin_unlock(&fs_info->defrag_inodes_lock); if (ret) goto out; @@ -334,7 +334,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, */ if (num_defrag == BTRFS_DEFRAG_BATCH) { defrag->last_offset = range.start; - btrfs_requeue_inode_defrag(inode, defrag); + btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag); } else if (defrag->last_offset && !defrag->cycled) { /* * we didn't fill our defrag batch, but @@ -343,7 +343,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, */ defrag->last_offset = 0; defrag->cycled = 1; - btrfs_requeue_inode_defrag(inode, defrag); + btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag); } else { kmem_cache_free(btrfs_inode_defrag_cachep, defrag); } -- GitLab From dcdbc059f01e242f92e3239654a1a57d15b0da5a Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:45 +0200 Subject: [PATCH 0689/1084] btrfs: Make btrfs_drop_extent_cache take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 11 ++++++----- fs/btrfs/inode.c | 31 +++++++++++++++++-------------- fs/btrfs/ioctl.c | 2 +- fs/btrfs/relocation.c | 8 ++++---- fs/btrfs/tests/inode-tests.c | 2 +- 6 files changed, 30 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5246cbe4c17f..4db18e5dc8f9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3219,7 +3219,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); -void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, +void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end, int skip_pinned); extern const struct file_operations btrfs_file_operations; int __btrfs_drop_extents(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dd1b56504e10..5df1de43aace 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -529,13 +529,13 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, * this drops all the extents in the cache that intersect the range * [start, end]. Existing extents are split as required. 
*/ -void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, +void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end, int skip_pinned) { struct extent_map *em; struct extent_map *split = NULL; struct extent_map *split2 = NULL; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; u64 len = end - start + 1; u64 gen; int ret; @@ -720,7 +720,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, int leafs_visited = 0; if (drop_cache) - btrfs_drop_extent_cache(inode, start, end - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0); if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) modify_tree = 0; @@ -2297,7 +2297,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, hole_em = alloc_extent_map(); if (!hole_em) { - btrfs_drop_extent_cache(inode, offset, end - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), offset, end - 1, 0); set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); } else { @@ -2314,7 +2314,8 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, hole_em->generation = trans->transid; do { - btrfs_drop_extent_cache(inode, offset, end - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), offset, + end - 1, 0); write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, hole_em, 1); write_unlock(&em_tree->lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c396533fd3e5..c2383a442ff8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -317,7 +317,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start); - btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0); out: /* * Don't forget to free the reserved space, as for inlined extent @@ -807,7 +807,8 @@ static noinline void submit_compressed_extents(struct inode *inode, BTRFS_ORDERED_COMPRESSED, async_extent->compress_type); if (ret) { - btrfs_drop_extent_cache(inode, async_extent->start, + btrfs_drop_extent_cache(BTRFS_I(inode), + async_extent->start, async_extent->start + async_extent->ram_size - 1, 0); goto out_free_reserve; @@ -972,7 +973,8 @@ static noinline int cow_file_range(struct inode *inode, btrfs_super_total_bytes(fs_info->super_copy)); alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); - btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, + start + num_bytes - 1, 0); while (disk_num_bytes > 0) { unsigned long op; @@ -1040,7 +1042,7 @@ static noinline int cow_file_range(struct inode *inode, return ret; out_drop_extent_cache: - btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0); out_reserve: btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); @@ -2931,7 +2933,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS); /* Drop the cache for the part of the extent we didn't write. 
*/ - btrfs_drop_extent_cache(inode, start, end, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); /* * If the ordered extent had an IOERR or something else went @@ -4337,7 +4339,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, */ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || root == fs_info->tree_root) - btrfs_drop_extent_cache(inode, ALIGN(new_size, + btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size, fs_info->sectorsize), (u64)-1, 0); @@ -4865,7 +4867,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) hole_size); if (err) break; - btrfs_drop_extent_cache(inode, cur_offset, + btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset, cur_offset + hole_size - 1, 0); hole_em = alloc_extent_map(); if (!hole_em) { @@ -4891,7 +4893,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) write_unlock(&em_tree->lock); if (err != -EEXIST) break; - btrfs_drop_extent_cache(inode, cur_offset, + btrfs_drop_extent_cache(BTRFS_I(inode), + cur_offset, cur_offset + hole_size - 1, 0); } @@ -7164,7 +7167,7 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, if (ret) { if (em) { free_extent_map(em); - btrfs_drop_extent_cache(inode, start, + btrfs_drop_extent_cache(BTRFS_I(inode), start, start + len - 1, 0); } em = ERR_PTR(ret); @@ -7531,7 +7534,7 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, } do { - btrfs_drop_extent_cache(inode, em->start, + btrfs_drop_extent_cache(BTRFS_I(inode), em->start, em->start + em->len - 1, 0); write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 1); @@ -9280,7 +9283,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS void btrfs_test_destroy_inode(struct inode *inode) { - btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } #endif @@ -9335,7 +9338,7 @@ void btrfs_destroy_inode(struct inode *inode) } btrfs_qgroup_check_reserved_leak(inode); inode_tree_del(inode); - btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); free: call_rcu(&inode->i_rcu, btrfs_i_callback); } @@ -10328,7 +10331,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, break; } - btrfs_drop_extent_cache(inode, cur_offset, + btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset, cur_offset + ins.offset -1, 0); em = alloc_extent_map(); @@ -10355,7 +10358,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, write_unlock(&em_tree->lock); if (ret != -EEXIST) break; - btrfs_drop_extent_cache(inode, cur_offset, + btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset, cur_offset + ins.offset - 1, 0); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 86f993c958ba..bc2e03a2569e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3359,7 +3359,7 @@ static void clone_update_extent_map(struct inode *inode, free_extent_map(em); break; } - btrfs_drop_extent_cache(inode, em->start, + btrfs_drop_extent_cache(BTRFS_I(inode), em->start, em->start + em->len - 1, 0); } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e6470890ce26..e48625413fcb 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1714,8 +1714,8 @@ int replace_file_extents(struct btrfs_trans_handle *trans, if (!ret) continue; - btrfs_drop_extent_cache(inode, key.offset, end, - 1); + btrfs_drop_extent_cache(BTRFS_I(inode), + 
key.offset, end, 1); unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, end); } @@ -2130,7 +2130,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, /* the lock_extent waits for readpage to complete */ lock_extent(&BTRFS_I(inode)->io_tree, start, end); - btrfs_drop_extent_cache(inode, start, end, 1); + btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1); unlock_extent(&BTRFS_I(inode)->io_tree, start, end); } return 0; @@ -3161,7 +3161,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, free_extent_map(em); break; } - btrfs_drop_extent_cache(inode, start, end, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); } unlock_extent(&BTRFS_I(inode)->io_tree, start, end); return ret; diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 4d0f038e14f1..924bcbf43275 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -293,7 +293,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) goto out; } free_extent_map(em); - btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); + btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); /* * All of the magic numbers are based on the mapping setup in -- GitLab From 35339c245b5939315e8763deb7f9b68fffe54912 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:46 +0200 Subject: [PATCH 0690/1084] btrfs: Make hole_mergeable take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5df1de43aace..27dfdfb7ff19 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2195,7 +2195,7 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } -static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, +static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf, int slot, u64 start, u64 end) { struct btrfs_file_extent_item *fi; @@ -2205,7 +2205,7 @@ static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf, return 0; btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid != btrfs_ino(BTRFS_I(inode)) || + if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) return 0; @@ -2255,7 +2255,8 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, } leaf = path->nodes[0]; - if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) { + if (hole_mergeable(BTRFS_I(inode), leaf, path->slots[0] - 1, + offset, end)) { u64 num_bytes; path->slots[0]--; @@ -2270,7 +2271,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, goto out; } - if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) { + if (hole_mergeable(BTRFS_I(inode), leaf, path->slots[0], offset, end)) { u64 num_bytes; key.offset = offset; -- GitLab From a012a74e78d99aa27f8487c050e9ac3183bc3785 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:47 +0200 Subject: [PATCH 0691/1084] btrfs: Make fill_holes take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 27dfdfb7ff19..ef4ecd003742 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2224,22 +2224,23 @@ static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf, return 0; } -static int 
fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, - struct btrfs_path *path, u64 offset, u64 end) +static int fill_holes(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *path, u64 offset, u64 end) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; struct extent_map *hole_em; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; struct btrfs_key key; int ret; if (btrfs_fs_incompat(fs_info, NO_HOLES)) goto out; - key.objectid = btrfs_ino(BTRFS_I(inode)); + key.objectid = btrfs_ino(inode); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = offset; @@ -2255,8 +2256,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, } leaf = path->nodes[0]; - if (hole_mergeable(BTRFS_I(inode), leaf, path->slots[0] - 1, - offset, end)) { + if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) { u64 num_bytes; path->slots[0]--; @@ -2271,7 +2271,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, goto out; } - if (hole_mergeable(BTRFS_I(inode), leaf, path->slots[0], offset, end)) { + if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) { u64 num_bytes; key.offset = offset; @@ -2288,7 +2288,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, } btrfs_release_path(path); - ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)), + ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0); if (ret) return ret; @@ -2298,9 +2298,8 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, hole_em = alloc_extent_map(); if (!hole_em) { - btrfs_drop_extent_cache(BTRFS_I(inode), offset, end - 1, 0); - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + btrfs_drop_extent_cache(inode, offset, end - 1, 0); + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); } else { hole_em->start = offset; hole_em->len = end - offset; @@ -2315,8 +2314,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, hole_em->generation = trans->transid; do { - btrfs_drop_extent_cache(BTRFS_I(inode), offset, - end - 1, 0); + btrfs_drop_extent_cache(inode, offset, end - 1, 0); write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, hole_em, 1); write_unlock(&em_tree->lock); @@ -2324,7 +2322,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode, free_extent_map(hole_em); if (ret) set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); } return 0; @@ -2554,8 +2552,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) trans->block_rsv = &fs_info->trans_block_rsv; if (cur_offset < drop_end && cur_offset < ino_size) { - ret = fill_holes(trans, inode, path, cur_offset, - drop_end); + ret = fill_holes(trans, BTRFS_I(inode), path, + cur_offset, drop_end); if (ret) { /* * If we failed then we didn't insert our hole @@ -2626,7 +2624,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) * cur_offset == drop_end). 
*/ if (cur_offset < ino_size && cur_offset < drop_end) { - ret = fill_holes(trans, inode, path, cur_offset, drop_end); + ret = fill_holes(trans, BTRFS_I(inode), path, + cur_offset, drop_end); if (ret) { /* Same comment as above. */ btrfs_abort_transaction(trans, ret); -- GitLab From 7a6d7067958c32d1c76e0bfd1a0d46be06340b2e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:48 +0200 Subject: [PATCH 0692/1084] btrfs: Make btrfs_mark_extent_written take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 8 ++++---- fs/btrfs/inode.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4db18e5dc8f9..2bfc2e289f51 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3233,7 +3233,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, int drop_cache); int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, - struct inode *inode, u64 start, u64 end); + struct btrfs_inode *inode, u64 start, u64 end); int btrfs_release_file(struct inode *inode, struct file *file); int btrfs_dirty_pages(struct inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ef4ecd003742..e704b6ce2058 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1082,10 +1082,10 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot, * two or three. */ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, - struct inode *inode, u64 start, u64 end) + struct btrfs_inode *inode, u64 start, u64 end) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_path *path; struct btrfs_file_extent_item *fi; @@ -1102,7 +1102,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, int del_slot = 0; int recow; int ret; - u64 ino = btrfs_ino(BTRFS_I(inode)); + u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); if (!path) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c2383a442ff8..8114bbd359d4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2876,7 +2876,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) compress_type = ordered_extent->compress_type; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { BUG_ON(compress_type); - ret = btrfs_mark_extent_written(trans, inode, + ret = btrfs_mark_extent_written(trans, BTRFS_I(inode), ordered_extent->file_offset, ordered_extent->file_offset + logical_len); -- GitLab From a776c6fa1feba7a84519170ebdb7f4a4155b89d6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:49 +0200 Subject: [PATCH 0693/1084] btrfs: Make btrfs_lookup_ordered_range take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 ++-- fs/btrfs/file.c | 4 ++-- fs/btrfs/inode.c | 11 ++++++----- fs/btrfs/ordered-data.c | 9 ++++----- fs/btrfs/ordered-data.h | 7 ++++--- fs/btrfs/scrub.c | 2 +- 6 files changed, 19 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d15b5ddb6732..996306d322de 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3101,7 +3101,7 @@ static inline void __do_contiguous_readpages(struct 
extent_io_tree *tree, inode = pages[0]->mapping->host; while (1) { lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_range(inode, start, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, end - start + 1); if (!ordered) break; @@ -3173,7 +3173,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, while (1) { lock_extent(tree, start, end); - ordered = btrfs_lookup_ordered_range(inode, start, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, PAGE_SIZE); if (!ordered) break; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e704b6ce2058..f3648e9bfa01 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1436,7 +1436,7 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, struct btrfs_ordered_extent *ordered; lock_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, last_pos, cached_state); - ordered = btrfs_lookup_ordered_range(inode, start_pos, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start_pos, last_pos - start_pos + 1); if (ordered && ordered->file_offset + ordered->len > start_pos && @@ -1494,7 +1494,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, while (1) { lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); - ordered = btrfs_lookup_ordered_range(inode, lockstart, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, lockend - lockstart + 1); if (!ordered) { break; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8114bbd359d4..723f69bd49f3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1966,7 +1966,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) if (PagePrivate2(page)) goto out; - ordered = btrfs_lookup_ordered_range(inode, page_start, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, PAGE_SIZE); if (ordered) { unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, @@ -4838,7 +4838,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) lock_extent_bits(io_tree, hole_start, block_end - 1, &cached_state); - ordered = btrfs_lookup_ordered_range(inode, hole_start, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start, block_end - hole_start); if (!ordered) break; @@ -7428,7 +7428,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, * doing DIO to, so we need to make sure there's no ordered * extents in this range. */ - ordered = btrfs_lookup_ordered_range(inode, lockstart, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, lockend - lockstart + 1); /* @@ -8801,7 +8801,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, lock_extent_bits(tree, page_start, page_end, &cached_state); again: start = page_start; - ordered = btrfs_lookup_ordered_range(inode, start, + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start, page_end - start + 1); if (ordered) { end = min(page_end, ordered->file_offset + ordered->len - 1); @@ -8967,7 +8967,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) * we can't set the delalloc bits if there are pending ordered * extents. 
Drop our locks and wait for them to finish */ - ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE); + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, + PAGE_SIZE); if (ordered) { unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index bc2aba810629..9a46878ba60f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -879,15 +879,14 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, /* Since the DIO code tries to lock a wide area we need to look for any ordered * extents that exist in the range, rather than just the start of the range. */ -struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, - u64 file_offset, - u64 len) +struct btrfs_ordered_extent *btrfs_lookup_ordered_range( + struct btrfs_inode *inode, u64 file_offset, u64 len) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; struct btrfs_ordered_extent *entry = NULL; - tree = &BTRFS_I(inode)->ordered_tree; + tree = &inode->ordered_tree; spin_lock_irq(&tree->lock); node = tree_search(tree, file_offset); if (!node) { @@ -923,7 +922,7 @@ bool btrfs_have_ordered_extents_in_range(struct inode *inode, { struct btrfs_ordered_extent *oe; - oe = btrfs_lookup_ordered_range(inode, file_offset, len); + oe = btrfs_lookup_ordered_range(BTRFS_I(inode), file_offset, len); if (oe) { btrfs_put_ordered_extent(oe); return true; diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index a8cb8efe6fae..195c93b67fe0 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -189,9 +189,10 @@ void btrfs_start_ordered_extent(struct inode *inode, int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); -struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, - u64 file_offset, - u64 len); +struct btrfs_ordered_extent *btrfs_lookup_ordered_range( + struct btrfs_inode *inode, + u64 file_offset, + u64 len); bool btrfs_have_ordered_extents_in_range(struct inode *inode, u64 file_offset, u64 len); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ff9a11c39f5e..82d873406aa3 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4249,7 +4249,7 @@ static int check_extent_to_block(struct inode *inode, u64 start, u64 len, io_tree = &BTRFS_I(inode)->io_tree; lock_extent_bits(io_tree, lockstart, lockend, &cached_state); - ordered = btrfs_lookup_ordered_range(inode, lockstart, len); + ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, len); if (ordered) { btrfs_put_ordered_extent(ordered); ret = 1; -- GitLab From 85b7ab6705d9a2e6173361f5da7cbf8f9eb07864 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:50 +0200 Subject: [PATCH 0694/1084] btrfs: Make check_can_nocow take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f3648e9bfa01..1d09eccec477 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1474,11 +1474,11 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, return ret; } -static noinline int check_can_nocow(struct inode *inode, loff_t pos, +static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, size_t *write_bytes) { - struct btrfs_fs_info *fs_info 
= btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct btrfs_ordered_extent *ordered; u64 lockstart, lockend; u64 num_bytes; @@ -1493,19 +1493,20 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, fs_info->sectorsize) - 1; while (1) { - lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, + lock_extent(&inode->io_tree, lockstart, lockend); + ordered = btrfs_lookup_ordered_range(inode, lockstart, lockend - lockstart + 1); if (!ordered) { break; } - unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); - btrfs_start_ordered_extent(inode, ordered, 1); + unlock_extent(&inode->io_tree, lockstart, lockend); + btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1); btrfs_put_ordered_extent(ordered); } num_bytes = lockend - lockstart + 1; - ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); + ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, + NULL, NULL, NULL); if (ret <= 0) { ret = 0; btrfs_end_write_no_snapshoting(root); @@ -1514,7 +1515,7 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, num_bytes - pos + lockstart); } - unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); + unlock_extent(&inode->io_tree, lockstart, lockend); return ret; } @@ -1579,7 +1580,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, if (ret < 0) { if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) && - check_can_nocow(inode, pos, &write_bytes) > 0) { + check_can_nocow(BTRFS_I(inode), pos, + &write_bytes) > 0) { /* * For nodata cow case, no need to reserve * data space. 
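
[Editorial note, not part of any patch in this series: the patches from 0690 onward repeat one mechanical conversion. Internal helpers are changed to take struct btrfs_inode * directly, callers wrap the generic VFS inode with BTRFS_I() at the boundary, and the helper reaches VFS fields through the embedded vfs_inode member, as the hunks above show. The standalone sketch below only illustrates that pattern; the struct layouts, the fields other than vfs_inode, and helper_size() are simplified stand-ins invented for illustration, and the BTRFS_I() here is a plain offsetof() reconstruction rather than the kernel's definition.]

/*
 * Standalone sketch of the btrfs_inode wrapping pattern used by the
 * patches in this series. Simplified stand-in types, not kernel code.
 */
#include <stddef.h>
#include <stdio.h>

struct inode {			/* stand-in for the VFS inode */
	long long i_size;
};

struct btrfs_inode {		/* btrfs-private inode embedding the VFS inode */
	unsigned long runtime_flags;
	struct inode vfs_inode;
};

/* Recover the containing btrfs_inode from a VFS inode pointer */
static struct btrfs_inode *BTRFS_I(struct inode *inode)
{
	return (struct btrfs_inode *)((char *)inode -
			offsetof(struct btrfs_inode, vfs_inode));
}

/* After the conversion, helpers take btrfs_inode and use ->vfs_inode for VFS fields */
static long long helper_size(struct btrfs_inode *inode)
{
	return inode->vfs_inode.i_size;
}

int main(void)
{
	struct btrfs_inode bi = {
		.runtime_flags = 0,
		.vfs_inode = { .i_size = 4096 },
	};
	struct inode *vfs = &bi.vfs_inode;	/* what VFS-facing code holds */

	/* call-site pattern from the patches: wrap once at the boundary */
	printf("size=%lld\n", helper_size(BTRFS_I(vfs)));
	return 0;
}

Built with any C99 compiler this prints "size=4096"; the point is that BTRFS_I() is pure pointer arithmetic over the embedded member, so changing a helper's signature from struct inode * to struct btrfs_inode * moves no data and changes no behavior, it only makes the btrfs-internal type explicit.
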
-- GitLab From 2cff578cfceba883eef199c52674a301a8f91d19 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:51 +0200 Subject: [PATCH 0695/1084] btrfs: Make lock_and_cleanup_extent_if_need take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/file.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1d09eccec477..dff7ec1770c1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1415,13 +1415,13 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages, * the other < 0 number - Something wrong happens */ static noinline int -lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, +lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, size_t num_pages, loff_t pos, size_t write_bytes, u64 *lockstart, u64 *lockend, struct extent_state **cached_state) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); u64 start_pos; u64 last_pos; int i; @@ -1432,30 +1432,30 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, + round_up(pos + write_bytes - start_pos, fs_info->sectorsize) - 1; - if (start_pos < inode->i_size) { + if (start_pos < inode->vfs_inode.i_size) { struct btrfs_ordered_extent *ordered; - lock_extent_bits(&BTRFS_I(inode)->io_tree, - start_pos, last_pos, cached_state); - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start_pos, + lock_extent_bits(&inode->io_tree, start_pos, last_pos, + cached_state); + ordered = btrfs_lookup_ordered_range(inode, start_pos, last_pos - start_pos + 1); if (ordered && ordered->file_offset + ordered->len > start_pos && ordered->file_offset <= last_pos) { - unlock_extent_cached(&BTRFS_I(inode)->io_tree, - start_pos, last_pos, - cached_state, GFP_NOFS); + unlock_extent_cached(&inode->io_tree, start_pos, + last_pos, cached_state, GFP_NOFS); for (i = 0; i < num_pages; i++) { unlock_page(pages[i]); put_page(pages[i]); } - btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_start_ordered_extent(&inode->vfs_inode, + ordered, 1); btrfs_put_ordered_extent(ordered); return -EAGAIN; } if (ordered) btrfs_put_ordered_extent(ordered); - clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, + clear_extent_bit(&inode->io_tree, start_pos, last_pos, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, cached_state, GFP_NOFS); @@ -1626,9 +1626,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, if (ret) break; - ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages, - pos, write_bytes, &lockstart, - &lockend, &cached_state); + ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages, + num_pages, pos, write_bytes, &lockstart, + &lockend, &cached_state); if (ret < 0) { if (ret == -EAGAIN) goto again; -- GitLab From 4ac1f4acd7c60c95e3efa63d463418093aff9ce5 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:52 +0200 Subject: [PATCH 0696/1084] btrfs: make free_io_failure take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 14 +++++++------- fs/btrfs/extent_io.h | 4 +++- fs/btrfs/inode.c | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 996306d322de..0e73e48a9c4c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1959,11 +1959,11 @@ static void 
check_page_uptodate(struct extent_io_tree *tree, struct page *page) SetPageUptodate(page); } -int free_io_failure(struct inode *inode, struct io_failure_record *rec) +int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec) { int ret; int err = 0; - struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + struct extent_io_tree *failure_tree = &inode->io_failure_tree; set_state_failrec(failure_tree, rec->start, NULL); ret = clear_extent_bits(failure_tree, rec->start, @@ -1972,7 +1972,7 @@ int free_io_failure(struct inode *inode, struct io_failure_record *rec) if (ret) err = ret; - ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start, + ret = clear_extent_bits(&inode->io_tree, rec->start, rec->start + rec->len - 1, EXTENT_DAMAGED); if (ret && !err) @@ -2140,7 +2140,7 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, } out: - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); return 0; } @@ -2393,7 +2393,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror); if (!ret) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); return -EIO; } @@ -2406,7 +2406,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, (int)phy_offset, failed_bio->bi_end_io, NULL); if (!bio) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); return -EIO; } bio_set_op_attrs(bio, REQ_OP_READ, read_mode); @@ -2418,7 +2418,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, failrec->bio_flags, 0); if (ret) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); bio_put(bio); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 270d03be290e..5b4132a9093a 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -480,6 +480,8 @@ struct io_failure_record { int in_validation; }; +struct btrfs_inode; + void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end); int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, struct io_failure_record **failrec_ret); @@ -489,7 +491,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, struct io_failure_record *failrec, struct page *page, int pg_offset, int icsum, bio_end_io_t *endio_func, void *data); -int free_io_failure(struct inode *inode, struct io_failure_record *rec); +int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS noinline u64 find_lock_delalloc_range(struct inode *inode, struct extent_io_tree *tree, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 723f69bd49f3..f1f9e201f9f3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7859,7 +7859,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, failed_mirror); if (!ret) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); return -EIO; } @@ -7873,7 +7873,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, pgoff, isector, repair_endio, repair_arg); if (!bio) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); return -EIO; } bio_set_op_attrs(bio, REQ_OP_READ, read_mode); @@ 
-7884,7 +7884,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror); if (ret) { - free_io_failure(inode, failrec); + free_io_failure(BTRFS_I(inode), failrec); bio_put(bio); } -- GitLab From 0970a22e58d4c49f2c3e84519613a9db8f00f579 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:53 +0200 Subject: [PATCH 0697/1084] btrfs: make btrfs_print_data_csum_error take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 8 ++++---- fs/btrfs/compression.c | 5 +++-- fs/btrfs/inode.c | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 36eca5464e1b..c10d21b2f0df 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -324,21 +324,21 @@ static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) &BTRFS_I(inode)->runtime_flags); } -static inline void btrfs_print_data_csum_error(struct inode *inode, +static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode, u64 logical_start, u32 csum, u32 csum_expected, int mirror_num) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; /* Output minus objectid, which is more meaningful */ if (root->objectid >= BTRFS_LAST_FREE_OBJECTID) btrfs_warn_rl(root->fs_info, "csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d", - root->objectid, btrfs_ino(BTRFS_I(inode)), + root->objectid, btrfs_ino(inode), logical_start, csum, csum_expected, mirror_num); else btrfs_warn_rl(root->fs_info, "csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d", - root->objectid, btrfs_ino(BTRFS_I(inode)), + root->objectid, btrfs_ino(inode), logical_start, csum, csum_expected, mirror_num); } diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 903c32c9eb22..9ce85d5e5cae 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -124,8 +124,9 @@ static int check_compressed_csum(struct inode *inode, kunmap_atomic(kaddr); if (csum != *cb_sum) { - btrfs_print_data_csum_error(inode, disk_start, csum, - *cb_sum, cb->mirror_num); + btrfs_print_data_csum_error(BTRFS_I(inode), + disk_start, csum, + *cb_sum, cb->mirror_num); ret = -EIO; goto fail; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f1f9e201f9f3..fc00117a0dd0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3032,7 +3032,7 @@ static int __readpage_endio_check(struct inode *inode, kunmap_atomic(kaddr); return 0; zeroit: - btrfs_print_data_csum_error(inode, start, csum, csum_expected, + btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, io_bio->mirror_num); memset(kaddr + pgoff, 1, len); flush_dcache_page(page); -- GitLab From f898ac6ae339782bc304b2b15c9e187e438da9f7 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:54 +0200 Subject: [PATCH 0698/1084] btrfs: make check_compressed_csum take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/compression.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9ce85d5e5cae..f32e86b61240 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -100,7 +100,7 @@ static struct bio *compressed_bio_alloc(struct block_device *bdev, return btrfs_bio_alloc(bdev, first_byte >> 9, BIO_MAX_PAGES, gfp_flags); } -static int check_compressed_csum(struct 
inode *inode, +static int check_compressed_csum(struct btrfs_inode *inode, struct compressed_bio *cb, u64 disk_start) { @@ -111,7 +111,7 @@ static int check_compressed_csum(struct inode *inode, u32 csum; u32 *cb_sum = &cb->sums; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) + if (inode->flags & BTRFS_INODE_NODATASUM) return 0; for (i = 0; i < cb->nr_pages; i++) { @@ -124,8 +124,7 @@ static int check_compressed_csum(struct inode *inode, kunmap_atomic(kaddr); if (csum != *cb_sum) { - btrfs_print_data_csum_error(BTRFS_I(inode), - disk_start, csum, + btrfs_print_data_csum_error(inode, disk_start, csum, *cb_sum, cb->mirror_num); ret = -EIO; goto fail; @@ -166,7 +165,7 @@ static void end_compressed_bio_read(struct bio *bio) goto out; inode = cb->inode; - ret = check_compressed_csum(inode, cb, + ret = check_compressed_csum(BTRFS_I(inode), cb, (u64)bio->bi_iter.bi_sector << 9); if (ret) goto csum_failed; -- GitLab From 9d4f7f8ad69112137da0bbe4036b94739ae25f78 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:55 +0200 Subject: [PATCH 0699/1084] btrfs: make repair_io_failure take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 13 +++++++------ fs/btrfs/extent_io.h | 8 ++++---- fs/btrfs/scrub.c | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0e73e48a9c4c..e99e8dad3cc7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1992,10 +1992,11 @@ int free_io_failure(struct btrfs_inode *inode, struct io_failure_record *rec) * currently, there can be no more than two copies of every data bit. thus, * exactly one rewrite is required. */ -int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, - struct page *page, unsigned int pg_offset, int mirror_num) +int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, + u64 logical, struct page *page, + unsigned int pg_offset, int mirror_num) { - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct bio *bio; struct btrfs_device *dev; u64 map_length = 0; @@ -2054,7 +2055,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, btrfs_info_rl_in_rcu(fs_info, "read error corrected: ino %llu off %llu (dev %s sector %llu)", - btrfs_ino(BTRFS_I(inode)), start, + btrfs_ino(inode), start, rcu_str_deref(dev->name), sector); btrfs_bio_counter_dec(fs_info); bio_put(bio); @@ -2074,7 +2075,7 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info, for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; - ret = repair_io_failure(fs_info->btree_inode, start, + ret = repair_io_failure(BTRFS_I(fs_info->btree_inode), start, PAGE_SIZE, start, p, start - page_offset(p), mirror_num); if (ret) @@ -2133,7 +2134,7 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies > 1) { - repair_io_failure(inode, start, failrec->len, + repair_io_failure(BTRFS_I(inode), start, failrec->len, failrec->logical, page, pg_offset, failrec->failed_mirror); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5b4132a9093a..039a6daa392b 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -451,10 +451,11 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs); struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask); struct btrfs_fs_info; +struct 
btrfs_inode; -int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, - struct page *page, unsigned int pg_offset, - int mirror_num); +int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, + u64 logical, struct page *page, + unsigned int pg_offset, int mirror_num); int clean_io_failure(struct inode *inode, u64 start, struct page *page, unsigned int pg_offset); void end_extent_writepage(struct page *page, int err, u64 start, u64 end); @@ -480,7 +481,6 @@ struct io_failure_record { int in_validation; }; -struct btrfs_inode; void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end); int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 82d873406aa3..bdf58b0eaef8 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -731,7 +731,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) ret = -EIO; goto out; } - ret = repair_io_failure(inode, offset, PAGE_SIZE, + ret = repair_io_failure(BTRFS_I(inode), offset, PAGE_SIZE, fixup->logical, page, offset - page_offset(page), fixup->mirror_num); -- GitLab From b30cb441fcf8786773dab590739ca4ebc2b4628b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:56 +0200 Subject: [PATCH 0700/1084] btrfs: make clean_io_failure take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 21 +++++++++++---------- fs/btrfs/extent_io.h | 4 ++-- fs/btrfs/inode.c | 4 ++-- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e99e8dad3cc7..b08fa96678ee 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2090,23 +2090,23 @@ int repair_eb_io_failure(struct btrfs_fs_info *fs_info, * each time an IO finishes, we do a fast check in the IO failure tree * to see if we need to process or clean up an io_failure_record */ -int clean_io_failure(struct inode *inode, u64 start, struct page *page, +int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page, unsigned int pg_offset) { u64 private; struct io_failure_record *failrec; - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct extent_state *state; int num_copies; int ret; private = 0; - ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, + ret = count_range_bits(&inode->io_failure_tree, &private, (u64)-1, 1, EXTENT_DIRTY, 0); if (!ret) return 0; - ret = get_state_failrec(&BTRFS_I(inode)->io_failure_tree, start, + ret = get_state_failrec(&inode->io_failure_tree, start, &failrec); if (ret) return 0; @@ -2123,25 +2123,25 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page, if (fs_info->sb->s_flags & MS_RDONLY) goto out; - spin_lock(&BTRFS_I(inode)->io_tree.lock); - state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, + spin_lock(&inode->io_tree.lock); + state = find_first_extent_bit_state(&inode->io_tree, failrec->start, EXTENT_LOCKED); - spin_unlock(&BTRFS_I(inode)->io_tree.lock); + spin_unlock(&inode->io_tree.lock); if (state && state->start <= failrec->start && state->end >= failrec->start + failrec->len - 1) { num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies > 1) { - repair_io_failure(BTRFS_I(inode), start, failrec->len, + repair_io_failure(inode, start, failrec->len, failrec->logical, page, pg_offset, failrec->failed_mirror); } } out: - 
free_io_failure(BTRFS_I(inode), failrec); + free_io_failure(inode, failrec); return 0; } @@ -2577,7 +2577,8 @@ static void end_bio_extent_readpage(struct bio *bio) if (ret) uptodate = 0; else - clean_io_failure(inode, start, page, 0); + clean_io_failure(BTRFS_I(inode), start, + page, 0); } if (likely(uptodate)) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 039a6daa392b..0f67222f4464 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -456,8 +456,8 @@ struct btrfs_inode; int repair_io_failure(struct btrfs_inode *inode, u64 start, u64 length, u64 logical, struct page *page, unsigned int pg_offset, int mirror_num); -int clean_io_failure(struct inode *inode, u64 start, struct page *page, - unsigned int pg_offset); +int clean_io_failure(struct btrfs_inode *inode, u64 start, + struct page *page, unsigned int pg_offset); void end_extent_writepage(struct page *page, int err, u64 start, u64 end); int repair_eb_io_failure(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, int mirror_num); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fc00117a0dd0..4498921bb608 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7914,7 +7914,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio) done->uptodate = 1; bio_for_each_segment_all(bvec, bio, i) - clean_io_failure(done->inode, done->start, bvec->bv_page, 0); + clean_io_failure(BTRFS_I(done->inode), done->start, bvec->bv_page, 0); end: complete(&done->done); bio_put(bio); @@ -8000,7 +8000,7 @@ static void btrfs_retry_endio(struct bio *bio) bvec->bv_page, bvec->bv_offset, done->start, bvec->bv_len); if (!ret) - clean_io_failure(done->inode, done->start, + clean_io_failure(BTRFS_I(done->inode), done->start, bvec->bv_page, bvec->bv_offset); else uptodate = 0; -- GitLab From 7ab7956ec3fc77667739d065748d96f87bff6c5d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:57 +0200 Subject: [PATCH 0701/1084] btrfs: make btrfs_free_io_failure_record take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 ++-- fs/btrfs/extent_io.h | 3 ++- fs/btrfs/inode.c | 9 +++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b08fa96678ee..ded750eb90c4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2152,9 +2152,9 @@ int clean_io_failure(struct btrfs_inode *inode, u64 start, struct page *page, * - under ordered extent * - the inode is freeing */ -void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) +void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end) { - struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + struct extent_io_tree *failure_tree = &inode->io_failure_tree; struct io_failure_record *failrec; struct extent_state *state, *next; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 0f67222f4464..345fc33f843d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -482,7 +482,8 @@ struct io_failure_record { }; -void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end); +void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, + u64 end); int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, struct io_failure_record **failrec_ret); int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4498921bb608..4805489984eb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c 
@@ -2803,9 +2803,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } - btrfs_free_io_failure_record(inode, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1); + btrfs_free_io_failure_record(BTRFS_I(inode), + ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1); if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { truncated = true; @@ -5196,7 +5197,7 @@ void btrfs_evict_inode(struct inode *inode) if (!special_file(inode->i_mode)) btrfs_wait_ordered_range(inode, 0, (u64)-1); - btrfs_free_io_failure_record(inode, 0, (u64)-1); + btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1); if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, -- GitLab From 3d6ae7bb6a64b7dbc516863eeb99c3182d499aba Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:58 +0200 Subject: [PATCH 0702/1084] btrfs: make btrfs_orphan_del take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4805489984eb..ad93974f86a9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3271,20 +3271,20 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) * item for this particular inode. */ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; int delete_item = 0; int release_rsv = 0; int ret = 0; spin_lock(&root->orphan_lock); if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags)) + &inode->runtime_flags)) delete_item = 1; if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) + &inode->runtime_flags)) release_rsv = 1; spin_unlock(&root->orphan_lock); @@ -3292,11 +3292,11 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, atomic_dec(&root->orphan_inodes); if (trans) ret = btrfs_del_orphan_item(trans, root, - btrfs_ino(BTRFS_I(inode))); + btrfs_ino(inode)); } if (release_rsv) - btrfs_orphan_release_metadata(BTRFS_I(inode)); + btrfs_orphan_release_metadata(inode); return ret; } @@ -3467,7 +3467,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = btrfs_truncate(inode); if (ret) - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); } else { nr_unlink++; } @@ -5012,7 +5012,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) /* To get a stable disk_i_size */ err = btrfs_wait_ordered_range(inode, 0, (u64)-1); if (err) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); return err; } @@ -5024,11 +5024,11 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) */ trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); return ret; } i_size_write(inode, BTRFS_I(inode)->disk_i_size); - err = btrfs_orphan_del(trans, inode); + err = btrfs_orphan_del(trans, BTRFS_I(inode)); if (err) btrfs_abort_transaction(trans, err); btrfs_end_transaction(trans); @@ -5190,7 +5190,7 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; if (is_bad_inode(inode)) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); goto 
no_delete; } /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ @@ -5213,13 +5213,13 @@ void btrfs_evict_inode(struct inode *inode) ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode)); if (ret) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); goto no_delete; } rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP); if (!rsv) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); goto no_delete; } rsv->size = min_size; @@ -5261,14 +5261,14 @@ void btrfs_evict_inode(struct inode *inode) btrfs_warn(fs_info, "Could not get space for a delete, will truncate on mount %d", ret); - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); btrfs_free_block_rsv(fs_info, rsv); goto no_delete; } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); btrfs_free_block_rsv(fs_info, rsv); goto no_delete; } @@ -5294,7 +5294,7 @@ void btrfs_evict_inode(struct inode *inode) if (ret) { ret = btrfs_commit_transaction(trans); if (ret) { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); btrfs_free_block_rsv(fs_info, rsv); goto no_delete; } @@ -5323,9 +5323,9 @@ void btrfs_evict_inode(struct inode *inode) */ if (ret == 0) { trans->block_rsv = root->orphan_block_rsv; - btrfs_orphan_del(trans, inode); + btrfs_orphan_del(trans, BTRFS_I(inode)); } else { - btrfs_orphan_del(NULL, inode); + btrfs_orphan_del(NULL, BTRFS_I(inode)); } trans->block_rsv = &fs_info->trans_block_rsv; @@ -6534,7 +6534,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, * If new hard link count is 1, it's a file created * with open(2) O_TMPFILE flag. */ - err = btrfs_orphan_del(trans, inode); + err = btrfs_orphan_del(trans, BTRFS_I(inode)); if (err) goto fail; } @@ -9167,7 +9167,7 @@ static int btrfs_truncate(struct inode *inode) if (ret == 0 && inode->i_nlink > 0) { trans->block_rsv = root->orphan_block_rsv; - ret = btrfs_orphan_del(trans, inode); + ret = btrfs_orphan_del(trans, BTRFS_I(inode)); if (ret) err = ret; } -- GitLab From 73f2e545b68f6af033fd2c083ca9dc3079e79083 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:50:59 +0200 Subject: [PATCH 0703/1084] btrfs: Make btrfs_orphan_add take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/inode.c | 39 ++++++++++++++++++++------------------- fs/btrfs/relocation.c | 2 +- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2bfc2e289f51..af68f8452f19 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3174,7 +3174,8 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); -int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); +int btrfs_orphan_add(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode); int btrfs_orphan_cleanup(struct btrfs_root *root); void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7b2313a4441e..3853fab2f49f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10337,7 +10337,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, mutex_unlock(&trans->transaction->cache_write_mutex); if 
(!IS_ERR(inode)) { - ret = btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) { btrfs_add_delayed_iput(inode); goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ad93974f86a9..d5339b8d4663 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3172,10 +3172,11 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, * NOTE: caller of this function should reserve 5 units of metadata for * this function. */ -int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) +int btrfs_orphan_add(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct btrfs_block_rsv *block_rsv = NULL; int reserve = 0; int insert = 0; @@ -3197,7 +3198,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) } if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags)) { + &inode->runtime_flags)) { #if 0 /* * For proper ENOSPC handling, we should do orphan @@ -3214,39 +3215,38 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) } if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &BTRFS_I(inode)->runtime_flags)) + &inode->runtime_flags)) reserve = 1; spin_unlock(&root->orphan_lock); /* grab metadata reservation from transaction handle */ if (reserve) { - ret = btrfs_orphan_reserve_metadata(trans, BTRFS_I(inode)); + ret = btrfs_orphan_reserve_metadata(trans, inode); ASSERT(!ret); if (ret) { atomic_dec(&root->orphan_inodes); clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); if (insert) clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); return ret; } } /* insert an orphan item to track this unlinked/truncated file */ if (insert >= 1) { - ret = btrfs_insert_orphan_item(trans, root, - btrfs_ino(BTRFS_I(inode))); + ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); if (ret) { atomic_dec(&root->orphan_inodes); if (reserve) { clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, - &BTRFS_I(inode)->runtime_flags); - btrfs_orphan_release_metadata(BTRFS_I(inode)); + &inode->runtime_flags); + btrfs_orphan_release_metadata(inode); } if (ret != -EEXIST) { clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); btrfs_abort_transaction(trans, ret); return ret; } @@ -3458,7 +3458,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = PTR_ERR(trans); goto out; } - ret = btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); btrfs_end_transaction(trans); if (ret) { iput(inode); @@ -4060,7 +4060,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) goto out; if (inode->i_nlink == 0) { - ret = btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) goto out; } @@ -4177,7 +4177,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out; } - err = btrfs_orphan_add(trans, inode); + err = btrfs_orphan_add(trans, BTRFS_I(inode)); if (err) goto out; @@ -4992,7 +4992,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) * so we need to guarantee from this point on that everything * will be consistent. 
*/ - ret = btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); btrfs_end_transaction(trans); if (ret) return ret; @@ -9865,7 +9865,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_dentry->d_name.len); } if (!ret && new_inode->i_nlink == 0) - ret = btrfs_orphan_add(trans, d_inode(new_dentry)); + ret = btrfs_orphan_add(trans, + BTRFS_I(d_inode(new_dentry))); if (ret) { btrfs_abort_transaction(trans, ret); goto out_fail; @@ -10482,7 +10483,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) ret = btrfs_update_inode(trans, root, inode); if (ret) goto out_inode; - ret = btrfs_orphan_add(trans, inode); + ret = btrfs_orphan_add(trans, BTRFS_I(inode)); if (ret) goto out_inode; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e48625413fcb..d60df51959f7 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4246,7 +4246,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); BTRFS_I(inode)->index_cnt = group->key.objectid; - err = btrfs_orphan_add(trans, inode); + err = btrfs_orphan_add(trans, BTRFS_I(inode)); out: btrfs_end_transaction(trans); btrfs_btree_balance_dirty(fs_info); -- GitLab From aefa6115c04ee561302e133ab5916246cd3695f7 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:00 +0200 Subject: [PATCH 0704/1084] btrfs: Make check_parent_dirs_for_sync take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 071f64944711..399561b039c3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5045,14 +5045,14 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, * a full commit is required. */ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, - struct inode *inode, + struct btrfs_inode *inode, struct dentry *parent, struct super_block *sb, u64 last_committed) { int ret = 0; struct dentry *old_parent = NULL; - struct inode *orig_inode = inode; + struct btrfs_inode *orig_inode = inode; /* * for regular files, if its inode is already on disk, we don't @@ -5060,15 +5060,15 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, * we can use the last_unlink_trans field to record renames * and other fun in this file. */ - if (S_ISREG(inode->i_mode) && - BTRFS_I(inode)->generation <= last_committed && - BTRFS_I(inode)->last_unlink_trans <= last_committed) - goto out; + if (S_ISREG(inode->vfs_inode.i_mode) && + inode->generation <= last_committed && + inode->last_unlink_trans <= last_committed) + goto out; - if (!S_ISDIR(inode->i_mode)) { + if (!S_ISDIR(inode->vfs_inode.i_mode)) { if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) goto out; - inode = d_inode(parent); + inode = BTRFS_I(d_inode(parent)); } while (1) { @@ -5079,10 +5079,10 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, * think this inode has already been logged. 
*/ if (inode != orig_inode) - BTRFS_I(inode)->logged_trans = trans->transid; + inode->logged_trans = trans->transid; smp_mb(); - if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) { + if (btrfs_must_commit_transaction(trans, inode)) { ret = 1; break; } @@ -5091,8 +5091,8 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, break; if (IS_ROOT(parent)) { - inode = d_inode(parent); - if (btrfs_must_commit_transaction(trans, BTRFS_I(inode))) + inode = BTRFS_I(d_inode(parent)); + if (btrfs_must_commit_transaction(trans, inode)) ret = 1; break; } @@ -5100,7 +5100,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, parent = dget_parent(parent); dput(old_parent); old_parent = parent; - inode = d_inode(parent); + inode = BTRFS_I(d_inode(parent)); } dput(old_parent); @@ -5429,7 +5429,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, goto end_no_trans; } - ret = check_parent_dirs_for_sync(trans, inode, parent, + ret = check_parent_dirs_for_sync(trans, BTRFS_I(inode), parent, sb, last_committed); if (ret) goto end_no_trans; -- GitLab From 19df27a9e47085dcc81cbf9e7b464e7402511e18 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:01 +0200 Subject: [PATCH 0705/1084] btrfs: make btrfs_log_inode_parent take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 50 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 399561b039c3..f8965b081212 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5390,7 +5390,8 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, * the last committed transaction */ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, + struct btrfs_root *root, + struct btrfs_inode *inode, struct dentry *parent, const loff_t start, const loff_t end, @@ -5404,9 +5405,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, int ret = 0; u64 last_committed = fs_info->last_trans_committed; bool log_dentries = false; - struct inode *orig_inode = inode; + struct btrfs_inode *orig_inode = inode; - sb = inode->i_sb; + sb = inode->vfs_inode.i_sb; if (btrfs_test_opt(fs_info, NOTREELOG)) { ret = 1; @@ -5423,18 +5424,17 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, goto end_no_trans; } - if (root != BTRFS_I(inode)->root || - btrfs_root_refs(&root->root_item) == 0) { + if (root != inode->root || btrfs_root_refs(&root->root_item) == 0) { ret = 1; goto end_no_trans; } - ret = check_parent_dirs_for_sync(trans, BTRFS_I(inode), parent, - sb, last_committed); + ret = check_parent_dirs_for_sync(trans, inode, parent, sb, + last_committed); if (ret) goto end_no_trans; - if (btrfs_inode_in_log(BTRFS_I(inode), trans->transid)) { + if (btrfs_inode_in_log(inode, trans->transid)) { ret = BTRFS_NO_LOG_SYNC; goto end_no_trans; } @@ -5443,8 +5443,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - ret = btrfs_log_inode(trans, root, BTRFS_I(inode), inode_only, - start, end, ctx); + ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx); if (ret) goto end_trans; @@ -5454,14 +5453,14 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, * we can use the last_unlink_trans field to record renames * and other fun in this file. 
*/ - if (S_ISREG(inode->i_mode) && - BTRFS_I(inode)->generation <= last_committed && - BTRFS_I(inode)->last_unlink_trans <= last_committed) { + if (S_ISREG(inode->vfs_inode.i_mode) && + inode->generation <= last_committed && + inode->last_unlink_trans <= last_committed) { ret = 0; goto end_trans; } - if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries) + if (S_ISDIR(inode->vfs_inode.i_mode) && ctx && ctx->log_new_dentries) log_dentries = true; /* @@ -5505,8 +5504,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, * but the file inode does not have a matching BTRFS_INODE_REF_KEY item * and has a link count of 2. */ - if (BTRFS_I(inode)->last_unlink_trans > last_committed) { - ret = btrfs_log_all_parents(trans, BTRFS_I(orig_inode), ctx); + if (inode->last_unlink_trans > last_committed) { + ret = btrfs_log_all_parents(trans, orig_inode, ctx); if (ret) goto end_trans; } @@ -5515,14 +5514,13 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) break; - inode = d_inode(parent); - if (root != BTRFS_I(inode)->root) + inode = BTRFS_I(d_inode(parent)); + if (root != inode->root) break; - if (BTRFS_I(inode)->generation > last_committed) { - ret = btrfs_log_inode(trans, root, BTRFS_I(inode), - LOG_INODE_EXISTS, - 0, LLONG_MAX, ctx); + if (inode->generation > last_committed) { + ret = btrfs_log_inode(trans, root, inode, + LOG_INODE_EXISTS, 0, LLONG_MAX, ctx); if (ret) goto end_trans; } @@ -5534,7 +5532,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, old_parent = parent; } if (log_dentries) - ret = log_new_dir_dentries(trans, root, BTRFS_I(orig_inode), ctx); + ret = log_new_dir_dentries(trans, root, orig_inode, ctx); else ret = 0; end_trans: @@ -5566,8 +5564,8 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct dentry *parent = dget_parent(dentry); int ret; - ret = btrfs_log_inode_parent(trans, root, d_inode(dentry), parent, - start, end, 0, ctx); + ret = btrfs_log_inode_parent(trans, root, BTRFS_I(d_inode(dentry)), + parent, start, end, 0, ctx); dput(parent); return ret; @@ -5829,7 +5827,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed)) return 0; - return btrfs_log_inode_parent(trans, root, &inode->vfs_inode, parent, 0, + return btrfs_log_inode_parent(trans, root, inode, parent, 0, LLONG_MAX, 1, NULL); } -- GitLab From 9cdc51241090a36d3b7b4ff374fb18b764b3b3a4 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:02 +0200 Subject: [PATCH 0706/1084] btrfs: Make btrfs_extent_item_to_extent_map take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file-item.c | 10 +++++----- fs/btrfs/inode.c | 3 ++- fs/btrfs/ioctl.c | 3 ++- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index af68f8452f19..2e0845eafcbc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3081,7 +3081,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, u64 file_start, int contig); int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit); -void btrfs_extent_item_to_extent_map(struct inode *inode, +void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, const struct btrfs_path *path, struct btrfs_file_extent_item *fi, const bool new_inline, diff --git a/fs/btrfs/file-item.c 
b/fs/btrfs/file-item.c index a8a0dd217084..54ec6d6ef016 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -904,14 +904,14 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, goto out; } -void btrfs_extent_item_to_extent_map(struct inode *inode, +void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, const struct btrfs_path *path, struct btrfs_file_extent_item *fi, const bool new_inline, struct extent_map *em) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct btrfs_root *root = inode->root; struct extent_buffer *leaf = path->nodes[0]; const int slot = path->slots[0]; struct btrfs_key key; @@ -976,8 +976,8 @@ void btrfs_extent_item_to_extent_map(struct inode *inode, } } else { btrfs_err(fs_info, - "unknown file extent item type %d, inode %llu, offset %llu, root %llu", - type, btrfs_ino(BTRFS_I(inode)), extent_start, + "unknown file extent item type %d, inode %llu, offset %llu, " + "root %llu", type, btrfs_ino(inode), extent_start, root->root_key.objectid); } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d5339b8d4663..0b46542010fb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6862,7 +6862,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, goto not_found_em; } - btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em); + btrfs_extent_item_to_extent_map(BTRFS_I(inode), path, item, + new_inline, em); if (found_type == BTRFS_FILE_EXTENT_REG || found_type == BTRFS_FILE_EXTENT_PREALLOC) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bc2e03a2569e..bb4e6ec26b7f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3333,7 +3333,8 @@ static void clone_update_extent_map(struct inode *inode, fi = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_file_extent_item); - btrfs_extent_item_to_extent_map(inode, path, fi, false, em); + btrfs_extent_item_to_extent_map(BTRFS_I(inode), path, fi, + false, em); em->generation = -1; if (btrfs_file_extent_type(path->nodes[0], fi) == BTRFS_FILE_EXTENT_INLINE) -- GitLab From 6fc0ef687029760476e309aa85d437a47313eb08 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:03 +0200 Subject: [PATCH 0707/1084] btrfs: Make btrfs_clear_bit_hook take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 3 ++- fs/btrfs/extent_io.h | 6 ++++-- fs/btrfs/inode.c | 37 +++++++++++++++++++------------------ 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ded750eb90c4..d21082962565 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -428,7 +428,8 @@ static void clear_state_cb(struct extent_io_tree *tree, struct extent_state *state, unsigned *bits) { if (tree->ops && tree->ops->clear_bit_hook) - tree->ops->clear_bit_hook(tree->mapping->host, state, bits); + tree->ops->clear_bit_hook(BTRFS_I(tree->mapping->host), + state, bits); } static void set_state_bits(struct extent_io_tree *tree, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 345fc33f843d..cd8b3dd6948d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -84,6 +84,7 @@ extern void le_bitmap_clear(u8 *map, unsigned int start, int len); struct extent_state; struct btrfs_root; +struct btrfs_inode; struct btrfs_io_bio; struct io_failure_record; @@ -107,8 +108,9 @@ struct extent_io_ops { struct extent_state *state, int 
uptodate); void (*set_bit_hook)(struct inode *inode, struct extent_state *state, unsigned *bits); - void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, - unsigned *bits); + void (*clear_bit_hook)(struct btrfs_inode *inode, + struct extent_state *state, + unsigned *bits); void (*merge_extent_hook)(struct inode *inode, struct extent_state *new, struct extent_state *other); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0b46542010fb..0600d55bb173 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1703,18 +1703,18 @@ static void btrfs_set_bit_hook(struct inode *inode, /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -static void btrfs_clear_bit_hook(struct inode *inode, +static void btrfs_clear_bit_hook(struct btrfs_inode *inode, struct extent_state *state, unsigned *bits) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); u64 len = state->end + 1 - state->start; u32 num_extents = count_max_extents(len); - spin_lock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) - BTRFS_I(inode)->defrag_bytes -= len; - spin_unlock(&BTRFS_I(inode)->lock); + inode->defrag_bytes -= len; + spin_unlock(&inode->lock); /* * set_bit and clear bit hooks normally require _irqsave/restore @@ -1722,15 +1722,15 @@ static void btrfs_clear_bit_hook(struct inode *inode, * bit, which is only set or cleared with irqs on */ if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { - struct btrfs_root *root = BTRFS_I(inode)->root; - bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); + struct btrfs_root *root = inode->root; + bool do_list = !btrfs_is_free_space_inode(inode); if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents -= num_extents; - spin_unlock(&BTRFS_I(inode)->lock); + spin_lock(&inode->lock); + inode->outstanding_extents -= num_extents; + spin_unlock(&inode->lock); } /* @@ -1740,7 +1740,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, */ if (*bits & EXTENT_DO_ACCOUNTING && root != fs_info->tree_root) - btrfs_delalloc_release_metadata(BTRFS_I(inode), len); + btrfs_delalloc_release_metadata(inode, len); /* For sanity tests. 
*/ if (btrfs_is_testing(fs_info)) @@ -1750,18 +1750,19 @@ static void btrfs_clear_bit_hook(struct inode *inode, && do_list && !(state->state & EXTENT_NORESERVE) && (*bits & (EXTENT_DO_ACCOUNTING | EXTENT_CLEAR_DATA_RESV))) - btrfs_free_reserved_data_space_noquota(inode, + btrfs_free_reserved_data_space_noquota( + &inode->vfs_inode, state->start, len); __percpu_counter_add(&fs_info->delalloc_bytes, -len, fs_info->delalloc_batch); - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->delalloc_bytes -= len; - if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && + spin_lock(&inode->lock); + inode->delalloc_bytes -= len; + if (do_list && inode->delalloc_bytes == 0 && test_bit(BTRFS_INODE_IN_DELALLOC_LIST, - &BTRFS_I(inode)->runtime_flags)) - btrfs_del_delalloc_inode(root, inode); - spin_unlock(&BTRFS_I(inode)->lock); + &inode->runtime_flags)) + btrfs_del_delalloc_inode(root, &inode->vfs_inode); + spin_unlock(&inode->lock); } } -- GitLab From a2f392e4015a45c896528d5fdfcec4bad3ddd121 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:04 +0200 Subject: [PATCH 0708/1084] btrfs: Make clone_update_extent_map take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bb4e6ec26b7f..12ae210f8719 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3311,20 +3311,19 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, return ret; } -static void clone_update_extent_map(struct inode *inode, +static void clone_update_extent_map(struct btrfs_inode *inode, const struct btrfs_trans_handle *trans, const struct btrfs_path *path, const u64 hole_offset, const u64 hole_len) { - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_map *em; int ret; em = alloc_extent_map(); if (!em) { - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); return; } @@ -3333,13 +3332,12 @@ static void clone_update_extent_map(struct inode *inode, fi = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_file_extent_item); - btrfs_extent_item_to_extent_map(BTRFS_I(inode), path, fi, - false, em); + btrfs_extent_item_to_extent_map(inode, path, fi, false, em); em->generation = -1; if (btrfs_file_extent_type(path->nodes[0], fi) == BTRFS_FILE_EXTENT_INLINE) set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); } else { em->start = hole_offset; em->len = hole_len; @@ -3360,13 +3358,12 @@ static void clone_update_extent_map(struct inode *inode, free_extent_map(em); break; } - btrfs_drop_extent_cache(BTRFS_I(inode), em->start, + btrfs_drop_extent_cache(inode, em->start, em->start + em->len - 1, 0); } if (ret) - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &BTRFS_I(inode)->runtime_flags); + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); } /* @@ -3792,11 +3789,12 @@ static int btrfs_clone(struct inode *src, struct inode *inode, /* If we have an implicit hole (NO_HOLES feature). 
*/ if (drop_start < new_key.offset) - clone_update_extent_map(inode, trans, + clone_update_extent_map(BTRFS_I(inode), trans, NULL, drop_start, new_key.offset - drop_start); - clone_update_extent_map(inode, trans, path, 0, 0); + clone_update_extent_map(BTRFS_I(inode), trans, + path, 0, 0); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); @@ -3846,8 +3844,9 @@ static int btrfs_clone(struct inode *src, struct inode *inode, btrfs_end_transaction(trans); goto out; } - clone_update_extent_map(inode, trans, NULL, last_dest_end, - destoff + len - last_dest_end); + clone_update_extent_map(BTRFS_I(inode), trans, NULL, + last_dest_end, + destoff + len - last_dest_end); ret = clone_finish_inode_update(trans, inode, destoff + len, destoff, olen, no_time_update); } -- GitLab From 1c8c9c5216295711c79d0e512dc8b3d5f1bfc35d Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:05 +0200 Subject: [PATCH 0709/1084] btrfs: Make check_extent_to_block take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index bdf58b0eaef8..b0251eb1239f 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4236,7 +4236,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work) scrub_pending_trans_workers_dec(sctx); } -static int check_extent_to_block(struct inode *inode, u64 start, u64 len, +static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len, u64 logical) { struct extent_state *cached_state = NULL; @@ -4246,10 +4246,10 @@ static int check_extent_to_block(struct inode *inode, u64 start, u64 len, u64 lockstart = start, lockend = start + len - 1; int ret = 0; - io_tree = &BTRFS_I(inode)->io_tree; + io_tree = &inode->io_tree; lock_extent_bits(io_tree, lockstart, lockend, &cached_state); - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart, len); + ordered = btrfs_lookup_ordered_range(inode, lockstart, len); if (ordered) { btrfs_put_ordered_extent(ordered); ret = 1; @@ -4325,7 +4325,8 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, io_tree = &BTRFS_I(inode)->io_tree; nocow_ctx_logical = nocow_ctx->logical; - ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical); + ret = check_extent_to_block(BTRFS_I(inode), offset, len, + nocow_ctx_logical); if (ret) { ret = ret > 0 ? 0 : ret; goto out; @@ -4372,7 +4373,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, } } - ret = check_extent_to_block(inode, offset, len, + ret = check_extent_to_block(BTRFS_I(inode), offset, len, nocow_ctx_logical); if (ret) { ret = ret > 0 ? 0 : ret; -- GitLab From fc4f21b1d8d023cf0a2b1b050ae18e15dbe7068e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:06 +0200 Subject: [PATCH 0710/1084] btrfs: Make get_extent_t take btrfs_inode In addition to changing the signature, this patch also switches all the functions which are used as an argument to also take btrfs_inode. Namely those are: btrfs_get_extent and btrfs_get_extent_filemap. 
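As a minimal illustration of the call-site pattern this signature change produces (a sketch mirroring the hunks below, not additional code from the patch): callers that hold a VFS struct inode now wrap it with BTRFS_I() before calling a get_extent_t implementation.

	/* Sketch: extent-map lookup after the btrfs_inode conversion. */
	struct extent_map *em;

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
	if (IS_ERR_OR_NULL(em))
		return em ? PTR_ERR(em) : -ENOMEM;
	/* ... use em ... */
	free_extent_map(em);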
Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 12 +++++----- fs/btrfs/disk-io.c | 6 ++--- fs/btrfs/extent_io.c | 6 ++--- fs/btrfs/extent_io.h | 2 +- fs/btrfs/file.c | 7 +++--- fs/btrfs/inode.c | 34 +++++++++++++++------------- fs/btrfs/ioctl.c | 2 +- fs/btrfs/tests/inode-tests.c | 44 +++++++++++++++++++----------------- 8 files changed, 59 insertions(+), 54 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2e0845eafcbc..809736ec549b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3100,9 +3100,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, int delay_iput); void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); -struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, - size_t pg_offset, u64 start, u64 len, - int create); +struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, + struct page *page, size_t pg_offset, u64 start, + u64 len, int create); noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes); @@ -3166,9 +3166,9 @@ void btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct btrfs_root *root, int *was_new); -struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, - size_t pg_offset, u64 start, u64 end, - int create); +struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, + struct page *page, size_t pg_offset, + u64 start, u64 end, int create); int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b06f557c176..a53ff3bff8eb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -219,12 +219,12 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, * extents on the btree inode are pretty simple, there's one extent * that covers the entire device */ -static struct extent_map *btree_get_extent(struct inode *inode, +static struct extent_map *btree_get_extent(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_map *em; int ret; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d21082962565..c3abf846a449 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2860,7 +2860,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, *em_cached = NULL; } - em = get_extent(inode, page, pg_offset, start, len, 0); + em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0); if (em_cached && !IS_ERR_OR_NULL(em)) { BUG_ON(*em_cached); atomic_inc(&em->refs); @@ -3373,7 +3373,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, page_end, NULL, 1); break; } - em = epd->get_extent(inode, page, pg_offset, cur, + em = epd->get_extent(BTRFS_I(inode), page, pg_offset, cur, end - cur + 1, 1); if (IS_ERR_OR_NULL(em)) { SetPageError(page); @@ -4338,7 +4338,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, if (len == 0) break; len = ALIGN(len, sectorsize); - em = get_extent(inode, NULL, 0, offset, len, 0); + em = 
get_extent(BTRFS_I(inode), NULL, 0, offset, len, 0); if (IS_ERR_OR_NULL(em)) return em; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index cd8b3dd6948d..c16260c6c14f 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -211,7 +211,7 @@ static inline int extent_compress_type(unsigned long bio_flags) struct extent_map_tree; -typedef struct extent_map *(get_extent_t)(struct inode *inode, +typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dff7ec1770c1..48dfb8e4baf2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2341,7 +2341,7 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len) struct extent_map *em; int ret = 0; - em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, *start, *len, 0); if (IS_ERR_OR_NULL(em)) { if (!em) ret = -ENOMEM; @@ -2833,7 +2833,7 @@ static long btrfs_fallocate(struct file *file, int mode, /* First, check if we exceed the qgroup limit */ INIT_LIST_HEAD(&reserve_list); while (1) { - em = btrfs_get_extent(inode, NULL, 0, cur_offset, + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, alloc_end - cur_offset, 0); if (IS_ERR_OR_NULL(em)) { if (!em) @@ -2960,7 +2960,8 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) &cached_state); while (start < inode->i_size) { - em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); + em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, + start, len, 0); if (IS_ERR(em)) { ret = PTR_ERR(em); em = NULL; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0600d55bb173..7e12c727f791 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4852,7 +4852,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) cur_offset = hole_start; while (1) { - em = btrfs_get_extent(inode, NULL, 0, cur_offset, + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset, block_end - cur_offset, 0); if (IS_ERR(em)) { err = PTR_ERR(em); @@ -6732,25 +6732,26 @@ static noinline int uncompress_inline(struct btrfs_path *path, * This also copies inline extents directly into the page. 
*/ -struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, - size_t pg_offset, u64 start, u64 len, - int create) +struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, + struct page *page, + size_t pg_offset, u64 start, u64 len, + int create) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); int ret; int err = 0; u64 extent_start = 0; u64 extent_end = 0; - u64 objectid = btrfs_ino(BTRFS_I(inode)); + u64 objectid = btrfs_ino(inode); u32 found_type; struct btrfs_path *path = NULL; - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; struct btrfs_file_extent_item *item; struct extent_buffer *leaf; struct btrfs_key found_key; struct extent_map *em = NULL; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; + struct extent_io_tree *io_tree = &inode->io_tree; struct btrfs_trans_handle *trans = NULL; const bool new_inline = !page || create; @@ -6863,7 +6864,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, goto not_found_em; } - btrfs_extent_item_to_extent_map(BTRFS_I(inode), path, item, + btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em); if (found_type == BTRFS_FILE_EXTENT_REG || @@ -7000,7 +7001,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, write_unlock(&em_tree->lock); out: - trace_btrfs_get_extent(root, BTRFS_I(inode), em); + trace_btrfs_get_extent(root, inode, em); btrfs_free_path(path); if (trans) { @@ -7016,9 +7017,10 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, return em; } -struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, - size_t pg_offset, u64 start, u64 len, - int create) +struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, + struct page *page, + size_t pg_offset, u64 start, u64 len, + int create) { struct extent_map *em; struct extent_map *hole_em = NULL; @@ -7055,7 +7057,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag em = NULL; /* ok, we didn't find anything, lets look for delalloc */ - found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start, + found = count_range_bits(&inode->io_tree, &range_start, end, len, EXTENT_DELALLOC, 1); found_end = range_start + found; if (found_end < range_start) @@ -7625,7 +7627,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, goto err; } - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); if (IS_ERR(em)) { ret = PTR_ERR(em); goto unlock_err; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 12ae210f8719..45a708555f9b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1009,7 +1009,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) /* get the big lock and read metadata off disk */ lock_extent_bits(io_tree, start, end, &cached); - em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); if (IS_ERR(em)) diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 924bcbf43275..8c91d03cc82d 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -278,7 +278,7 @@ static 
noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) /* First with no extents */ BTRFS_I(inode)->root = root; - em = btrfs_get_extent(inode, NULL, 0, 0, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize, 0); if (IS_ERR(em)) { em = NULL; test_msg("Got an error when we shouldn't have\n"); @@ -302,7 +302,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) */ setup_file_extents(root, sectorsize); - em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -323,7 +323,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -350,7 +350,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -372,7 +372,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Regular extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -399,7 +399,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* The next 3 are split extents */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -428,7 +428,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -450,7 +450,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -484,7 +484,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -513,7 +513,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* The next 3 are a half written prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -543,7 
+543,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -576,7 +576,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -611,7 +611,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Now for the compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -645,7 +645,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* Split compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -680,7 +680,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -707,7 +707,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -742,7 +742,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) free_extent_map(em); /* A hole between regular extents but no hole extent */ - em = btrfs_get_extent(inode, NULL, 0, offset + 6, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6, + sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -769,7 +770,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, 4096 * 1024, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, 4096 * 1024, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -802,7 +803,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) offset = em->start + em->len; free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -885,7 +886,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) insert_inode_item_key(root); insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize, sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1); - em = btrfs_get_extent(inode, NULL, 0, 0, 2 * sectorsize, 0); + em = 
btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; @@ -907,7 +908,8 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) } free_extent_map(em); - em = btrfs_get_extent(inode, NULL, 0, sectorsize, 2 * sectorsize, 0); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize, + 2 * sectorsize, 0); if (IS_ERR(em)) { test_msg("Got an error when we shouldn't have\n"); goto out; -- GitLab From 9e3e97f45c1f4debe53f72ad309ec06890d3aac2 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:07 +0200 Subject: [PATCH 0711/1084] btrfs: Make btrfs_del_delalloc_inode take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7e12c727f791..1934db1af2fc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1632,15 +1632,15 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root, } static void btrfs_del_delalloc_inode(struct btrfs_root *root, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); spin_lock(&root->delalloc_lock); - if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) { - list_del_init(&BTRFS_I(inode)->delalloc_inodes); + if (!list_empty(&inode->delalloc_inodes)) { + list_del_init(&inode->delalloc_inodes); clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, - &BTRFS_I(inode)->runtime_flags); + &inode->runtime_flags); root->nr_delalloc_inodes--; if (!root->nr_delalloc_inodes) { spin_lock(&fs_info->delalloc_root_lock); @@ -1760,8 +1760,8 @@ static void btrfs_clear_bit_hook(struct btrfs_inode *inode, inode->delalloc_bytes -= len; if (do_list && inode->delalloc_bytes == 0 && test_bit(BTRFS_INODE_IN_DELALLOC_LIST, - &inode->runtime_flags)) - btrfs_del_delalloc_inode(root, &inode->vfs_inode); + &inode->runtime_flags)) + btrfs_del_delalloc_inode(root, inode); spin_unlock(&inode->lock); } } -- GitLab From db0a669fb002416faafe34481d6a6e21cdf0e926 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:08 +0200 Subject: [PATCH 0712/1084] btrfs: Make btrfs_add_link take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 39 ++++++++++++++++++++------------------- fs/btrfs/tree-log.c | 8 +++++--- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 809736ec549b..f03c2f285eb1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3129,7 +3129,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_inode *dir, struct btrfs_inode *inode, const char *name, int name_len); int btrfs_add_link(struct btrfs_trans_handle *trans, - struct inode *parent_inode, struct inode *inode, + struct btrfs_inode *parent_inode, struct btrfs_inode *inode, const char *name, int name_len, int add_backref, u64 index); int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1934db1af2fc..91c6be6a72df 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6251,18 +6251,18 @@ static inline u8 btrfs_inode_type(struct inode *inode) * inode to the parent directory. 
*/ int btrfs_add_link(struct btrfs_trans_handle *trans, - struct inode *parent_inode, struct inode *inode, + struct btrfs_inode *parent_inode, struct btrfs_inode *inode, const char *name, int name_len, int add_backref, u64 index) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); int ret = 0; struct btrfs_key key; - struct btrfs_root *root = BTRFS_I(parent_inode)->root; - u64 ino = btrfs_ino(BTRFS_I(inode)); - u64 parent_ino = btrfs_ino(BTRFS_I(parent_inode)); + struct btrfs_root *root = parent_inode->root; + u64 ino = btrfs_ino(inode); + u64 parent_ino = btrfs_ino(parent_inode); if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { - memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); + memcpy(&key, &inode->root->root_key, sizeof(key)); } else { key.objectid = ino; key.type = BTRFS_INODE_ITEM_KEY; @@ -6283,8 +6283,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, return ret; ret = btrfs_insert_dir_item(trans, root, name, name_len, - BTRFS_I(parent_inode), &key, - btrfs_inode_type(inode), index); + parent_inode, &key, + btrfs_inode_type(&inode->vfs_inode), index); if (ret == -EEXIST || ret == -EOVERFLOW) goto fail_dir_item; else if (ret) { @@ -6292,12 +6292,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, return ret; } - btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + + btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + name_len * 2); - inode_inc_iversion(parent_inode); - parent_inode->i_mtime = parent_inode->i_ctime = - current_time(parent_inode); - ret = btrfs_update_inode(trans, root, parent_inode); + inode_inc_iversion(&parent_inode->vfs_inode); + parent_inode->vfs_inode.i_mtime = parent_inode->vfs_inode.i_ctime = + current_time(&parent_inode->vfs_inode); + ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode); if (ret) btrfs_abort_transaction(trans, ret); return ret; @@ -6324,7 +6324,7 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, struct inode *dir, struct dentry *dentry, struct inode *inode, int backref, u64 index) { - int err = btrfs_add_link(trans, dir, inode, + int err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), dentry->d_name.name, dentry->d_name.len, backref, index); if (err > 0) @@ -6601,8 +6601,9 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) goto out_fail_inode; - err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, - dentry->d_name.len, 0, index); + err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), + dentry->d_name.name, + dentry->d_name.len, 0, index); if (err) goto out_fail_inode; @@ -9592,7 +9593,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, goto out_fail; } - ret = btrfs_add_link(trans, new_dir, old_inode, + ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), new_dentry->d_name.name, new_dentry->d_name.len, 0, old_idx); if (ret) { @@ -9600,7 +9601,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, goto out_fail; } - ret = btrfs_add_link(trans, old_dir, new_inode, + ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode), old_dentry->d_name.name, old_dentry->d_name.len, 0, new_idx); if (ret) { @@ -9877,7 +9878,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - ret = btrfs_add_link(trans, new_dir, old_inode, + ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode), new_dentry->d_name.name, new_dentry->d_name.len, 0, index); if (ret) { diff --git 
a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f8965b081212..c9ada4b90004 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1322,8 +1322,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, } /* insert our name */ - ret = btrfs_add_link(trans, dir, inode, name, namelen, - 0, ref_index); + ret = btrfs_add_link(trans, BTRFS_I(dir), + BTRFS_I(inode), + name, namelen, 0, ref_index); if (ret) goto out; @@ -1641,7 +1642,8 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, return -EIO; } - ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); + ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, + name_len, 1, index); /* FIXME, put inode into FIXUP list */ -- GitLab From cef415af208984650de925e28d40aa4a6e8513f4 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:09 +0200 Subject: [PATCH 0713/1084] btrfs: Make btrfs_add_nondir take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/inode.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 91c6be6a72df..dafd3fdd6f2b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6321,10 +6321,10 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, } static int btrfs_add_nondir(struct btrfs_trans_handle *trans, - struct inode *dir, struct dentry *dentry, - struct inode *inode, int backref, u64 index) + struct btrfs_inode *dir, struct dentry *dentry, + struct btrfs_inode *inode, int backref, u64 index) { - int err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), + int err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, dentry->d_name.len, backref, index); if (err > 0) @@ -6378,7 +6378,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (err) goto out_unlock_inode; - err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), + 0, index); if (err) { goto out_unlock_inode; } else { @@ -6455,7 +6456,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (err) goto out_unlock_inode; - err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), + 0, index); if (err) goto out_unlock_inode; @@ -6521,7 +6523,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, ihold(inode); set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); - err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); + err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode), + 1, index); if (err) { drop_inode = 1; @@ -9703,8 +9706,8 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans, if (ret) goto out; - ret = btrfs_add_nondir(trans, dir, dentry, - inode, 0, index); + ret = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, + BTRFS_I(inode), 0, index); if (ret) goto out; @@ -10253,7 +10256,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, * elsewhere above. 
*/ if (!err) - err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, + BTRFS_I(inode), 0, index); if (err) { drop_inode = 1; goto out_unlock_inode; -- GitLab From abcefb1eeeff04734a59b4dd3724abbf0688252c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:10 +0200 Subject: [PATCH 0714/1084] btrfs: make btrfs_inode_block_unlocked_dio take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 4 ++-- fs/btrfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index c10d21b2f0df..d84cf2b0c7b6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -311,9 +311,9 @@ struct btrfs_dio_private { * to grab i_mutex. It is used to avoid the endless truncate due to * nonlocked dio read. */ -static inline void btrfs_inode_block_unlocked_dio(struct inode *inode) +static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode) { - set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags); + set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); smp_mb(); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dafd3fdd6f2b..59b93a2369aa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5002,7 +5002,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) truncate_setsize(inode, newsize); /* Disable nonlocked read DIO to avoid the end less truncate */ - btrfs_inode_block_unlocked_dio(inode); + btrfs_inode_block_unlocked_dio(BTRFS_I(inode)); inode_dio_wait(inode); btrfs_inode_resume_unlocked_dio(inode); -- GitLab From 0b581701d9771c55a908c612ca49850e6088fe08 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Feb 2017 13:51:11 +0200 Subject: [PATCH 0715/1084] btrfs: make btrfs_inode_resume_unlocked_dio take btrfs_inode Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 5 ++--- fs/btrfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index d84cf2b0c7b6..0c6baaba0651 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -317,11 +317,10 @@ static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode) smp_mb(); } -static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) +static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode) { smp_mb__before_atomic(); - clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, - &BTRFS_I(inode)->runtime_flags); + clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); } static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 59b93a2369aa..596e5cb4bfa2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5004,7 +5004,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) /* Disable nonlocked read DIO to avoid the end less truncate */ btrfs_inode_block_unlocked_dio(BTRFS_I(inode)); inode_dio_wait(inode); - btrfs_inode_resume_unlocked_dio(inode); + btrfs_inode_resume_unlocked_dio(BTRFS_I(inode)); ret = btrfs_truncate(inode); if (ret && inode->i_nlink) { -- GitLab From 374d446d25d6271ee615952a3b7f123ba4983c35 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 13 Jan 2017 22:51:08 +0100 Subject: [PATCH 0716/1084] ARM: 8636/1: Cleanup sanity_check_meminfo The logic for sanity_check_meminfo has become difficult to follow. 
Clean up the code so it's more obvious what the code is actually trying to do. Additionally, meminfo is now removed so rename the function to better describe its purpose. Tested-by: Magnus Lilja Reviewed-by: Nicolas Pitre Signed-off-by: Laura Abbott Signed-off-by: Laura Abbott Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 4 +-- arch/arm/mm/mmu.c | 66 +++++++++++++++-------------------------- arch/arm/mm/nommu.c | 8 ++--- 3 files changed, 30 insertions(+), 48 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 34e3f3c45634..8a8051cf57d1 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -81,7 +81,7 @@ __setup("fpe=", fpe_setup); extern void init_default_cache_policy(unsigned long); extern void paging_init(const struct machine_desc *desc); extern void early_paging_init(const struct machine_desc *); -extern void sanity_check_meminfo(void); +extern void adjust_lowmem_bounds(void); extern enum reboot_mode reboot_mode; extern void setup_dma_zone(const struct machine_desc *desc); @@ -1093,7 +1093,7 @@ void __init setup_arch(char **cmdline_p) setup_dma_zone(mdesc); xen_early_init(); efi_init(); - sanity_check_meminfo(); + adjust_lowmem_bounds(); arm_memblock_init(mdesc); early_ioremap_reset(); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4001dd15818d..b8f70a3bb7f8 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1152,13 +1152,11 @@ early_param("vmalloc", early_vmalloc); phys_addr_t arm_lowmem_limit __initdata = 0; -void __init sanity_check_meminfo(void) +void __init adjust_lowmem_bounds(void) { phys_addr_t memblock_limit = 0; - int highmem = 0; u64 vmalloc_limit; struct memblock_region *reg; - bool should_use_highmem = false; /* * Let's use our own (unoptimized) equivalent of __pa() that is @@ -1172,43 +1170,18 @@ void __init sanity_check_meminfo(void) for_each_memblock(memory, reg) { phys_addr_t block_start = reg->base; phys_addr_t block_end = reg->base + reg->size; - phys_addr_t size_limit = reg->size; - if (reg->base >= vmalloc_limit) - highmem = 1; - else - size_limit = vmalloc_limit - reg->base; - - - if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { - - if (highmem) { - pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n", - &block_start, &block_end); - memblock_remove(reg->base, reg->size); - should_use_highmem = true; - continue; - } - - if (reg->size > size_limit) { - phys_addr_t overlap_size = reg->size - size_limit; - - pr_notice("Truncating RAM at %pa-%pa", - &block_start, &block_end); - block_end = vmalloc_limit; - pr_cont(" to -%pa", &block_end); - memblock_remove(vmalloc_limit, overlap_size); - should_use_highmem = true; - } - } - - if (!highmem) { - if (block_end > arm_lowmem_limit) { - if (reg->size > size_limit) - arm_lowmem_limit = vmalloc_limit; - else - arm_lowmem_limit = block_end; - } + if (reg->base < vmalloc_limit) { + if (block_end > arm_lowmem_limit) + /* + * Compare as u64 to ensure vmalloc_limit does + * not get truncated. block_end should always + * fit in phys_addr_t so there should be no + * issue with assignment. 
+ */ + arm_lowmem_limit = min_t(u64, + vmalloc_limit, + block_end); /* * Find the first non-pmd-aligned page, and point @@ -1233,9 +1206,6 @@ void __init sanity_check_meminfo(void) } } - if (should_use_highmem) - pr_notice("Consider using a HIGHMEM enabled kernel.\n"); - high_memory = __va(arm_lowmem_limit - 1) + 1; /* @@ -1248,6 +1218,18 @@ void __init sanity_check_meminfo(void) if (!memblock_limit) memblock_limit = arm_lowmem_limit; + if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { + if (memblock_end_of_DRAM() > arm_lowmem_limit) { + phys_addr_t end = memblock_end_of_DRAM(); + + pr_notice("Ignoring RAM at %pa-%pa\n", + &memblock_limit, &end); + pr_notice("Consider using a HIGHMEM enabled kernel.\n"); + + memblock_remove(memblock_limit, end - memblock_limit); + } + } + memblock_set_current_limit(memblock_limit); } diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 2740967727e2..13a25d6282f8 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -85,7 +85,7 @@ static unsigned long irbar_read(void) } /* MPU initialisation functions */ -void __init sanity_check_meminfo_mpu(void) +void __init adjust_lowmem_bounds_mpu(void) { phys_addr_t phys_offset = PHYS_OFFSET; phys_addr_t aligned_region_size, specified_mem_size, rounded_mem_size; @@ -274,7 +274,7 @@ void __init mpu_setup(void) } } #else -static void sanity_check_meminfo_mpu(void) {} +static void adjust_lowmem_bounds_mpu(void) {} static void __init mpu_setup(void) {} #endif /* CONFIG_ARM_MPU */ @@ -295,10 +295,10 @@ void __init arm_mm_memblock_reserve(void) #endif } -void __init sanity_check_meminfo(void) +void __init adjust_lowmem_bounds(void) { phys_addr_t end; - sanity_check_meminfo_mpu(); + adjust_lowmem_bounds_mpu(); end = memblock_end_of_DRAM(); high_memory = __va(end - 1) + 1; memblock_set_current_limit(end); -- GitLab From 985626564eedc470ce2866e53938303368ad41b7 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 13 Jan 2017 22:51:45 +0100 Subject: [PATCH 0717/1084] ARM: 8637/1: Adjust memory boundaries after reservations adjust_lowmem_bounds is responsible for setting up the boundary for lowmem/highmem. This needs to be setup before memblock reservations can occur. At the time memblock reservations can occur, memory can also be removed from the system. The lowmem/highmem boundary and end of memory may be affected by this but it is currently not recalculated. On some systems this may be harmless, on others this may result in incorrect ranges being passed to the main memory allocator. Correct this by recalculating the lowmem/highmem boundary after all reservations have been made. Tested-by: Magnus Lilja Signed-off-by: Laura Abbott Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 6 ++++++ arch/arm/mm/mmu.c | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 8a8051cf57d1..f4e54503afa9 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1093,8 +1093,14 @@ void __init setup_arch(char **cmdline_p) setup_dma_zone(mdesc); xen_early_init(); efi_init(); + /* + * Make sure the calculation for lowmem/highmem is set appropriately + * before reserving/allocating any mmeory + */ adjust_lowmem_bounds(); arm_memblock_init(mdesc); + /* Memory may have been removed so recalculate the bounds. 
*/ + adjust_lowmem_bounds(); early_ioremap_reset(); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index b8f70a3bb7f8..5cbfd9f86412 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1157,6 +1157,7 @@ void __init adjust_lowmem_bounds(void) phys_addr_t memblock_limit = 0; u64 vmalloc_limit; struct memblock_region *reg; + phys_addr_t lowmem_limit = 0; /* * Let's use our own (unoptimized) equivalent of __pa() that is @@ -1172,14 +1173,14 @@ void __init adjust_lowmem_bounds(void) phys_addr_t block_end = reg->base + reg->size; if (reg->base < vmalloc_limit) { - if (block_end > arm_lowmem_limit) + if (block_end > lowmem_limit) /* * Compare as u64 to ensure vmalloc_limit does * not get truncated. block_end should always * fit in phys_addr_t so there should be no * issue with assignment. */ - arm_lowmem_limit = min_t(u64, + lowmem_limit = min_t(u64, vmalloc_limit, block_end); @@ -1200,12 +1201,14 @@ void __init adjust_lowmem_bounds(void) if (!IS_ALIGNED(block_start, PMD_SIZE)) memblock_limit = block_start; else if (!IS_ALIGNED(block_end, PMD_SIZE)) - memblock_limit = arm_lowmem_limit; + memblock_limit = lowmem_limit; } } } + arm_lowmem_limit = lowmem_limit; + high_memory = __va(arm_lowmem_limit - 1) + 1; /* -- GitLab From 4e23612bb09809ef89eb396fd5f568392e0f1ec7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 15 Jan 2017 03:57:03 +0100 Subject: [PATCH 0718/1084] ARM: 8638/1: mtd: lart: Rename partition defines to be prefixed with PART_ In preparation for defining KERNEL_START on ARM, rename KERNEL_START to PART_KERNEL_START, and to be consistent, do this for all partition-related constants. Acked-by: Boris Brezillon Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- drivers/mtd/devices/lart.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/devices/lart.c b/drivers/mtd/devices/lart.c index 82bd00af5cc3..268aae45b514 100644 --- a/drivers/mtd/devices/lart.c +++ b/drivers/mtd/devices/lart.c @@ -75,18 +75,18 @@ static char module_name[] = "lart"; /* blob */ #define NUM_BLOB_BLOCKS FLASH_NUMBLOCKS_16m_PARAM -#define BLOB_START 0x00000000 -#define BLOB_LEN (NUM_BLOB_BLOCKS * FLASH_BLOCKSIZE_PARAM) +#define PART_BLOB_START 0x00000000 +#define PART_BLOB_LEN (NUM_BLOB_BLOCKS * FLASH_BLOCKSIZE_PARAM) /* kernel */ #define NUM_KERNEL_BLOCKS 7 -#define KERNEL_START (BLOB_START + BLOB_LEN) -#define KERNEL_LEN (NUM_KERNEL_BLOCKS * FLASH_BLOCKSIZE_MAIN) +#define PART_KERNEL_START (PART_BLOB_START + PART_BLOB_LEN) +#define PART_KERNEL_LEN (NUM_KERNEL_BLOCKS * FLASH_BLOCKSIZE_MAIN) /* initial ramdisk */ #define NUM_INITRD_BLOCKS 24 -#define INITRD_START (KERNEL_START + KERNEL_LEN) -#define INITRD_LEN (NUM_INITRD_BLOCKS * FLASH_BLOCKSIZE_MAIN) +#define PART_INITRD_START (PART_KERNEL_START + PART_KERNEL_LEN) +#define PART_INITRD_LEN (NUM_INITRD_BLOCKS * FLASH_BLOCKSIZE_MAIN) /* * See section 4.0 in "3 Volt Fast Boot Block Flash Memory" Intel Datasheet @@ -587,20 +587,20 @@ static struct mtd_partition lart_partitions[] = { /* blob */ { .name = "blob", - .offset = BLOB_START, - .size = BLOB_LEN, + .offset = PART_BLOB_START, + .size = PART_BLOB_LEN, }, /* kernel */ { .name = "kernel", - .offset = KERNEL_START, /* MTDPART_OFS_APPEND */ - .size = KERNEL_LEN, + .offset = PART_KERNEL_START, /* MTDPART_OFS_APPEND */ + .size = PART_KERNEL_LEN, }, /* initial ramdisk / file system */ { .name = "file system", - .offset = INITRD_START, /* MTDPART_OFS_APPEND */ - .size = INITRD_LEN, /* MTDPART_SIZ_FULL */ + .offset = PART_INITRD_START, /* 
MTDPART_OFS_APPEND */ + .size = PART_INITRD_LEN, /* MTDPART_SIZ_FULL */ } }; #define NUM_PARTITIONS ARRAY_SIZE(lart_partitions) -- GitLab From a09975bf6c756e4555a95258ff4b2286dcfddc4e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 15 Jan 2017 03:57:40 +0100 Subject: [PATCH 0719/1084] ARM: 8639/1: Define KERNEL_START and KERNEL_END In preparation for adding CONFIG_DEBUG_VIRTUAL support, define a set of common constants: KERNEL_START and KERNEL_END which abstract CONFIG_XIP_KERNEL vs. !CONFIG_XIP_KERNEL. Update the code where relevant. Acked-by: Russell King Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 7 +++++++ arch/arm/mm/init.c | 7 ++----- arch/arm/mm/mmu.c | 6 +----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 76cbd9c674df..bee7511c5098 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -111,6 +111,13 @@ #endif /* !CONFIG_MMU */ +#ifdef CONFIG_XIP_KERNEL +#define KERNEL_START _sdata +#else +#define KERNEL_START _stext +#endif +#define KERNEL_END _end + /* * We fix the TCM memories max 32 KiB ITCM resp DTCM at these * locations diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 370581aeb871..4127f578086c 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -230,11 +230,8 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) void __init arm_memblock_init(const struct machine_desc *mdesc) { /* Register the kernel text, kernel data and initrd with memblock. */ -#ifdef CONFIG_XIP_KERNEL - memblock_reserve(__pa(_sdata), _end - _sdata); -#else - memblock_reserve(__pa(_stext), _end - _stext); -#endif + memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START); + #ifdef CONFIG_BLK_DEV_INITRD /* FDT scan will populate initrd_start */ if (initrd_start && !phys_initrd_size) { diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 5cbfd9f86412..4e016d7f37b3 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1422,11 +1422,7 @@ static void __init kmap_init(void) static void __init map_lowmem(void) { struct memblock_region *reg; -#ifdef CONFIG_XIP_KERNEL - phys_addr_t kernel_x_start = round_down(__pa(_sdata), SECTION_SIZE); -#else - phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); -#endif + phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), SECTION_SIZE); phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); /* Map all the lowmem memory banks. */ -- GitLab From e377cd8221ebbe0b517861aa3d823bb42f9abbd4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 15 Jan 2017 03:59:00 +0100 Subject: [PATCH 0720/1084] ARM: 8640/1: Add support for CONFIG_DEBUG_VIRTUAL x86 has an option: CONFIG_DEBUG_VIRTUAL to do additional checks on virt_to_phys calls. The goal is to catch users who are calling virt_to_phys on non-linear addresses immediately. This includes caller using __virt_to_phys() on image addresses instead of __pa_symbol(). This is a generally useful debug feature to spot bad code (particulary in drivers). 
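As a hedged sketch (not part of the patch; the function and buffer below are made up for illustration), the distinction the new checks enforce is: virt_to_phys()/__pa() are only valid for addresses in the kernel's linear mapping, while kernel image symbols should go through __pa_symbol().

	#include <linux/slab.h>
	#include <asm/memory.h>
	#include <asm/sections.h>

	static void debug_virtual_example(void)
	{
		void *buf = kmalloc(64, GFP_KERNEL);	/* linear-map address */
		phys_addr_t p, s;

		if (!buf)
			return;

		p = virt_to_phys(buf);		/* fine: linear mapping */
		s = __pa_symbol(_stext);	/* fine: kernel image symbol */

		/*
		 * Passing a vmalloc() or ioremap() pointer to virt_to_phys()
		 * would now hit the WARN() added in arch/arm/mm/physaddr.c.
		 */
		kfree(buf);
		(void)p;
		(void)s;
	}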
Acked-by: Russell King Acked-by: Laura Abbott Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/include/asm/memory.h | 15 +++++++-- arch/arm/mm/Makefile | 1 + arch/arm/mm/physaddr.c | 57 +++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 arch/arm/mm/physaddr.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5fab553fd03a..4700294f4e09 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2,6 +2,7 @@ config ARM bool default y select ARCH_CLOCKSOURCE_DATA + select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index bee7511c5098..c30d0d82a105 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -213,7 +213,7 @@ extern const void *__pv_table_begin, *__pv_table_end; : "r" (x), "I" (__PV_BITS_31_24) \ : "cc") -static inline phys_addr_t __virt_to_phys(unsigned long x) +static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) { phys_addr_t t; @@ -245,7 +245,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x) #define PHYS_OFFSET PLAT_PHYS_OFFSET #define PHYS_PFN_OFFSET ((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT)) -static inline phys_addr_t __virt_to_phys(unsigned long x) +static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) { return (phys_addr_t)x - PAGE_OFFSET + PHYS_OFFSET; } @@ -261,6 +261,16 @@ static inline unsigned long __phys_to_virt(phys_addr_t x) ((((unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \ PHYS_PFN_OFFSET) +#define __pa_symbol_nodebug(x) __virt_to_phys_nodebug((x)) + +#ifdef CONFIG_DEBUG_VIRTUAL +extern phys_addr_t __virt_to_phys(unsigned long x); +extern phys_addr_t __phys_addr_symbol(unsigned long x); +#else +#define __virt_to_phys(x) __virt_to_phys_nodebug(x) +#define __phys_addr_symbol(x) __pa_symbol_nodebug(x) +#endif + /* * These are *only* valid on the kernel direct mapped RAM memory. * Note: Drivers should NOT use these. They are the wrong @@ -283,6 +293,7 @@ static inline void *phys_to_virt(phys_addr_t x) * Drivers should NOT use these either. 
*/ #define __pa(x) __virt_to_phys((unsigned long)(x)) +#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((phys_addr_t)(pfn) << PAGE_SHIFT) diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index e8698241ece9..b3dea80715b4 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -14,6 +14,7 @@ endif obj-$(CONFIG_ARM_PTDUMP) += dump.o obj-$(CONFIG_MODULES) += proc-syms.o +obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/arm/mm/physaddr.c b/arch/arm/mm/physaddr.c new file mode 100644 index 000000000000..02e60f495608 --- /dev/null +++ b/arch/arm/mm/physaddr.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mm.h" + +static inline bool __virt_addr_valid(unsigned long x) +{ + /* + * high_memory does not get immediately defined, and there + * are early callers of __pa() against PAGE_OFFSET + */ + if (!high_memory && x >= PAGE_OFFSET) + return true; + + if (high_memory && x >= PAGE_OFFSET && x < (unsigned long)high_memory) + return true; + + /* + * MAX_DMA_ADDRESS is a virtual address that may not correspond to an + * actual physical address. Enough code relies on __pa(MAX_DMA_ADDRESS) + * that we just need to work around it and always return true. + */ + if (x == MAX_DMA_ADDRESS) + return true; + + return false; +} + +phys_addr_t __virt_to_phys(unsigned long x) +{ + WARN(!__virt_addr_valid(x), + "virt_to_phys used for non-linear address: %pK (%pS)\n", + (void *)x, (void *)x); + + return __virt_to_phys_nodebug(x); +} +EXPORT_SYMBOL(__virt_to_phys); + +phys_addr_t __phys_addr_symbol(unsigned long x) +{ + /* This is bounds checking against the kernel image only. + * __pa_symbol should only be used on kernel symbol addresses. + */ + VIRTUAL_BUG_ON(x < (unsigned long)KERNEL_START || + x > (unsigned long)KERNEL_END); + + return __pa_symbol_nodebug(x); +} +EXPORT_SYMBOL(__phys_addr_symbol); -- GitLab From 64fc2a947a9873700929ec0ef02b4654a04e0476 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 15 Jan 2017 03:59:29 +0100 Subject: [PATCH 0721/1084] ARM: 8641/1: treewide: Replace uses of virt_to_phys with __pa_symbol All low-level PM/SMP code using virt_to_phys() should actually use __pa_symbol() against kernel symbols. Update code where relevant to move away from virt_to_phys(). 
Acked-by: Russell King Reviewed-by: Laura Abbott Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- arch/arm/common/mcpm_entry.c | 12 ++++++------ arch/arm/mach-alpine/platsmp.c | 2 +- arch/arm/mach-axxia/platsmp.c | 2 +- arch/arm/mach-bcm/bcm63xx_smp.c | 2 +- arch/arm/mach-bcm/platsmp-brcmstb.c | 2 +- arch/arm/mach-bcm/platsmp.c | 4 ++-- arch/arm/mach-berlin/platsmp.c | 2 +- arch/arm/mach-exynos/firmware.c | 4 ++-- arch/arm/mach-exynos/mcpm-exynos.c | 2 +- arch/arm/mach-exynos/platsmp.c | 4 ++-- arch/arm/mach-exynos/pm.c | 6 +++--- arch/arm/mach-exynos/suspend.c | 6 +++--- arch/arm/mach-hisi/platmcpm.c | 2 +- arch/arm/mach-hisi/platsmp.c | 6 +++--- arch/arm/mach-imx/platsmp.c | 2 +- arch/arm/mach-imx/pm-imx6.c | 2 +- arch/arm/mach-imx/src.c | 2 +- arch/arm/mach-mediatek/platsmp.c | 2 +- arch/arm/mach-mvebu/pm.c | 2 +- arch/arm/mach-mvebu/pmsu.c | 2 +- arch/arm/mach-mvebu/system-controller.c | 2 +- arch/arm/mach-omap2/control.c | 8 ++++---- arch/arm/mach-omap2/omap-mpuss-lowpower.c | 12 ++++++------ arch/arm/mach-omap2/omap-smp.c | 4 ++-- arch/arm/mach-prima2/platsmp.c | 2 +- arch/arm/mach-prima2/pm.c | 2 +- arch/arm/mach-pxa/palmz72.c | 2 +- arch/arm/mach-pxa/pxa25x.c | 2 +- arch/arm/mach-pxa/pxa27x.c | 2 +- arch/arm/mach-pxa/pxa3xx.c | 2 +- arch/arm/mach-realview/platsmp-dt.c | 2 +- arch/arm/mach-rockchip/platsmp.c | 4 ++-- arch/arm/mach-rockchip/pm.c | 2 +- arch/arm/mach-s3c24xx/mach-jive.c | 2 +- arch/arm/mach-s3c24xx/pm-s3c2410.c | 2 +- arch/arm/mach-s3c24xx/pm-s3c2416.c | 2 +- arch/arm/mach-s3c64xx/pm.c | 2 +- arch/arm/mach-s5pv210/pm.c | 2 +- arch/arm/mach-sa1100/pm.c | 2 +- arch/arm/mach-shmobile/platsmp-apmu.c | 6 +++--- arch/arm/mach-shmobile/platsmp-scu.c | 4 ++-- arch/arm/mach-socfpga/platsmp.c | 4 ++-- arch/arm/mach-spear/platsmp.c | 2 +- arch/arm/mach-sti/platsmp.c | 2 +- arch/arm/mach-sunxi/platsmp.c | 4 ++-- arch/arm/mach-tango/platsmp.c | 2 +- arch/arm/mach-tango/pm.c | 2 +- arch/arm/mach-tegra/reset.c | 4 ++-- arch/arm/mach-ux500/platsmp.c | 2 +- arch/arm/mach-vexpress/dcscb.c | 2 +- arch/arm/mach-vexpress/platsmp.c | 2 +- arch/arm/mach-vexpress/tc2_pm.c | 4 ++-- arch/arm/mach-zx/platsmp.c | 4 ++-- arch/arm/mach-zynq/platsmp.c | 2 +- 54 files changed, 86 insertions(+), 86 deletions(-) diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index a923524d1040..cf062472e07b 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -144,7 +144,7 @@ extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER]; void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) { - unsigned long val = ptr ? virt_to_phys(ptr) : 0; + unsigned long val = ptr ? __pa_symbol(ptr) : 0; mcpm_entry_vectors[cluster][cpu] = val; sync_cache_w(&mcpm_entry_vectors[cluster][cpu]); } @@ -299,8 +299,8 @@ void mcpm_cpu_power_down(void) * the kernel as if the power_up method just had deasserted reset * on the CPU. 
*/ - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); - phys_reset(virt_to_phys(mcpm_entry_point)); + phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset); + phys_reset(__pa_symbol(mcpm_entry_point)); /* should never get here */ BUG(); @@ -388,8 +388,8 @@ static int __init nocache_trampoline(unsigned long _arg) __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); __mcpm_cpu_down(cpu, cluster); - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); - phys_reset(virt_to_phys(mcpm_entry_point)); + phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset); + phys_reset(__pa_symbol(mcpm_entry_point)); BUG(); } @@ -449,7 +449,7 @@ int __init mcpm_sync_init( sync_cache_w(&mcpm_sync); if (power_up_setup) { - mcpm_power_up_setup_phys = virt_to_phys(power_up_setup); + mcpm_power_up_setup_phys = __pa_symbol(power_up_setup); sync_cache_w(&mcpm_power_up_setup_phys); } diff --git a/arch/arm/mach-alpine/platsmp.c b/arch/arm/mach-alpine/platsmp.c index dd77ea25e7ca..6dc6d491f88a 100644 --- a/arch/arm/mach-alpine/platsmp.c +++ b/arch/arm/mach-alpine/platsmp.c @@ -27,7 +27,7 @@ static int alpine_boot_secondary(unsigned int cpu, struct task_struct *idle) { phys_addr_t addr; - addr = virt_to_phys(secondary_startup); + addr = __pa_symbol(secondary_startup); if (addr > (phys_addr_t)(uint32_t)(-1)) { pr_err("FAIL: resume address over 32bit (%pa)", &addr); diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c index ffbd71d45008..502e3df69f69 100644 --- a/arch/arm/mach-axxia/platsmp.c +++ b/arch/arm/mach-axxia/platsmp.c @@ -25,7 +25,7 @@ static void write_release_addr(u32 release_phys) { u32 *virt = (u32 *) phys_to_virt(release_phys); - writel_relaxed(virt_to_phys(secondary_startup), virt); + writel_relaxed(__pa_symbol(secondary_startup), virt); /* Make sure this store is visible to other CPUs */ smp_wmb(); __cpuc_flush_dcache_area(virt, sizeof(u32)); diff --git a/arch/arm/mach-bcm/bcm63xx_smp.c b/arch/arm/mach-bcm/bcm63xx_smp.c index 9b6727ed68cd..f5fb10b4376f 100644 --- a/arch/arm/mach-bcm/bcm63xx_smp.c +++ b/arch/arm/mach-bcm/bcm63xx_smp.c @@ -135,7 +135,7 @@ static int bcm63138_smp_boot_secondary(unsigned int cpu, } /* Write the secondary init routine to the BootLUT reset vector */ - val = virt_to_phys(secondary_startup); + val = __pa_symbol(secondary_startup); writel_relaxed(val, bootlut_base + BOOTLUT_RESET_VECT); /* Power up the core, will jump straight to its reset vector when we diff --git a/arch/arm/mach-bcm/platsmp-brcmstb.c b/arch/arm/mach-bcm/platsmp-brcmstb.c index 40dc8448445e..12379960e982 100644 --- a/arch/arm/mach-bcm/platsmp-brcmstb.c +++ b/arch/arm/mach-bcm/platsmp-brcmstb.c @@ -151,7 +151,7 @@ static void brcmstb_cpu_boot(u32 cpu) * Set the reset vector to point to the secondary_startup * routine */ - cpu_set_boot_addr(cpu, virt_to_phys(secondary_startup)); + cpu_set_boot_addr(cpu, __pa_symbol(secondary_startup)); /* Unhalt the cpu */ cpu_rst_cfg_set(cpu, 0); diff --git a/arch/arm/mach-bcm/platsmp.c b/arch/arm/mach-bcm/platsmp.c index 3ac3a9bc663c..582886d0d02f 100644 --- a/arch/arm/mach-bcm/platsmp.c +++ b/arch/arm/mach-bcm/platsmp.c @@ -116,7 +116,7 @@ static int nsp_write_lut(unsigned int cpu) return -ENOMEM; } - secondary_startup_phy = virt_to_phys(secondary_startup); + secondary_startup_phy = __pa_symbol(secondary_startup); BUG_ON(secondary_startup_phy > (phys_addr_t)U32_MAX); writel_relaxed(secondary_startup_phy, sku_rom_lut); @@ -189,7 +189,7 @@ static int kona_boot_secondary(unsigned int cpu, struct task_struct 
*idle) * Secondary cores will start in secondary_startup(), * defined in "arch/arm/kernel/head.S" */ - boot_func = virt_to_phys(secondary_startup); + boot_func = __pa_symbol(secondary_startup); BUG_ON(boot_func & BOOT_ADDR_CPUID_MASK); BUG_ON(boot_func > (phys_addr_t)U32_MAX); diff --git a/arch/arm/mach-berlin/platsmp.c b/arch/arm/mach-berlin/platsmp.c index 93f90688db18..1167b0ed92c8 100644 --- a/arch/arm/mach-berlin/platsmp.c +++ b/arch/arm/mach-berlin/platsmp.c @@ -92,7 +92,7 @@ static void __init berlin_smp_prepare_cpus(unsigned int max_cpus) * Write the secondary startup address into the SW reset address * vector. This is used by boot_inst. */ - writel(virt_to_phys(secondary_startup), vectors_base + SW_RESET_ADDR); + writel(__pa_symbol(secondary_startup), vectors_base + SW_RESET_ADDR); iounmap(vectors_base); unmap_scu: diff --git a/arch/arm/mach-exynos/firmware.c b/arch/arm/mach-exynos/firmware.c index fd6da5419b51..e81a78b125d9 100644 --- a/arch/arm/mach-exynos/firmware.c +++ b/arch/arm/mach-exynos/firmware.c @@ -41,7 +41,7 @@ static int exynos_do_idle(unsigned long mode) case FW_DO_IDLE_AFTR: if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) exynos_save_cp15(); - writel_relaxed(virt_to_phys(exynos_cpu_resume_ns), + writel_relaxed(__pa_symbol(exynos_cpu_resume_ns), sysram_ns_base_addr + 0x24); writel_relaxed(EXYNOS_AFTR_MAGIC, sysram_ns_base_addr + 0x20); if (soc_is_exynos3250()) { @@ -135,7 +135,7 @@ static int exynos_suspend(void) exynos_save_cp15(); writel(EXYNOS_SLEEP_MAGIC, sysram_ns_base_addr + EXYNOS_BOOT_FLAG); - writel(virt_to_phys(exynos_cpu_resume_ns), + writel(__pa_symbol(exynos_cpu_resume_ns), sysram_ns_base_addr + EXYNOS_BOOT_ADDR); return cpu_suspend(0, exynos_cpu_suspend); diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index f086bf615b29..214a9cfa92e9 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -221,7 +221,7 @@ static void exynos_mcpm_setup_entry_point(void) */ __raw_writel(0xe59f0000, ns_sram_base_addr); /* ldr r0, [pc, #0] */ __raw_writel(0xe12fff10, ns_sram_base_addr + 4); /* bx r0 */ - __raw_writel(virt_to_phys(mcpm_entry_point), ns_sram_base_addr + 8); + __raw_writel(__pa_symbol(mcpm_entry_point), ns_sram_base_addr + 8); } static struct syscore_ops exynos_mcpm_syscore_ops = { diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 98ffe1e62ad5..9f4949f7ed88 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -353,7 +353,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) smp_rmb(); - boot_addr = virt_to_phys(exynos4_secondary_startup); + boot_addr = __pa_symbol(exynos4_secondary_startup); ret = exynos_set_boot_addr(core_id, boot_addr); if (ret) @@ -443,7 +443,7 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) mpidr = cpu_logical_map(i); core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - boot_addr = virt_to_phys(exynos4_secondary_startup); + boot_addr = __pa_symbol(exynos4_secondary_startup); ret = exynos_set_boot_addr(core_id, boot_addr); if (ret) diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c index 487295f4a56b..1a7e5b5d08d8 100644 --- a/arch/arm/mach-exynos/pm.c +++ b/arch/arm/mach-exynos/pm.c @@ -132,7 +132,7 @@ static void exynos_set_wakeupmask(long mask) static void exynos_cpu_set_boot_vector(long flags) { - writel_relaxed(virt_to_phys(exynos_cpu_resume), + writel_relaxed(__pa_symbol(exynos_cpu_resume), exynos_boot_vector_addr()); writel_relaxed(flags, 
exynos_boot_vector_flag()); } @@ -238,7 +238,7 @@ static int exynos_cpu0_enter_aftr(void) abort: if (cpu_online(1)) { - unsigned long boot_addr = virt_to_phys(exynos_cpu_resume); + unsigned long boot_addr = __pa_symbol(exynos_cpu_resume); /* * Set the boot vector to something non-zero @@ -330,7 +330,7 @@ static int exynos_cpu1_powerdown(void) static void exynos_pre_enter_aftr(void) { - unsigned long boot_addr = virt_to_phys(exynos_cpu_resume); + unsigned long boot_addr = __pa_symbol(exynos_cpu_resume); (void)exynos_set_boot_addr(1, boot_addr); } diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 06332f626565..97765be2cc12 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -344,7 +344,7 @@ static void exynos_pm_prepare(void) exynos_pm_enter_sleep_mode(); /* ensure at least INFORM0 has the resume address */ - pmu_raw_writel(virt_to_phys(exynos_cpu_resume), S5P_INFORM0); + pmu_raw_writel(__pa_symbol(exynos_cpu_resume), S5P_INFORM0); } static void exynos3250_pm_prepare(void) @@ -361,7 +361,7 @@ static void exynos3250_pm_prepare(void) exynos_pm_enter_sleep_mode(); /* ensure at least INFORM0 has the resume address */ - pmu_raw_writel(virt_to_phys(exynos_cpu_resume), S5P_INFORM0); + pmu_raw_writel(__pa_symbol(exynos_cpu_resume), S5P_INFORM0); } static void exynos5420_pm_prepare(void) @@ -386,7 +386,7 @@ static void exynos5420_pm_prepare(void) /* ensure at least INFORM0 has the resume address */ if (IS_ENABLED(CONFIG_EXYNOS5420_MCPM)) - pmu_raw_writel(virt_to_phys(mcpm_entry_point), S5P_INFORM0); + pmu_raw_writel(__pa_symbol(mcpm_entry_point), S5P_INFORM0); tmp = pmu_raw_readl(EXYNOS5_ARM_L2_OPTION); tmp &= ~EXYNOS5_USE_RETENTION; diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c index 4b653a8cb75c..a6c117622d67 100644 --- a/arch/arm/mach-hisi/platmcpm.c +++ b/arch/arm/mach-hisi/platmcpm.c @@ -327,7 +327,7 @@ static int __init hip04_smp_init(void) */ writel_relaxed(hip04_boot_method[0], relocation); writel_relaxed(0xa5a5a5a5, relocation + 4); /* magic number */ - writel_relaxed(virt_to_phys(secondary_startup), relocation + 8); + writel_relaxed(__pa_symbol(secondary_startup), relocation + 8); writel_relaxed(0, relocation + 12); iounmap(relocation); diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index e1d67648d5d0..91bb02dec20f 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -28,7 +28,7 @@ void hi3xxx_set_cpu_jump(int cpu, void *jump_addr) cpu = cpu_logical_map(cpu); if (!cpu || !ctrl_base) return; - writel_relaxed(virt_to_phys(jump_addr), ctrl_base + ((cpu - 1) << 2)); + writel_relaxed(__pa_symbol(jump_addr), ctrl_base + ((cpu - 1) << 2)); } int hi3xxx_get_cpu_jump(int cpu) @@ -118,7 +118,7 @@ static int hix5hd2_boot_secondary(unsigned int cpu, struct task_struct *idle) { phys_addr_t jumpaddr; - jumpaddr = virt_to_phys(secondary_startup); + jumpaddr = __pa_symbol(secondary_startup); hix5hd2_set_scu_boot_addr(HIX5HD2_BOOT_ADDRESS, jumpaddr); hix5hd2_set_cpu(cpu, true); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); @@ -156,7 +156,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) struct device_node *node; - jumpaddr = virt_to_phys(secondary_startup); + jumpaddr = __pa_symbol(secondary_startup); hip01_set_boot_addr(HIP01_BOOT_ADDRESS, jumpaddr); node = of_find_compatible_node(NULL, NULL, "hisilicon,hip01-sysctrl"); diff --git a/arch/arm/mach-imx/platsmp.c b/arch/arm/mach-imx/platsmp.c index 711dbbd5badd..c2d1b329fba1 100644 --- 
a/arch/arm/mach-imx/platsmp.c +++ b/arch/arm/mach-imx/platsmp.c @@ -117,7 +117,7 @@ static void __init ls1021a_smp_prepare_cpus(unsigned int max_cpus) dcfg_base = of_iomap(np, 0); BUG_ON(!dcfg_base); - paddr = virt_to_phys(secondary_startup); + paddr = __pa_symbol(secondary_startup); writel_relaxed(cpu_to_be32(paddr), dcfg_base + DCFG_CCSR_SCRATCHRW1); iounmap(dcfg_base); diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index 1515e498d348..e61b1d1027e1 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -499,7 +499,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) memset(suspend_ocram_base, 0, sizeof(*pm_info)); pm_info = suspend_ocram_base; pm_info->pbase = ocram_pbase; - pm_info->resume_addr = virt_to_phys(v7_cpu_resume); + pm_info->resume_addr = __pa_symbol(v7_cpu_resume); pm_info->pm_info_size = sizeof(*pm_info); /* diff --git a/arch/arm/mach-imx/src.c b/arch/arm/mach-imx/src.c index 70b083fe934a..495d85d0fe7e 100644 --- a/arch/arm/mach-imx/src.c +++ b/arch/arm/mach-imx/src.c @@ -99,7 +99,7 @@ void imx_enable_cpu(int cpu, bool enable) void imx_set_cpu_jump(int cpu, void *jump_addr) { cpu = cpu_logical_map(cpu); - writel_relaxed(virt_to_phys(jump_addr), + writel_relaxed(__pa_symbol(jump_addr), src_base + SRC_GPR1 + cpu * 8); } diff --git a/arch/arm/mach-mediatek/platsmp.c b/arch/arm/mach-mediatek/platsmp.c index b821e34474b6..726eb69bb655 100644 --- a/arch/arm/mach-mediatek/platsmp.c +++ b/arch/arm/mach-mediatek/platsmp.c @@ -122,7 +122,7 @@ static void __init __mtk_smp_prepare_cpus(unsigned int max_cpus, int trustzone) * write the address of slave startup address into the system-wide * jump register */ - writel_relaxed(virt_to_phys(secondary_startup_arm), + writel_relaxed(__pa_symbol(secondary_startup_arm), mtk_smp_base + mtk_smp_info->jump_reg); } diff --git a/arch/arm/mach-mvebu/pm.c b/arch/arm/mach-mvebu/pm.c index 2990c5269b18..c487be61d6d8 100644 --- a/arch/arm/mach-mvebu/pm.c +++ b/arch/arm/mach-mvebu/pm.c @@ -110,7 +110,7 @@ static void mvebu_pm_store_armadaxp_bootinfo(u32 *store_addr) { phys_addr_t resume_pc; - resume_pc = virt_to_phys(armada_370_xp_cpu_resume); + resume_pc = __pa_symbol(armada_370_xp_cpu_resume); /* * The bootloader expects the first two words to be a magic diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index f39bd51bce18..27a78c80e5b1 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -112,7 +112,7 @@ static const struct of_device_id of_pmsu_table[] = { void mvebu_pmsu_set_cpu_boot_addr(int hw_cpu, void *boot_addr) { - writel(virt_to_phys(boot_addr), pmsu_mp_base + + writel(__pa_symbol(boot_addr), pmsu_mp_base + PMSU_BOOT_ADDR_REDIRECT_OFFSET(hw_cpu)); } diff --git a/arch/arm/mach-mvebu/system-controller.c b/arch/arm/mach-mvebu/system-controller.c index 76cbc82a7407..04d9ebe6a90a 100644 --- a/arch/arm/mach-mvebu/system-controller.c +++ b/arch/arm/mach-mvebu/system-controller.c @@ -153,7 +153,7 @@ void mvebu_system_controller_set_cpu_boot_addr(void *boot_addr) if (of_machine_is_compatible("marvell,armada375")) mvebu_armada375_smp_wa_init(); - writel(virt_to_phys(boot_addr), system_controller_base + + writel(__pa_symbol(boot_addr), system_controller_base + mvebu_sc->resume_boot_addr); } #endif diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c index 1662071bb2cc..bd8089ff929f 100644 --- a/arch/arm/mach-omap2/control.c +++ b/arch/arm/mach-omap2/control.c @@ -315,15 +315,15 @@ void 
omap3_save_scratchpad_contents(void) scratchpad_contents.boot_config_ptr = 0x0; if (cpu_is_omap3630()) scratchpad_contents.public_restore_ptr = - virt_to_phys(omap3_restore_3630); + __pa_symbol(omap3_restore_3630); else if (omap_rev() != OMAP3430_REV_ES3_0 && omap_rev() != OMAP3430_REV_ES3_1 && omap_rev() != OMAP3430_REV_ES3_1_2) scratchpad_contents.public_restore_ptr = - virt_to_phys(omap3_restore); + __pa_symbol(omap3_restore); else scratchpad_contents.public_restore_ptr = - virt_to_phys(omap3_restore_es3); + __pa_symbol(omap3_restore_es3); if (omap_type() == OMAP2_DEVICE_TYPE_GP) scratchpad_contents.secure_ram_restore_ptr = 0x0; @@ -395,7 +395,7 @@ void omap3_save_scratchpad_contents(void) sdrc_block_contents.flags = 0x0; sdrc_block_contents.block_size = 0x0; - arm_context_addr = virt_to_phys(omap3_arm_context); + arm_context_addr = __pa_symbol(omap3_arm_context); /* Copy all the contents to the scratchpad location */ scratchpad_address = OMAP2_L4_IO_ADDRESS(OMAP343X_SCRATCHPAD); diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c index 7d62ad48c7c9..113ab2dd2ee9 100644 --- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c +++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c @@ -273,7 +273,7 @@ int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state) cpu_clear_prev_logic_pwrst(cpu); pwrdm_set_next_pwrst(pm_info->pwrdm, power_state); pwrdm_set_logic_retst(pm_info->pwrdm, cpu_logic_state); - set_cpu_wakeup_addr(cpu, virt_to_phys(omap_pm_ops.resume)); + set_cpu_wakeup_addr(cpu, __pa_symbol(omap_pm_ops.resume)); omap_pm_ops.scu_prepare(cpu, power_state); l2x0_pwrst_prepare(cpu, save_state); @@ -325,7 +325,7 @@ int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state) pwrdm_clear_all_prev_pwrst(pm_info->pwrdm); pwrdm_set_next_pwrst(pm_info->pwrdm, power_state); - set_cpu_wakeup_addr(cpu, virt_to_phys(omap_pm_ops.hotplug_restart)); + set_cpu_wakeup_addr(cpu, __pa_symbol(omap_pm_ops.hotplug_restart)); omap_pm_ops.scu_prepare(cpu, power_state); /* @@ -467,13 +467,13 @@ void __init omap4_mpuss_early_init(void) sar_base = omap4_get_sar_ram_base(); if (cpu_is_omap443x()) - startup_pa = virt_to_phys(omap4_secondary_startup); + startup_pa = __pa_symbol(omap4_secondary_startup); else if (cpu_is_omap446x()) - startup_pa = virt_to_phys(omap4460_secondary_startup); + startup_pa = __pa_symbol(omap4460_secondary_startup); else if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE) - startup_pa = virt_to_phys(omap5_secondary_hyp_startup); + startup_pa = __pa_symbol(omap5_secondary_hyp_startup); else - startup_pa = virt_to_phys(omap5_secondary_startup); + startup_pa = __pa_symbol(omap5_secondary_startup); if (cpu_is_omap44xx()) writel_relaxed(startup_pa, sar_base + diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index b4de3da6dffa..003353b0b794 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -316,9 +316,9 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus) * A barrier is added to ensure that write buffer is drained */ if (omap_secure_apis_support()) - omap_auxcoreboot_addr(virt_to_phys(cfg.startup_addr)); + omap_auxcoreboot_addr(__pa_symbol(cfg.startup_addr)); else - writel_relaxed(virt_to_phys(cfg.startup_addr), + writel_relaxed(__pa_symbol(cfg.startup_addr), base + OMAP_AUX_CORE_BOOT_1); } diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c index 0875b99add18..75ef5d4be554 100644 --- a/arch/arm/mach-prima2/platsmp.c +++ b/arch/arm/mach-prima2/platsmp.c 
@@ -65,7 +65,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) * waiting for. This would wake up the secondary core from WFE */ #define SIRFSOC_CPU1_JUMPADDR_OFFSET 0x2bc - __raw_writel(virt_to_phys(sirfsoc_secondary_startup), + __raw_writel(__pa_symbol(sirfsoc_secondary_startup), clk_base + SIRFSOC_CPU1_JUMPADDR_OFFSET); #define SIRFSOC_CPU1_WAKEMAGIC_OFFSET 0x2b8 diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c index 83e94c95e314..b0bcf1ff02dd 100644 --- a/arch/arm/mach-prima2/pm.c +++ b/arch/arm/mach-prima2/pm.c @@ -54,7 +54,7 @@ static void sirfsoc_set_sleep_mode(u32 mode) static int sirfsoc_pre_suspend_power_off(void) { - u32 wakeup_entry = virt_to_phys(cpu_resume); + u32 wakeup_entry = __pa_symbol(cpu_resume); sirfsoc_rtc_iobrg_writel(wakeup_entry, sirfsoc_pwrc_base + SIRFSOC_PWRC_SCRATCH_PAD1); diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c index 9c308de158c6..29630061e700 100644 --- a/arch/arm/mach-pxa/palmz72.c +++ b/arch/arm/mach-pxa/palmz72.c @@ -249,7 +249,7 @@ static int palmz72_pm_suspend(void) store_ptr = *PALMZ72_SAVE_DWORD; /* Setting PSPR to a proper value */ - PSPR = virt_to_phys(&palmz72_resume_info); + PSPR = __pa_symbol(&palmz72_resume_info); return 0; } diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c index c725baf119e1..ba431fad5c47 100644 --- a/arch/arm/mach-pxa/pxa25x.c +++ b/arch/arm/mach-pxa/pxa25x.c @@ -85,7 +85,7 @@ static void pxa25x_cpu_pm_enter(suspend_state_t state) static int pxa25x_cpu_pm_prepare(void) { /* set resume return address */ - PSPR = virt_to_phys(cpu_resume); + PSPR = __pa_symbol(cpu_resume); return 0; } diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index c0185c5c5a08..9b69be4e9fe3 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -168,7 +168,7 @@ static int pxa27x_cpu_pm_valid(suspend_state_t state) static int pxa27x_cpu_pm_prepare(void) { /* set resume return address */ - PSPR = virt_to_phys(cpu_resume); + PSPR = __pa_symbol(cpu_resume); return 0; } diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 87acc96388c7..0cc9f124c9ac 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -123,7 +123,7 @@ static void pxa3xx_cpu_pm_suspend(void) PSPR = 0x5c014000; /* overwrite with the resume address */ - *p = virt_to_phys(cpu_resume); + *p = __pa_symbol(cpu_resume); cpu_suspend(0, pxa3xx_finish_suspend); diff --git a/arch/arm/mach-realview/platsmp-dt.c b/arch/arm/mach-realview/platsmp-dt.c index 70ca99eb52c6..c242423bf8db 100644 --- a/arch/arm/mach-realview/platsmp-dt.c +++ b/arch/arm/mach-realview/platsmp-dt.c @@ -76,7 +76,7 @@ static void __init realview_smp_prepare_cpus(unsigned int max_cpus) } /* Put the boot address in this magic register */ regmap_write(map, REALVIEW_SYS_FLAGSSET_OFFSET, - virt_to_phys(versatile_secondary_startup)); + __pa_symbol(versatile_secondary_startup)); } static const struct smp_operations realview_dt_smp_ops __initconst = { diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index 4d827a069d49..3abafdbdd7f4 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -156,7 +156,7 @@ static int rockchip_boot_secondary(unsigned int cpu, struct task_struct *idle) */ mdelay(1); /* ensure the cpus other than cpu0 to startup */ - writel(virt_to_phys(secondary_startup), sram_base_addr + 8); + writel(__pa_symbol(secondary_startup), sram_base_addr + 8); writel(0xDEADBEAF, sram_base_addr 
+ 4); dsb_sev(); } @@ -195,7 +195,7 @@ static int __init rockchip_smp_prepare_sram(struct device_node *node) } /* set the boot function for the sram code */ - rockchip_boot_fn = virt_to_phys(secondary_startup); + rockchip_boot_fn = __pa_symbol(secondary_startup); /* copy the trampoline to sram, that runs during startup of the core */ memcpy(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz); diff --git a/arch/arm/mach-rockchip/pm.c b/arch/arm/mach-rockchip/pm.c index bee8c8051929..0592534e0b88 100644 --- a/arch/arm/mach-rockchip/pm.c +++ b/arch/arm/mach-rockchip/pm.c @@ -62,7 +62,7 @@ static inline u32 rk3288_l2_config(void) static void rk3288_config_bootdata(void) { rkpm_bootdata_cpusp = rk3288_bootram_phy + (SZ_4K - 8); - rkpm_bootdata_cpu_code = virt_to_phys(cpu_resume); + rkpm_bootdata_cpu_code = __pa_symbol(cpu_resume); rkpm_bootdata_l2ctlr_f = 1; rkpm_bootdata_l2ctlr = rk3288_l2_config(); diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c index 895aca225952..f5b5c49b56ac 100644 --- a/arch/arm/mach-s3c24xx/mach-jive.c +++ b/arch/arm/mach-s3c24xx/mach-jive.c @@ -484,7 +484,7 @@ static int jive_pm_suspend(void) * correct address to resume from. */ __raw_writel(0x2BED, S3C2412_INFORM0); - __raw_writel(virt_to_phys(s3c_cpu_resume), S3C2412_INFORM1); + __raw_writel(__pa_symbol(s3c_cpu_resume), S3C2412_INFORM1); return 0; } diff --git a/arch/arm/mach-s3c24xx/pm-s3c2410.c b/arch/arm/mach-s3c24xx/pm-s3c2410.c index 20e481d8a33a..a4588daeddb0 100644 --- a/arch/arm/mach-s3c24xx/pm-s3c2410.c +++ b/arch/arm/mach-s3c24xx/pm-s3c2410.c @@ -45,7 +45,7 @@ static void s3c2410_pm_prepare(void) { /* ensure at least GSTATUS3 has the resume address */ - __raw_writel(virt_to_phys(s3c_cpu_resume), S3C2410_GSTATUS3); + __raw_writel(__pa_symbol(s3c_cpu_resume), S3C2410_GSTATUS3); S3C_PMDBG("GSTATUS3 0x%08x\n", __raw_readl(S3C2410_GSTATUS3)); S3C_PMDBG("GSTATUS4 0x%08x\n", __raw_readl(S3C2410_GSTATUS4)); diff --git a/arch/arm/mach-s3c24xx/pm-s3c2416.c b/arch/arm/mach-s3c24xx/pm-s3c2416.c index c0e328e37bd6..b5bbf0d5985c 100644 --- a/arch/arm/mach-s3c24xx/pm-s3c2416.c +++ b/arch/arm/mach-s3c24xx/pm-s3c2416.c @@ -48,7 +48,7 @@ static void s3c2416_pm_prepare(void) * correct address to resume from. */ __raw_writel(0x2BED, S3C2412_INFORM0); - __raw_writel(virt_to_phys(s3c_cpu_resume), S3C2412_INFORM1); + __raw_writel(__pa_symbol(s3c_cpu_resume), S3C2412_INFORM1); } static int s3c2416_pm_add(struct device *dev, struct subsys_interface *sif) diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c index 59d91b83b03d..945a9d1e1a71 100644 --- a/arch/arm/mach-s3c64xx/pm.c +++ b/arch/arm/mach-s3c64xx/pm.c @@ -304,7 +304,7 @@ static void s3c64xx_pm_prepare(void) wake_irqs, ARRAY_SIZE(wake_irqs)); /* store address of resume. 
*/ - __raw_writel(virt_to_phys(s3c_cpu_resume), S3C64XX_INFORM0); + __raw_writel(__pa_symbol(s3c_cpu_resume), S3C64XX_INFORM0); /* ensure previous wakeup state is cleared before sleeping */ __raw_writel(__raw_readl(S3C64XX_WAKEUP_STAT), S3C64XX_WAKEUP_STAT); diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c index 21b4b13c5ab7..2d5f08015e34 100644 --- a/arch/arm/mach-s5pv210/pm.c +++ b/arch/arm/mach-s5pv210/pm.c @@ -69,7 +69,7 @@ static void s5pv210_pm_prepare(void) __raw_writel(s5pv210_irqwake_intmask, S5P_WAKEUP_MASK); /* ensure at least INFORM0 has the resume address */ - __raw_writel(virt_to_phys(s5pv210_cpu_resume), S5P_INFORM0); + __raw_writel(__pa_symbol(s5pv210_cpu_resume), S5P_INFORM0); tmp = __raw_readl(S5P_SLEEP_CFG); tmp &= ~(S5P_SLEEP_CFG_OSC_EN | S5P_SLEEP_CFG_USBOSC_EN); diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c index 34853d5dfda2..9a7079f565bd 100644 --- a/arch/arm/mach-sa1100/pm.c +++ b/arch/arm/mach-sa1100/pm.c @@ -73,7 +73,7 @@ static int sa11x0_pm_enter(suspend_state_t state) RCSR = RCSR_HWR | RCSR_SWR | RCSR_WDR | RCSR_SMR; /* set resume return address */ - PSPR = virt_to_phys(cpu_resume); + PSPR = __pa_symbol(cpu_resume); /* go zzz */ cpu_suspend(0, sa1100_finish_suspend); diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c index 0c6bb458b7a4..71729b8d1900 100644 --- a/arch/arm/mach-shmobile/platsmp-apmu.c +++ b/arch/arm/mach-shmobile/platsmp-apmu.c @@ -171,7 +171,7 @@ static void apmu_parse_dt(void (*fn)(struct resource *res, int cpu, int bit)) static void __init shmobile_smp_apmu_setup_boot(void) { /* install boot code shared by all CPUs */ - shmobile_boot_fn = virt_to_phys(shmobile_smp_boot); + shmobile_boot_fn = __pa_symbol(shmobile_smp_boot); } void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus, @@ -185,7 +185,7 @@ void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus, int shmobile_smp_apmu_boot_secondary(unsigned int cpu, struct task_struct *idle) { /* For this particular CPU register boot vector */ - shmobile_smp_hook(cpu, virt_to_phys(secondary_startup), 0); + shmobile_smp_hook(cpu, __pa_symbol(secondary_startup), 0); return apmu_wrap(cpu, apmu_power_on); } @@ -301,7 +301,7 @@ int shmobile_smp_apmu_cpu_kill(unsigned int cpu) #if defined(CONFIG_SUSPEND) static int shmobile_smp_apmu_do_suspend(unsigned long cpu) { - shmobile_smp_hook(cpu, virt_to_phys(cpu_resume), 0); + shmobile_smp_hook(cpu, __pa_symbol(cpu_resume), 0); shmobile_smp_apmu_cpu_shutdown(cpu); cpu_do_idle(); /* WFI selects Core Standby */ return 1; diff --git a/arch/arm/mach-shmobile/platsmp-scu.c b/arch/arm/mach-shmobile/platsmp-scu.c index d1ecaf37d142..f1a1efde4beb 100644 --- a/arch/arm/mach-shmobile/platsmp-scu.c +++ b/arch/arm/mach-shmobile/platsmp-scu.c @@ -24,7 +24,7 @@ static void __iomem *shmobile_scu_base; static int shmobile_scu_cpu_prepare(unsigned int cpu) { /* For this particular CPU register SCU SMP boot vector */ - shmobile_smp_hook(cpu, virt_to_phys(shmobile_boot_scu), + shmobile_smp_hook(cpu, __pa_symbol(shmobile_boot_scu), shmobile_scu_base_phys); return 0; } @@ -33,7 +33,7 @@ void __init shmobile_smp_scu_prepare_cpus(phys_addr_t scu_base_phys, unsigned int max_cpus) { /* install boot code shared by all CPUs */ - shmobile_boot_fn = virt_to_phys(shmobile_smp_boot); + shmobile_boot_fn = __pa_symbol(shmobile_smp_boot); /* enable SCU and cache coherency on booting CPU */ shmobile_scu_base_phys = scu_base_phys; diff --git a/arch/arm/mach-socfpga/platsmp.c 
b/arch/arm/mach-socfpga/platsmp.c index 07945748b571..0ee76772b507 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -40,7 +40,7 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); - writel(virt_to_phys(secondary_startup), + writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff)); flush_cache_all(); @@ -63,7 +63,7 @@ static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle SOCFPGA_A10_RSTMGR_MODMPURST); memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); - writel(virt_to_phys(secondary_startup), + writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff)); flush_cache_all(); diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c index 8d1e2d551786..39038a03836a 100644 --- a/arch/arm/mach-spear/platsmp.c +++ b/arch/arm/mach-spear/platsmp.c @@ -117,7 +117,7 @@ static void __init spear13xx_smp_prepare_cpus(unsigned int max_cpus) * (presently it is in SRAM). The BootMonitor waits until it receives a * soft interrupt, and then the secondary CPU branches to this address. */ - __raw_writel(virt_to_phys(spear13xx_secondary_startup), SYS_LOCATION); + __raw_writel(__pa_symbol(spear13xx_secondary_startup), SYS_LOCATION); } const struct smp_operations spear13xx_smp_ops __initconst = { diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c index ea5a2277ee46..231f19e17436 100644 --- a/arch/arm/mach-sti/platsmp.c +++ b/arch/arm/mach-sti/platsmp.c @@ -103,7 +103,7 @@ static void __init sti_smp_prepare_cpus(unsigned int max_cpus) u32 __iomem *cpu_strt_ptr; u32 release_phys; int cpu; - unsigned long entry_pa = virt_to_phys(sti_secondary_startup); + unsigned long entry_pa = __pa_symbol(sti_secondary_startup); np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-scu"); diff --git a/arch/arm/mach-sunxi/platsmp.c b/arch/arm/mach-sunxi/platsmp.c index 6642267812c9..8fb5088464db 100644 --- a/arch/arm/mach-sunxi/platsmp.c +++ b/arch/arm/mach-sunxi/platsmp.c @@ -80,7 +80,7 @@ static int sun6i_smp_boot_secondary(unsigned int cpu, spin_lock(&cpu_lock); /* Set CPU boot address */ - writel(virt_to_phys(secondary_startup), + writel(__pa_symbol(secondary_startup), cpucfg_membase + CPUCFG_PRIVATE0_REG); /* Assert the CPU core in reset */ @@ -162,7 +162,7 @@ static int sun8i_smp_boot_secondary(unsigned int cpu, spin_lock(&cpu_lock); /* Set CPU boot address */ - writel(virt_to_phys(secondary_startup), + writel(__pa_symbol(secondary_startup), cpucfg_membase + CPUCFG_PRIVATE0_REG); /* Assert the CPU core in reset */ diff --git a/arch/arm/mach-tango/platsmp.c b/arch/arm/mach-tango/platsmp.c index 98c62a4a8623..2f0c6c050fed 100644 --- a/arch/arm/mach-tango/platsmp.c +++ b/arch/arm/mach-tango/platsmp.c @@ -5,7 +5,7 @@ static int tango_boot_secondary(unsigned int cpu, struct task_struct *idle) { - tango_set_aux_boot_addr(virt_to_phys(secondary_startup)); + tango_set_aux_boot_addr(__pa_symbol(secondary_startup)); tango_start_aux_core(cpu); return 0; } diff --git a/arch/arm/mach-tango/pm.c b/arch/arm/mach-tango/pm.c index b05c6d6f99d0..406c0814eb6e 100644 --- a/arch/arm/mach-tango/pm.c +++ b/arch/arm/mach-tango/pm.c @@ -5,7 +5,7 @@ static int tango_pm_powerdown(unsigned long arg) { - tango_suspend(virt_to_phys(cpu_resume)); + tango_suspend(__pa_symbol(cpu_resume)); return -EIO; /* tango_suspend has failed */ } diff --git 
a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c index 6fd9db54887e..dc558892753c 100644 --- a/arch/arm/mach-tegra/reset.c +++ b/arch/arm/mach-tegra/reset.c @@ -94,14 +94,14 @@ void __init tegra_cpu_reset_handler_init(void) __tegra_cpu_reset_handler_data[TEGRA_RESET_MASK_PRESENT] = *((u32 *)cpu_possible_mask); __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_SECONDARY] = - virt_to_phys((void *)secondary_startup); + __pa_symbol((void *)secondary_startup); #endif #ifdef CONFIG_PM_SLEEP __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_LP1] = TEGRA_IRAM_LPx_RESUME_AREA; __tegra_cpu_reset_handler_data[TEGRA_RESET_STARTUP_LP2] = - virt_to_phys((void *)tegra_resume); + __pa_symbol((void *)tegra_resume); #endif tegra_cpu_reset_handler_enable(); diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index 8f2f615ff958..8c8f26389067 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -54,7 +54,7 @@ static void wakeup_secondary(void) * backup ram register at offset 0x1FF0, which is what boot rom code * is waiting for. This will wake up the secondary core from WFE. */ - writel(virt_to_phys(secondary_startup), + writel(__pa_symbol(secondary_startup), backupram + UX500_CPU1_JUMPADDR_OFFSET); writel(0xA1FEED01, backupram + UX500_CPU1_WAKEMAGIC_OFFSET); diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index 5cedcf572104..ee2a0faafaa1 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -166,7 +166,7 @@ static int __init dcscb_init(void) * Future entries into the kernel can now go * through the cluster entry vectors. */ - vexpress_flags_set(virt_to_phys(mcpm_entry_point)); + vexpress_flags_set(__pa_symbol(mcpm_entry_point)); return 0; } diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c index 98e29dee91e8..742499bac6d0 100644 --- a/arch/arm/mach-vexpress/platsmp.c +++ b/arch/arm/mach-vexpress/platsmp.c @@ -79,7 +79,7 @@ static void __init vexpress_smp_dt_prepare_cpus(unsigned int max_cpus) * until it receives a soft interrupt, and then the * secondary CPU branches to this address. 
*/ - vexpress_flags_set(virt_to_phys(versatile_secondary_startup)); + vexpress_flags_set(__pa_symbol(versatile_secondary_startup)); } const struct smp_operations vexpress_smp_dt_ops __initconst = { diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index 1aa4ccece69f..9b5f3c427086 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -54,7 +54,7 @@ static int tc2_pm_cpu_powerup(unsigned int cpu, unsigned int cluster) if (cluster >= TC2_CLUSTERS || cpu >= tc2_nr_cpus[cluster]) return -EINVAL; ve_spc_set_resume_addr(cluster, cpu, - virt_to_phys(mcpm_entry_point)); + __pa_symbol(mcpm_entry_point)); ve_spc_cpu_wakeup_irq(cluster, cpu, true); return 0; } @@ -159,7 +159,7 @@ static int tc2_pm_wait_for_powerdown(unsigned int cpu, unsigned int cluster) static void tc2_pm_cpu_suspend_prepare(unsigned int cpu, unsigned int cluster) { - ve_spc_set_resume_addr(cluster, cpu, virt_to_phys(mcpm_entry_point)); + ve_spc_set_resume_addr(cluster, cpu, __pa_symbol(mcpm_entry_point)); } static void tc2_pm_cpu_is_up(unsigned int cpu, unsigned int cluster) diff --git a/arch/arm/mach-zx/platsmp.c b/arch/arm/mach-zx/platsmp.c index 0297f92084e0..afb9a82dedc3 100644 --- a/arch/arm/mach-zx/platsmp.c +++ b/arch/arm/mach-zx/platsmp.c @@ -76,7 +76,7 @@ void __init zx_smp_prepare_cpus(unsigned int max_cpus) * until it receives a soft interrupt, and then the * secondary CPU branches to this address. */ - __raw_writel(virt_to_phys(zx_secondary_startup), + __raw_writel(__pa_symbol(zx_secondary_startup), aonsysctrl_base + AON_SYS_CTRL_RESERVED1); iounmap(aonsysctrl_base); @@ -94,7 +94,7 @@ void __init zx_smp_prepare_cpus(unsigned int max_cpus) /* Map the first 4 KB IRAM for suspend usage */ sys_iram = __arm_ioremap_exec(ZX_IRAM_BASE, PAGE_SIZE, false); - zx_secondary_startup_pa = virt_to_phys(zx_secondary_startup); + zx_secondary_startup_pa = __pa_symbol(zx_secondary_startup); fncpy(sys_iram, &zx_resume_jump, zx_suspend_iram_sz); } diff --git a/arch/arm/mach-zynq/platsmp.c b/arch/arm/mach-zynq/platsmp.c index 7cd9865bdeb7..caa6d5fe9078 100644 --- a/arch/arm/mach-zynq/platsmp.c +++ b/arch/arm/mach-zynq/platsmp.c @@ -89,7 +89,7 @@ EXPORT_SYMBOL(zynq_cpun_start); static int zynq_boot_secondary(unsigned int cpu, struct task_struct *idle) { - return zynq_cpun_start(virt_to_phys(secondary_startup), cpu); + return zynq_cpun_start(__pa_symbol(secondary_startup), cpu); } /* -- GitLab From 035e787543de709f29b38752251d4724200ec353 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 19 Jan 2017 01:26:28 +0100 Subject: [PATCH 0722/1084] ARM: 8644/1: Reduce "CPU: shutdown" message to debug level Similar to c68b0274fb3c ("ARM: reduce "Booted secondary processor" message to debug level"), demote the "CPU: shutdown" pr_notice() into a pr_debug(). 
Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7dd14e8395e6..46377c40d056 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -251,7 +251,7 @@ void __cpu_die(unsigned int cpu) pr_err("CPU%u: cpu didn't die\n", cpu); return; } - pr_notice("CPU%u: shutdown\n", cpu); + pr_debug("CPU%u: shutdown\n", cpu); /* * platform_cpu_kill() is generally expected to do the powering off -- GitLab From d2ca5f2491c1246adf3847101fdc538a3b89439c Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Sun, 29 Jan 2017 17:31:32 +0100 Subject: [PATCH 0723/1084] ARM: 8646/1: mmu: decouple VECTORS_BASE from Kconfig For MMU configurations, VECTORS_BASE is always 0xffff0000, a macro definition will suffice. For no-MMU, exception base address is dynamically determined in subsequent patches. To preserve bisectability, now make the macro applicable for no-MMU scenario too. Thanks to 0-DAY kernel test infrastructure that found the bisectability issue. This macro will be restricted to MMU case upon dynamically determining exception base address for no-MMU. Once exception address is handled dynamically for no-MMU, VECTORS_BASE can be removed from Kconfig. Signed-off-by: afzal mohammed Tested-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 2 ++ arch/arm/mach-berlin/platsmp.c | 3 ++- arch/arm/mm/dump.c | 5 +++-- arch/arm/mm/init.c | 4 ++-- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index c30d0d82a105..00bd3529854a 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -111,6 +111,8 @@ #endif /* !CONFIG_MMU */ +#define VECTORS_BASE UL(0xffff0000) + #ifdef CONFIG_XIP_KERNEL #define KERNEL_START _sdata #else diff --git a/arch/arm/mach-berlin/platsmp.c b/arch/arm/mach-berlin/platsmp.c index 1167b0ed92c8..7586b7aec272 100644 --- a/arch/arm/mach-berlin/platsmp.c +++ b/arch/arm/mach-berlin/platsmp.c @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -75,7 +76,7 @@ static void __init berlin_smp_prepare_cpus(unsigned int max_cpus) if (!cpu_ctrl) goto unmap_scu; - vectors_base = ioremap(CONFIG_VECTORS_BASE, SZ_32K); + vectors_base = ioremap(VECTORS_BASE, SZ_32K); if (!vectors_base) goto unmap_scu; diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index 9fe8e241335c..21192d6eda40 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -18,6 +18,7 @@ #include #include +#include #include struct addr_marker { @@ -31,8 +32,8 @@ static struct addr_marker address_markers[] = { { 0, "vmalloc() Area" }, { VMALLOC_END, "vmalloc() End" }, { FIXADDR_START, "Fixmap Area" }, - { CONFIG_VECTORS_BASE, "Vectors" }, - { CONFIG_VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" }, + { VECTORS_BASE, "Vectors" }, + { VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" }, { -1, NULL }, }; diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 4127f578086c..50e5402a8ef3 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -518,8 +519,7 @@ void __init mem_init(void) " .data : 0x%p" " - 0x%p" " (%4td kB)\n" " .bss : 0x%p" " - 0x%p" " (%4td kB)\n", - MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) + - (PAGE_SIZE)), + MLK(VECTORS_BASE, VECTORS_BASE + PAGE_SIZE), #ifdef CONFIG_HAVE_TCM MLK(DTCM_OFFSET, (unsigned long) dtcm_end), MLK(ITCM_OFFSET, 
(unsigned long) itcm_end), -- GitLab From f8300a0b5de08c09db105db3c34a2a1c618e147e Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Wed, 1 Feb 2017 13:46:36 +0100 Subject: [PATCH 0724/1084] ARM: 8647/2: nommu: dynamic exception base address setting No-MMU dynamic exception base address configuration on CP15 processors. In the case of low vectors, decision based on whether security extensions are enabled & whether remap vectors to RAM CONFIG option is selected. For no-MMU without CP15, current default value of 0x0 is retained. Signed-off-by: afzal mohammed Tested-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/nommu.c | 52 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 13a25d6282f8..3b5c7aaf9c76 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -22,6 +23,8 @@ #include "mm.h" +unsigned long vectors_base; + #ifdef CONFIG_ARM_MPU struct mpu_rgn_info mpu_rgn_info; @@ -278,15 +281,60 @@ static void adjust_lowmem_bounds_mpu(void) {} static void __init mpu_setup(void) {} #endif /* CONFIG_ARM_MPU */ +#ifdef CONFIG_CPU_CP15 +#ifdef CONFIG_CPU_HIGH_VECTOR +static unsigned long __init setup_vectors_base(void) +{ + unsigned long reg = get_cr(); + + set_cr(reg | CR_V); + return 0xffff0000; +} +#else /* CONFIG_CPU_HIGH_VECTOR */ +/* Write exception base address to VBAR */ +static inline void set_vbar(unsigned long val) +{ + asm("mcr p15, 0, %0, c12, c0, 0" : : "r" (val) : "cc"); +} + +/* + * Security extensions, bits[7:4], permitted values, + * 0b0000 - not implemented, 0b0001/0b0010 - implemented + */ +static inline bool security_extensions_enabled(void) +{ + return !!cpuid_feature_extract(CPUID_EXT_PFR1, 4); +} + +static unsigned long __init setup_vectors_base(void) +{ + unsigned long base = 0, reg = get_cr(); + + set_cr(reg & ~CR_V); + if (security_extensions_enabled()) { + if (IS_ENABLED(CONFIG_REMAP_VECTORS_TO_RAM)) + base = CONFIG_DRAM_BASE; + set_vbar(base); + } else if (IS_ENABLED(CONFIG_REMAP_VECTORS_TO_RAM)) { + if (CONFIG_DRAM_BASE != 0) + pr_err("Security extensions not enabled, vectors cannot be remapped to RAM, vectors base will be 0x00000000\n"); + } + + return base; +} +#endif /* CONFIG_CPU_HIGH_VECTOR */ +#endif /* CONFIG_CPU_CP15 */ + void __init arm_mm_memblock_reserve(void) { #ifndef CONFIG_CPU_V7M + vectors_base = IS_ENABLED(CONFIG_CPU_CP15) ? setup_vectors_base() : 0; /* * Register the exception vector page. * some architectures which the DRAM is the exception vector to trap, * alloc_page breaks with error, although it is not NULL, but "0." */ - memblock_reserve(CONFIG_VECTORS_BASE, 2 * PAGE_SIZE); + memblock_reserve(vectors_base, 2 * PAGE_SIZE); #else /* ifndef CONFIG_CPU_V7M */ /* * There is no dedicated vector page on V7-M. So nothing needs to be @@ -310,7 +358,7 @@ void __init adjust_lowmem_bounds(void) */ void __init paging_init(const struct machine_desc *mdesc) { - early_trap_init((void *)CONFIG_VECTORS_BASE); + early_trap_init((void *)vectors_base); mpu_setup(); bootmem_init(); } -- GitLab From 58c16709f9cad7e6daabeaa5c94ac4dcb260aedd Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Wed, 1 Feb 2017 13:39:18 +0100 Subject: [PATCH 0725/1084] ARM: 8648/2: nommu: display vectors base VECTORS_BASE displays the exception base address. Now on no-MMU as the exception base address is dynamically estimated, define VECTORS_BASE to the variable holding it. 
As it is the case, limit VECTORS_BASE constant definition to MMU. Suggested-by: Russell King Signed-off-by: afzal mohammed Tested-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 00bd3529854a..1f54e4e98c1e 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -83,8 +83,15 @@ #define IOREMAP_MAX_ORDER 24 #endif +#define VECTORS_BASE UL(0xffff0000) + #else /* CONFIG_MMU */ +#ifndef __ASSEMBLY__ +extern unsigned long vectors_base; +#define VECTORS_BASE vectors_base +#endif + /* * The limitation of user task size can grow up to the end of free ram region. * It is difficult to define and perhaps will never meet the original meaning @@ -111,8 +118,6 @@ #endif /* !CONFIG_MMU */ -#define VECTORS_BASE UL(0xffff0000) - #ifdef CONFIG_XIP_KERNEL #define KERNEL_START _sdata #else -- GitLab From ad475117d2015781789364d599b85c67254680a1 Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Wed, 1 Feb 2017 13:47:34 +0100 Subject: [PATCH 0726/1084] ARM: 8649/2: nommu: remove Hivecs configuration is asm Now that exception based address is handled dynamically for processors with CP15, remove Hivecs configuration in assembly. Signed-off-by: afzal mohammed Tested-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/kernel/head-nommu.S | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 6b4eb27b8758..2e21e08de747 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -151,11 +151,6 @@ __after_proc_init: #endif #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I -#endif -#ifdef CONFIG_CPU_HIGH_VECTOR - orr r0, r0, #CR_V -#else - bic r0, r0, #CR_V #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #elif defined (CONFIG_CPU_V7M) -- GitLab From 050d18d1c65113b4558d86d53465ebe1d04910fb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 30 Jan 2017 18:29:28 +0100 Subject: [PATCH 0727/1084] ARM: 8650/1: module: handle negative R_ARM_PREL31 addends correctly According to the spec 'ELF for the ARM Architecture' (IHI 0044E), addends for R_ARM_PREL31 relocations are 31-bit signed quantities, so we need to sign extend the value to 32 bits before it can be used as an offset in the calculation of the relocated value. We have not been bitten by this because these relocations are usually emitted against the start of a section, which means the addends never assume negative values in practice. But it is a bug nonetheless, so fix it. 
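The 31-bit sign extension is the usual shift pair; a hedged user-space sketch with made-up values (not taken from a real module) shows why it is needed:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* -4 encoded in bits [30:0]; bit 31 belongs to the instruction word */
        uint32_t insn = 0x7ffffffc;
        int32_t naive = insn & 0x7fffffff;              /* reads as +2147483644 */
        int32_t addend = (int32_t)(insn << 1) >> 1;     /* sign-extends to -4 */

        printf("naive=%d sign-extended=%d\n", naive, addend);
        return 0;
}

The module loader additionally has to range-check the final offset against +/-0x40000000, since the relocated result must still fit back into the 31-bit field, as the hunk below does.
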
Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/module.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 4f14b5ce6535..80254b47dc34 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -155,8 +155,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, break; case R_ARM_PREL31: - offset = *(u32 *)loc + sym->st_value - loc; - *(u32 *)loc = offset & 0x7fffffff; + offset = (*(s32 *)loc << 1) >> 1; /* sign extend */ + offset += sym->st_value - loc; + if (offset >= 0x40000000 || offset < -0x40000000) { + pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", + module->name, relindex, i, symname, + ELF32_R_TYPE(rel->r_info), loc, + sym->st_value); + return -ENOEXEC; + } + *(u32 *)loc &= 0x80000000; + *(u32 *)loc |= offset & 0x7fffffff; break; case R_ARM_MOVW_ABS_NC: -- GitLab From 1c63d4c5e16394698072d671029e753715af100a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 1 Feb 2017 14:29:22 +0100 Subject: [PATCH 0728/1084] ARM: 8651/1: cache-uniphier: include instead of Nothing in this header file depends on . Rather, should be included for -ENODEV. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/include/asm/hardware/cache-uniphier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/hardware/cache-uniphier.h b/arch/arm/include/asm/hardware/cache-uniphier.h index eaa60da7dac3..0ef42ae75b6c 100644 --- a/arch/arm/include/asm/hardware/cache-uniphier.h +++ b/arch/arm/include/asm/hardware/cache-uniphier.h @@ -16,7 +16,7 @@ #ifndef __CACHE_UNIPHIER_H #define __CACHE_UNIPHIER_H -#include +#include #ifdef CONFIG_CACHE_UNIPHIER int uniphier_cache_init(void); -- GitLab From 06369a1e58bd5eb89c02f9a2cf1a35152cbf1154 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 1 Feb 2017 14:30:09 +0100 Subject: [PATCH 0729/1084] ARM: 8652/1: cache-uniphier: clean up active way setup code Now, the active way setup function is called with a fixed value zero for the second argument. The code can be simpler. 
Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/mm/cache-uniphier.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/arm/mm/cache-uniphier.c b/arch/arm/mm/cache-uniphier.c index dfe97b409916..f57b080b6fd4 100644 --- a/arch/arm/mm/cache-uniphier.c +++ b/arch/arm/mm/cache-uniphier.c @@ -15,6 +15,7 @@ #define pr_fmt(fmt) "uniphier: " fmt +#include #include #include #include @@ -71,8 +72,7 @@ * @ctrl_base: virtual base address of control registers * @rev_base: virtual base address of revision registers * @op_base: virtual base address of operation registers - * @way_present_mask: each bit specifies if the way is present - * @way_locked_mask: each bit specifies if the way is locked + * @way_mask: each bit specifies if the way is present * @nsets: number of associativity sets * @line_size: line size in bytes * @range_op_max_size: max size that can be handled by a single range operation @@ -83,8 +83,7 @@ struct uniphier_cache_data { void __iomem *rev_base; void __iomem *op_base; void __iomem *way_ctrl_base; - u32 way_present_mask; - u32 way_locked_mask; + u32 way_mask; u32 nsets; u32 line_size; u32 range_op_max_size; @@ -234,17 +233,13 @@ static void __uniphier_cache_enable(struct uniphier_cache_data *data, bool on) writel_relaxed(val, data->ctrl_base + UNIPHIER_SSCC); } -static void __init __uniphier_cache_set_locked_ways( - struct uniphier_cache_data *data, - u32 way_mask) +static void __init __uniphier_cache_set_active_ways( + struct uniphier_cache_data *data) { unsigned int cpu; - data->way_locked_mask = way_mask & data->way_present_mask; - for_each_possible_cpu(cpu) - writel_relaxed(~data->way_locked_mask & data->way_present_mask, - data->way_ctrl_base + 4 * cpu); + writel_relaxed(data->way_mask, data->way_ctrl_base + 4 * cpu); } static void uniphier_cache_maint_range(unsigned long start, unsigned long end, @@ -307,7 +302,7 @@ static void __init uniphier_cache_enable(void) list_for_each_entry(data, &uniphier_cache_list, list) { __uniphier_cache_enable(data, true); - __uniphier_cache_set_locked_ways(data, 0); + __uniphier_cache_set_active_ways(data); } } @@ -382,8 +377,8 @@ static int __init __uniphier_cache_init(struct device_node *np, goto err; } - data->way_present_mask = - ((u32)1 << cache_size / data->nsets / data->line_size) - 1; + data->way_mask = GENMASK(cache_size / data->nsets / data->line_size - 1, + 0); data->ctrl_base = of_iomap(np, 0); if (!data->ctrl_base) { -- GitLab From 7b96ddd02ec33d705df02260e2d5dcfb1d498f4b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Feb 2017 13:29:36 +0100 Subject: [PATCH 0730/1084] ARM: 8654/1: decompressor: add strlen prototype The decompress.c file contains a declaration for strstr() so we can include some compression library code. With the updated LZ4 implementation, we run into the same problem again for strlen(): In file included from ../include/linux/rcupdate.h:40:0, from ../include/linux/srcu.h:33, from ../include/linux/notifier.h:15, from ../include/linux/memory_hotplug.h:6, from ../include/linux/mmzone.h:749, from ../include/linux/gfp.h:5, from ../include/linux/kmod.h:22, from ../include/linux/module.h:13, from ../arch/arm/boot/compressed/../../../../lib/lz4/lz4_decompress.c:39, from ../arch/arm/boot/compressed/../../../../lib/decompress_unlz4.c:13, from ../arch/arm/boot/compressed/decompress.c:55: include/linux/cpumask.h: In function 'cpumask_parse': include/linux/cpumask.h:592:53: error: implicit declaration of function 'strlen';did you mean 'strstr'? 
[-Werror=implicit-function-declaration] This adds another declaration to work around the new problem. Fixes: ce83d9ab80d6 ("lib: update LZ4 compressor module") Signed-off-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/boot/compressed/decompress.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index a0765e7ed6c7..ea7832702a8f 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -32,6 +32,7 @@ extern void error(char *); /* Not needed, but used in some headers pulled in by decompressors */ extern char * strstr(const char * s1, const char *s2); +extern size_t strlen(const char *s); #ifdef CONFIG_KERNEL_GZIP #include "../../../../lib/decompress_inflate.c" -- GitLab From e2fce0a28d8f4293d68abf6485956e7794a30124 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Feb 2017 13:30:40 +0100 Subject: [PATCH 0731/1084] ARM: 8655/1: improve NOMMU definition of pgprot_*() The tegra DRM driver produces a harmless warning when built for NOMMU: drivers/gpu/drm/tegra/gem.c: In function 'tegra_drm_mmap': drivers/gpu/drm/tegra/gem.c:508:12: unused variable 'prot' This is because pgprot_writecombine() on ARM returns a constant and ignores its argument. The version in asm-generic doesn't have that problem, so let's use that one instead. We don't actually care about the value on NOMMU, and this is consistent with what some other architectures do. Signed-off-by: Arnd Bergmann Acked-by: Thierry Reding Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-nommu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index add094d09e3e..302240c19a5a 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -63,9 +63,9 @@ typedef pte_t *pte_addr_t; /* * Mark the prot value as uncacheable and unbufferable. */ -#define pgprot_noncached(prot) __pgprot(0) -#define pgprot_writecombine(prot) __pgprot(0) -#define pgprot_dmacoherent(prot) __pgprot(0) +#define pgprot_noncached(prot) (prot) +#define pgprot_writecombine(prot) (prot) +#define pgprot_dmacoherent(prot) (prot) /* -- GitLab From 3928624812dcfa39b6a67f9de46efcb51c573ad0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 16 Jan 2017 15:11:10 +0000 Subject: [PATCH 0732/1084] ARM: mm: move initrd init code out of arm_memblock_init() Move the ARM initrd initialisation code out of arm_memblock_init() into its own function, so it can be cleaned up. Signed-off-by: Russell King --- arch/arm/mm/init.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 50e5402a8ef3..43d8825e59bb 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -228,11 +228,8 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) return phys; } -void __init arm_memblock_init(const struct machine_desc *mdesc) +static void __init arm_initrd_init(void) { - /* Register the kernel text, kernel data and initrd with memblock. 
*/ - memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START); - #ifdef CONFIG_BLK_DEV_INITRD /* FDT scan will populate initrd_start */ if (initrd_start && !phys_initrd_size) { @@ -260,6 +257,14 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) initrd_end = initrd_start + phys_initrd_size; } #endif +} + +void __init arm_memblock_init(const struct machine_desc *mdesc) +{ + /* Register the kernel text, kernel data and initrd with memblock. */ + memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START); + + arm_initrd_init(); arm_mm_memblock_reserve(); -- GitLab From 68b32f361f3892fc376051b1702954b2dc692d13 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 16 Jan 2017 15:13:25 +0000 Subject: [PATCH 0733/1084] ARM: mm: clean up initrd initialisation Rather than repeatedly testing phys_initrd_size to see if the initrd is still enabled, return from the new function to avoid executing the remaining initialisation. Signed-off-by: Russell King --- arch/arm/mm/init.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 43d8825e59bb..15739a95552a 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -236,26 +236,29 @@ static void __init arm_initrd_init(void) phys_initrd_start = __virt_to_phys(initrd_start); phys_initrd_size = initrd_end - initrd_start; } + initrd_start = initrd_end = 0; - if (phys_initrd_size && - !memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { + + if (!phys_initrd_size) + return; + + if (!memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", (u64)phys_initrd_start, phys_initrd_size); - phys_initrd_start = phys_initrd_size = 0; + return; } - if (phys_initrd_size && - memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { + + if (memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", (u64)phys_initrd_start, phys_initrd_size); - phys_initrd_start = phys_initrd_size = 0; + return; } - if (phys_initrd_size) { - memblock_reserve(phys_initrd_start, phys_initrd_size); - /* Now convert initrd to virtual addresses */ - initrd_start = __phys_to_virt(phys_initrd_start); - initrd_end = initrd_start + phys_initrd_size; - } + memblock_reserve(phys_initrd_start, phys_initrd_size); + + /* Now convert initrd to virtual addresses */ + initrd_start = __phys_to_virt(phys_initrd_start); + initrd_end = initrd_start + phys_initrd_size; #endif } -- GitLab From cdcc5fa0415d943288c6316bd32e76befb1027c5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 16 Jan 2017 15:21:05 +0000 Subject: [PATCH 0734/1084] ARM: mm: round the initrd reservation to page boundaries Round the initrd memblock reservation to page boundaries to prevent other data sharing the initrd pages. This prevents an allocation possibly overlapping with the initrd, which would later get trampled on in free_initrd_mem(). 
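
(To make the rounding described above concrete, here is a stand-alone sketch of the same arithmetic, not part of the patch. The addresses are invented example values, and PAGE_SIZE/round_down/round_up are simplified local copies of the kernel helpers:)

#include <stdio.h>

#define PAGE_SIZE        4096UL
#define round_down(x, y) ((x) & ~((y) - 1))
#define round_up(x, y)   ((((x) - 1) | ((y) - 1)) + 1)

int main(void)
{
	/* hypothetical, deliberately unaligned initrd placement */
	unsigned long phys_initrd_start = 0x82000a00UL;
	unsigned long phys_initrd_size  = 0x00300180UL;
	unsigned long start, size;

	/* same steps as arm_initrd_init() after this patch */
	start = round_down(phys_initrd_start, PAGE_SIZE);
	size  = phys_initrd_size + (phys_initrd_start - start);
	size  = round_up(size, PAGE_SIZE);

	/* prints "reserve 0x82000000 + 0x301000": every page the initrd
	 * touches is covered, so nothing else gets allocated in them */
	printf("reserve %#lx + %#lx\n", start, size);
	return 0;
}
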
Signed-off-by: Russell King --- arch/arm/mm/init.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 15739a95552a..d1e26610977d 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -231,6 +231,9 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) static void __init arm_initrd_init(void) { #ifdef CONFIG_BLK_DEV_INITRD + phys_addr_t start; + unsigned long size; + /* FDT scan will populate initrd_start */ if (initrd_start && !phys_initrd_size) { phys_initrd_start = __virt_to_phys(initrd_start); @@ -242,19 +245,29 @@ static void __init arm_initrd_init(void) if (!phys_initrd_size) return; - if (!memblock_is_region_memory(phys_initrd_start, phys_initrd_size)) { + /* + * Round the memory region to page boundaries as per free_initrd_mem() + * This allows us to detect whether the pages overlapping the initrd + * are in use, but more importantly, reserves the entire set of pages + * as we don't want these pages allocated for other purposes. + */ + start = round_down(phys_initrd_start, PAGE_SIZE); + size = phys_initrd_size + (phys_initrd_start - start); + size = round_up(size, PAGE_SIZE); + + if (!memblock_is_region_memory(start, size)) { pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", - (u64)phys_initrd_start, phys_initrd_size); + (u64)start, size); return; } - if (memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) { + if (memblock_is_region_reserved(start, size)) { pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", - (u64)phys_initrd_start, phys_initrd_size); + (u64)start, size); return; } - memblock_reserve(phys_initrd_start, phys_initrd_size); + memblock_reserve(start, size); /* Now convert initrd to virtual addresses */ initrd_start = __phys_to_virt(phys_initrd_start); -- GitLab From 29e09229d9f26129a39462fae0ddabc4d9533989 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Feb 2017 08:39:28 +0100 Subject: [PATCH 0735/1084] netfilter: use skb_to_full_sk in ip_route_me_harder inet_sk(skb->sk) is illegal in case skb is attached to request socket. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Reported by: Daniel J Blueman Signed-off-by: Florian Westphal Tested-by: Daniel J Blueman Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index b3cc1335adbc..c0cc6aa8cfaa 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -23,7 +23,8 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + const struct sock *sk = skb_to_full_sk(skb); + __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0; struct net_device *dev = skb_dst(skb)->dev; unsigned int hh_len; @@ -40,7 +41,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t fl4.daddr = iph->daddr; fl4.saddr = saddr; fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl4.flowi4_oif = sk ? 
sk->sk_bound_dev_if : 0; if (!fl4.flowi4_oif) fl4.flowi4_oif = l3mdev_master_ifindex(dev); fl4.flowi4_mark = skb->mark; @@ -61,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); - dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); + dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); skb_dst_set(skb, dst); -- GitLab From c3a49c8991212e1ef795c7a46881ac5cc7766993 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 28 Feb 2017 00:05:01 +0100 Subject: [PATCH 0736/1084] cpufreq: intel_pstate: Fix limits issue with operation mode switching There is a problem with intel_pstate operation mode switching introduced by commit fb1fe1041c04 (cpufreq: intel_pstate: Operation mode control from sysfs), because the global sysfs limits are preserved across operation modes while per-policy limits are reinitialized from scratch on a mode switch and both sets of limits may get out of sync this way. Fix that by always reinitializing the global limits upon the registration of the driver. Fixes: fb1fe1041c04 (cpufreq: intel_pstate: Operation mode control from sysfs) Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 65 ++++++++++++---------------------- 1 file changed, 22 insertions(+), 43 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index eb0f7fb71685..9ee13f195c37 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -364,37 +364,25 @@ static bool driver_registered __read_mostly; static bool acpi_ppc; #endif -static struct perf_limits performance_limits = { - .no_turbo = 0, - .turbo_disabled = 0, - .max_perf_pct = 100, - .max_perf = int_ext_tofp(1), - .min_perf_pct = 100, - .min_perf = int_ext_tofp(1), - .max_policy_pct = 100, - .max_sysfs_pct = 100, - .min_policy_pct = 0, - .min_sysfs_pct = 0, -}; +static struct perf_limits performance_limits; +static struct perf_limits powersave_limits; +static struct perf_limits *limits; -static struct perf_limits powersave_limits = { - .no_turbo = 0, - .turbo_disabled = 0, - .max_perf_pct = 100, - .max_perf = int_ext_tofp(1), - .min_perf_pct = 0, - .min_perf = 0, - .max_policy_pct = 100, - .max_sysfs_pct = 100, - .min_policy_pct = 0, - .min_sysfs_pct = 0, -}; +static void intel_pstate_init_limits(struct perf_limits *limits) +{ + memset(limits, 0, sizeof(*limits)); + limits->max_perf_pct = 100; + limits->max_perf = int_ext_tofp(1); + limits->max_policy_pct = 100; + limits->max_sysfs_pct = 100; +} -#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE -static struct perf_limits *limits = &performance_limits; -#else -static struct perf_limits *limits = &powersave_limits; -#endif +static void intel_pstate_set_performance_limits(struct perf_limits *limits) +{ + intel_pstate_init_limits(limits); + limits->min_perf_pct = 100; + limits->min_perf = int_ext_tofp(1); +} static DEFINE_MUTEX(intel_pstate_driver_lock); static DEFINE_MUTEX(intel_pstate_limits_lock); @@ -2084,20 +2072,6 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) synchronize_sched(); } -static void intel_pstate_set_performance_limits(struct perf_limits *limits) -{ - limits->no_turbo = 0; - limits->turbo_disabled = 0; - limits->max_perf_pct = 100; - limits->max_perf = int_ext_tofp(1); - limits->min_perf_pct = 100; - limits->min_perf = int_ext_tofp(1); - 
limits->max_policy_pct = 100; - limits->max_sysfs_pct = 100; - limits->min_policy_pct = 0; - limits->min_sysfs_pct = 0; -} - static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, struct perf_limits *limits) { @@ -2466,6 +2440,11 @@ static int intel_pstate_register_driver(void) { int ret; + intel_pstate_init_limits(&powersave_limits); + intel_pstate_set_performance_limits(&performance_limits); + limits = IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) ? + &performance_limits : &powersave_limits; + ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { intel_pstate_driver_cleanup(); -- GitLab From da353f6b30bc02da29b54f5f024039698fbd23f4 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 17:55:53 +0100 Subject: [PATCH 0737/1084] btrfs: constify device path passed to relevant helpers Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 5 +++-- fs/btrfs/dev-replace.h | 5 +++-- fs/btrfs/volumes.c | 18 ++++++++++-------- fs/btrfs/volumes.h | 12 ++++++------ 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5de280b9ad73..e653921f05d9 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -304,8 +304,9 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) dev_replace->cursor_left_last_write_of_item; } -int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, char *tgtdev_name, - u64 srcdevid, char *srcdev_name, int read_src) +int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, + const char *tgtdev_name, u64 srcdevid, const char *srcdev_name, + int read_src) { struct btrfs_root *root = fs_info->dev_root; struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h index 54ea12bda15b..f94a76844ae7 100644 --- a/fs/btrfs/dev-replace.h +++ b/fs/btrfs/dev-replace.h @@ -27,8 +27,9 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info); int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args); -int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, char *tgtdev_name, - u64 srcdevid, char *srcdev_name, int read_src); +int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info, + const char *tgtdev_name, u64 srcdevid, const char *srcdev_name, + int read_src); void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_dev_replace_args *args); int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1fac98728814..b443cc71830f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1725,7 +1725,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, * Function to update ctime/mtime for a given device path. * Mainly used for ctime/mtime based probe like libblkid. 
*/ -static void update_dev_time(char *path_name) +static void update_dev_time(const char *path_name) { struct file *filp; @@ -1851,7 +1851,8 @@ void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, fs_info->fs_devices->latest_bdev = next_device->bdev; } -int btrfs_rm_device(struct btrfs_fs_info *fs_info, char *device_path, u64 devid) +int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, + u64 devid) { struct btrfs_device *device; struct btrfs_fs_devices *cur_devices; @@ -2091,7 +2092,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, } static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info, - char *device_path, + const char *device_path, struct btrfs_device **device) { int ret = 0; @@ -2118,7 +2119,7 @@ static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info, } int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, - char *device_path, + const char *device_path, struct btrfs_device **device) { *device = NULL; @@ -2151,7 +2152,8 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, * Lookup a device given by device id, or the path if the id is 0. */ int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid, - char *devpath, struct btrfs_device **device) + const char *devpath, + struct btrfs_device **device) { int ret; @@ -2307,7 +2309,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans, return ret; } -int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *device_path) +int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path) { struct btrfs_root *root = fs_info->dev_root; struct request_queue *q; @@ -2515,7 +2517,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *device_path) } int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, - char *device_path, + const char *device_path, struct btrfs_device *srcdev, struct btrfs_device **device_out) { @@ -7102,7 +7104,7 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, return 0; } -void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path) +void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path) { struct buffer_head *bh; struct btrfs_super_block *disk_super; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 24ba6bc3ec34..59be81206dd7 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -422,16 +422,16 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step); void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, struct btrfs_device *device, struct btrfs_device *this_dev); int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info, - char *device_path, + const char *device_path, struct btrfs_device **device); int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid, - char *devpath, + const char *devpath, struct btrfs_device **device); struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, const u64 *devid, const u8 *uuid); int btrfs_rm_device(struct btrfs_fs_info *fs_info, - char *device_path, u64 devid); + const char *device_path, u64 devid); void btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); int btrfs_grow_device(struct btrfs_trans_handle *trans, @@ -439,9 +439,9 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, u8 *uuid, u8 *fsid); int 
btrfs_shrink_device(struct btrfs_device *device, u64 new_size); -int btrfs_init_new_device(struct btrfs_fs_info *fs_info, char *path); +int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path); int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, - char *device_path, + const char *device_path, struct btrfs_device *srcdev, struct btrfs_device **device_out); int btrfs_balance(struct btrfs_balance_control *bctl, @@ -474,7 +474,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, struct btrfs_device *tgtdev); void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, struct btrfs_device *tgtdev); -void btrfs_scratch_superblocks(struct block_device *bdev, char *device_path); +void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path); int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len, int mirror_num); unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, -- GitLab From 9ed573674ae607bc616c804657bfaf5b6ea5889d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 18:03:49 +0100 Subject: [PATCH 0738/1084] btrfs: constify input buffer of btrfs_csum_data The function does not modify the input buffer, also update a typecast in one caller. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/disk-io.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a53ff3bff8eb..67d663ab658a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -265,7 +265,7 @@ static struct extent_map *btree_get_extent(struct btrfs_inode *inode, return em; } -u32 btrfs_csum_data(char *data, u32 seed, size_t len) +u32 btrfs_csum_data(const char *data, u32 seed, size_t len) { return btrfs_crc32c(seed, data, len); } @@ -3448,7 +3448,7 @@ static int write_dev_supers(struct btrfs_device *device, btrfs_set_super_bytenr(sb, bytenr); crc = ~(u32)0; - crc = btrfs_csum_data((char *)sb + + crc = btrfs_csum_data((const char *)sb + BTRFS_CSUM_SIZE, crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 0be2d4fe705b..2e0ec29bfd69 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -116,7 +116,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); -u32 btrfs_csum_data(char *data, u32 seed, size_t len); +u32 btrfs_csum_data(const char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, u8 *result); int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata); -- GitLab From 14a3357b4097d94ac6401c597abc2d02965e0fdd Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 17:58:04 +0100 Subject: [PATCH 0739/1084] btrfs: constify buffers used by compression helpers Signed-off-by: David Sterba --- fs/btrfs/compression.c | 2 +- fs/btrfs/compression.h | 2 +- fs/btrfs/lzo.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index f32e86b61240..d426e4a10b2b 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1015,7 +1015,7 @@ void btrfs_exit_compress(void) * * total_out is the last byte of the buffer */ -int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, +int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, unsigned long 
total_out, u64 disk_start, struct bio *bio) { diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 09879579fbc8..1c700bb20b52 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -32,7 +32,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, unsigned long max_out); int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, unsigned long start_byte, size_t srclen, size_t destlen); -int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, +int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, unsigned long total_out, u64 disk_start, struct bio *bio); diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 45d26980caf9..e074b85aa054 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -76,7 +76,7 @@ static inline void write_compress_length(char *buf, size_t len) memcpy(buf, &dlen, LZO_LEN); } -static inline size_t read_compress_length(char *buf) +static inline size_t read_compress_length(const char *buf) { __le32 dlen; -- GitLab From 52f75f4fe74ce86376d68b30c94d5fb11cb4019e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 18:33:53 +0100 Subject: [PATCH 0740/1084] btrfs: constify name of subvolume in creation helpers Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 45a708555f9b..dabfc7ac48a6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -434,7 +434,7 @@ int btrfs_is_empty_uuid(u8 *uuid) static noinline int create_subvol(struct inode *dir, struct dentry *dentry, - char *name, int namelen, + const char *name, int namelen, u64 *async_transid, struct btrfs_qgroup_inherit *inherit) { @@ -832,7 +832,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) * inside this filesystem so it's quite a bit simpler. */ static noinline int btrfs_mksubvol(const struct path *parent, - char *name, int namelen, + const char *name, int namelen, struct btrfs_root *snap_src, u64 *async_transid, bool readonly, struct btrfs_qgroup_inherit *inherit) @@ -1625,7 +1625,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, } static noinline int btrfs_ioctl_snap_create_transid(struct file *file, - char *name, unsigned long fd, int subvol, + const char *name, unsigned long fd, int subvol, u64 *transid, bool readonly, struct btrfs_qgroup_inherit *inherit) { -- GitLab From 38c31464089f639630b7c28ce933a4d60e135a02 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 19:04:07 +0100 Subject: [PATCH 0741/1084] btrfs: merge length input and output parameter in compress_pages The length parameter is basically duplicated for input and output in the top level caller of the compress_pages chain. We can simply use one variable for that and reduce stack consumption. The compression implementation will sink the parameter to a local variable so everything works as before. Signed-off-by: David Sterba --- fs/btrfs/compression.c | 24 +++++++++++------------- fs/btrfs/compression.h | 5 ++--- fs/btrfs/inode.c | 2 +- fs/btrfs/lzo.c | 4 ++-- fs/btrfs/zlib.c | 3 ++- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d426e4a10b2b..7e214fd254e4 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -911,27 +911,25 @@ static void free_workspaces(void) } /* - * given an address space and start/len, compress the bytes. 
+ * Given an address space and start and length, compress the bytes into @pages + * that are allocated on demand. * - * pages are allocated to hold the compressed result and stored - * in 'pages' + * @out_pages is used to return the number of pages allocated. There + * may be pages allocated even if we return an error. * - * out_pages is used to return the number of pages allocated. There - * may be pages allocated even if we return an error - * - * total_in is used to return the number of bytes actually read. It - * may be smaller then len if we had to exit early because we + * @total_in is used to return the number of bytes actually read. It + * may be smaller than the input length if we had to exit early because we * ran out of room in the pages array or because we cross the * max_out threshold. * - * total_out is used to return the total number of compressed bytes + * @total_out is an in/out parameter, must be set to the input length and will + * be also used to return the total number of compressed bytes * - * max_out tells us the max number of bytes that we're allowed to + * @max_out tells us the max number of bytes that we're allowed to * stuff into pages */ int btrfs_compress_pages(int type, struct address_space *mapping, - u64 start, unsigned long len, - struct page **pages, + u64 start, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, @@ -944,7 +942,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, workspace = find_workspace(type); ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, - start, len, pages, + start, pages, nr_dest_pages, out_pages, total_in, total_out, max_out); diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 1c700bb20b52..725d1e0bd25d 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -23,8 +23,7 @@ void btrfs_init_compress(void); void btrfs_exit_compress(void); int btrfs_compress_pages(int type, struct address_space *mapping, - u64 start, unsigned long len, - struct page **pages, + u64 start, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, @@ -59,7 +58,7 @@ struct btrfs_compress_op { int (*compress_pages)(struct list_head *workspace, struct address_space *mapping, - u64 start, unsigned long len, + u64 start, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 596e5cb4bfa2..8b9eac33d431 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -517,7 +517,7 @@ static noinline void compress_file_range(struct inode *inode, redirty = 1; ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, - total_compressed, pages, + pages, nr_pages, &nr_pages_ret, &total_in, &total_compressed, diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index e074b85aa054..489f78c4e5ec 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -86,7 +86,7 @@ static inline size_t read_compress_length(const char *buf) static int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, - u64 start, unsigned long len, + u64 start, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, @@ -102,7 +102,7 @@ static int lzo_compress_pages(struct list_head *ws, struct page *in_page = NULL; struct page *out_page = NULL; unsigned long bytes_left; - + unsigned long len = *total_out; size_t in_len; size_t out_len; char *buf; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index da497f184ff4..42d76b7824c3 100644 --- 
a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -73,7 +73,7 @@ static struct list_head *zlib_alloc_workspace(void) static int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, - u64 start, unsigned long len, + u64 start, struct page **pages, unsigned long nr_dest_pages, unsigned long *out_pages, @@ -89,6 +89,7 @@ static int zlib_compress_pages(struct list_head *ws, struct page *in_page = NULL; struct page *out_page = NULL; unsigned long bytes_left; + unsigned long len = *total_out; *out_pages = 0; *total_out = 0; -- GitLab From 4d3a800ebb1299944408f3b40b5b6b996477fba2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 19:04:07 +0100 Subject: [PATCH 0742/1084] btrfs: merge nr_pages input and output parameter in compress_pages The parameter saying how many pages can be allocated at maximum can be merged with the output page counter, to save some stack space. The compression implementation will sink the parameter to a local variable so everything works as before. The nr_pages variables can also be simply merged in compress_file_range into one. Signed-off-by: David Sterba --- fs/btrfs/compression.c | 7 +++---- fs/btrfs/compression.h | 2 -- fs/btrfs/inode.c | 13 ++++++------- fs/btrfs/lzo.c | 2 +- fs/btrfs/zlib.c | 2 +- 5 files changed, 11 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7e214fd254e4..11dcda57e15c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -914,8 +914,8 @@ static void free_workspaces(void) * Given an address space and start and length, compress the bytes into @pages * that are allocated on demand. * - * @out_pages is used to return the number of pages allocated. There - * may be pages allocated even if we return an error. + * @out_pages is an in/out parameter, holds maximum number of pages to allocate + * and returns number of actually allocated pages * * @total_in is used to return the number of bytes actually read. 
It * may be smaller than the input length if we had to exit early because we @@ -930,7 +930,6 @@ static void free_workspaces(void) */ int btrfs_compress_pages(int type, struct address_space *mapping, u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, @@ -943,7 +942,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, start, pages, - nr_dest_pages, out_pages, + out_pages, total_in, total_out, max_out); free_workspace(type, workspace); diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 725d1e0bd25d..cfcec709c61b 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -24,7 +24,6 @@ void btrfs_exit_compress(void); int btrfs_compress_pages(int type, struct address_space *mapping, u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, @@ -60,7 +59,6 @@ struct btrfs_compress_op { struct address_space *mapping, u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8b9eac33d431..83bbe1e31869 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -430,7 +430,6 @@ static noinline void compress_file_range(struct inode *inode, int ret = 0; struct page **pages = NULL; unsigned long nr_pages; - unsigned long nr_pages_ret = 0; unsigned long total_compressed = 0; unsigned long total_in = 0; unsigned long max_compressed = SZ_128K; @@ -518,7 +517,7 @@ static noinline void compress_file_range(struct inode *inode, ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, pages, - nr_pages, &nr_pages_ret, + &nr_pages, &total_in, &total_compressed, max_compressed); @@ -526,7 +525,7 @@ static noinline void compress_file_range(struct inode *inode, if (!ret) { unsigned long offset = total_compressed & (PAGE_SIZE - 1); - struct page *page = pages[nr_pages_ret - 1]; + struct page *page = pages[nr_pages - 1]; char *kaddr; /* zero the tail end of the last page, we might be @@ -607,7 +606,7 @@ static noinline void compress_file_range(struct inode *inode, * will submit them to the elevator. 
*/ add_async_extent(async_cow, start, num_bytes, - total_compressed, pages, nr_pages_ret, + total_compressed, pages, nr_pages, compress_type); if (start + num_bytes < end) { @@ -624,14 +623,14 @@ static noinline void compress_file_range(struct inode *inode, * the compression code ran but failed to make things smaller, * free any pages it allocated and our page pointer array */ - for (i = 0; i < nr_pages_ret; i++) { + for (i = 0; i < nr_pages; i++) { WARN_ON(pages[i]->mapping); put_page(pages[i]); } kfree(pages); pages = NULL; total_compressed = 0; - nr_pages_ret = 0; + nr_pages = 0; /* flag the file so we don't compress in the future */ if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && @@ -660,7 +659,7 @@ static noinline void compress_file_range(struct inode *inode, return; free_pages_out: - for (i = 0; i < nr_pages_ret; i++) { + for (i = 0; i < nr_pages; i++) { WARN_ON(pages[i]->mapping); put_page(pages[i]); } diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 489f78c4e5ec..0baf978fbcaf 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -88,7 +88,6 @@ static int lzo_compress_pages(struct list_head *ws, struct address_space *mapping, u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, @@ -103,6 +102,7 @@ static int lzo_compress_pages(struct list_head *ws, struct page *out_page = NULL; unsigned long bytes_left; unsigned long len = *total_out; + unsigned long nr_dest_pages = *out_pages; size_t in_len; size_t out_len; char *buf; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 42d76b7824c3..e7f2020f8ee7 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -75,7 +75,6 @@ static int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, unsigned long *total_out, @@ -90,6 +89,7 @@ static int zlib_compress_pages(struct list_head *ws, struct page *out_page = NULL; unsigned long bytes_left; unsigned long len = *total_out; + unsigned long nr_dest_pages = *out_pages; *out_pages = 0; *total_out = 0; -- GitLab From ff7638665c9a82894cabd18d26a9f8957f280f55 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 19:30:39 +0100 Subject: [PATCH 0743/1084] btrfs: export compression buffer limits in a header Move the buffer limit definitions out of compress_file_range. Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.h | 15 +++++++++++++++ fs/btrfs/inode.c | 10 ---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index cfcec709c61b..619e64c27444 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -19,6 +19,21 @@ #ifndef __BTRFS_COMPRESSION_ #define __BTRFS_COMPRESSION_ +/* + * We want to make sure that amount of RAM required to uncompress an extent is + * reasonable, so we limit the total size in ram of a compressed extent to + * 128k. This is a crucial number because it also controls how easily we can + * spread reads across cpus for decompression. + * + * We also want to make sure the amount of IO required to do a random read is + * reasonably small, so we limit the size of a compressed extent to 128k. 
+ */ + +/* Maximum length of compressed data stored on disk */ +#define BTRFS_MAX_COMPRESSED (SZ_128K) +/* Maximum size of data before compression */ +#define BTRFS_MAX_UNCOMPRESSED (SZ_128K) + void btrfs_init_compress(void); void btrfs_exit_compress(void); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 83bbe1e31869..77fb24b94a9e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -471,16 +471,6 @@ static noinline void compress_file_range(struct inode *inode, (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) goto cleanup_and_bail_uncompressed; - /* we want to make sure that amount of ram required to uncompress - * an extent is reasonable, so we limit the total size in ram - * of a compressed extent to 128k. This is a crucial number - * because it also controls how easily we can spread reads across - * cpus for decompression. - * - * We also want to make sure the amount of IO required to do - * a random read is reasonably small, so we limit the size of - * a compressed extent to 128k. - */ total_compressed = min(total_compressed, max_uncompressed); num_bytes = ALIGN(end - start + 1, blocksize); num_bytes = max(blocksize, num_bytes); -- GitLab From 069eac7850890acf0d3c21a6c8ca9f33ddb34a0d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 19:36:54 +0100 Subject: [PATCH 0744/1084] btrfs: use predefined limits for calculating maximum number of pages for compression Signed-off-by: David Sterba --- fs/btrfs/inode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 77fb24b94a9e..11a4eeadf662 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -432,8 +432,6 @@ static noinline void compress_file_range(struct inode *inode, unsigned long nr_pages; unsigned long total_compressed = 0; unsigned long total_in = 0; - unsigned long max_compressed = SZ_128K; - unsigned long max_uncompressed = SZ_128K; int i; int will_compress; int compress_type = fs_info->compress_type; @@ -446,7 +444,9 @@ static noinline void compress_file_range(struct inode *inode, again: will_compress = 0; nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; - nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_SIZE); + BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0); + nr_pages = min_t(unsigned long, nr_pages, + BTRFS_MAX_COMPRESSED / PAGE_SIZE); /* * we don't want to send crud past the end of i_size through @@ -471,7 +471,8 @@ static noinline void compress_file_range(struct inode *inode, (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) goto cleanup_and_bail_uncompressed; - total_compressed = min(total_compressed, max_uncompressed); + total_compressed = min_t(unsigned long, total_compressed, + BTRFS_MAX_UNCOMPRESSED); num_bytes = ALIGN(end - start + 1, blocksize); num_bytes = max(blocksize, num_bytes); total_in = 0; @@ -510,7 +511,7 @@ static noinline void compress_file_range(struct inode *inode, &nr_pages, &total_in, &total_compressed, - max_compressed); + BTRFS_MAX_COMPRESSED); if (!ret) { unsigned long offset = total_compressed & -- GitLab From e5d74902362f1a06ea3674042d09f1af178c0a20 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 19:45:05 +0100 Subject: [PATCH 0745/1084] btrfs: derive maximum output size in the compression implementation The value of max_out can be calculated from the parameters passed to the compressors, which is number of pages and the page size, and we don't have to needlessly pass it around. 
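
(The derivation is trivial but worth spelling out; a minimal stand-alone sketch, not part of the patch, assuming a 4 KiB page size and the 32-page cap that corresponds to BTRFS_MAX_COMPRESSED:)

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* assumed 4 KiB pages, for illustration */

int main(void)
{
	/* cap applied by compress_file_range(): BTRFS_MAX_COMPRESSED / PAGE_SIZE */
	unsigned long nr_dest_pages = 32;
	unsigned long max_out = nr_dest_pages * PAGE_SIZE;

	/* prints 131072, i.e. the 128K compressed extent limit */
	printf("max_out = %lu\n", max_out);
	return 0;
}
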
Signed-off-by: David Sterba --- fs/btrfs/compression.c | 6 ++---- fs/btrfs/compression.h | 6 ++---- fs/btrfs/inode.c | 3 +-- fs/btrfs/lzo.c | 4 ++-- fs/btrfs/zlib.c | 4 ++-- 5 files changed, 9 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 11dcda57e15c..c7721a6aa3bb 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -932,8 +932,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, u64 start, struct page **pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out) + unsigned long *total_out) { struct list_head *workspace; int ret; @@ -943,8 +942,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, start, pages, out_pages, - total_in, total_out, - max_out); + total_in, total_out); free_workspace(type, workspace); return ret; } diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 619e64c27444..39ec43ab8df1 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -41,8 +41,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping, u64 start, struct page **pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out); + unsigned long *total_out); int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, unsigned long start_byte, size_t srclen, size_t destlen); int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, @@ -76,8 +75,7 @@ struct btrfs_compress_op { struct page **pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out); + unsigned long *total_out); int (*decompress_bio)(struct list_head *workspace, struct page **pages_in, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 11a4eeadf662..0d932b9594de 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -510,8 +510,7 @@ static noinline void compress_file_range(struct inode *inode, pages, &nr_pages, &total_in, - &total_compressed, - BTRFS_MAX_COMPRESSED); + &total_compressed); if (!ret) { unsigned long offset = total_compressed & diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 0baf978fbcaf..f48c8c14dc14 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -90,8 +90,7 @@ static int lzo_compress_pages(struct list_head *ws, struct page **pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out) + unsigned long *total_out) { struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0; @@ -103,6 +102,7 @@ static int lzo_compress_pages(struct list_head *ws, unsigned long bytes_left; unsigned long len = *total_out; unsigned long nr_dest_pages = *out_pages; + const unsigned long max_out = nr_dest_pages * PAGE_SIZE; size_t in_len; size_t out_len; char *buf; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index e7f2020f8ee7..135b10823c6d 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -77,8 +77,7 @@ static int zlib_compress_pages(struct list_head *ws, struct page **pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out) + unsigned long *total_out) { struct workspace *workspace = list_entry(ws, struct workspace, list); int ret; @@ -90,6 +89,7 @@ static int zlib_compress_pages(struct list_head *ws, unsigned long bytes_left; unsigned long len = *total_out; unsigned long nr_dest_pages = *out_pages; + const unsigned 
long max_out = nr_dest_pages * PAGE_SIZE; *out_pages = 0; *total_out = 0; -- GitLab From 047e5e17c1e3551ef0fb34b629c66bec0591db0d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 14 Feb 2017 17:33:27 +0100 Subject: [PATCH 0746/1084] btrfs: remove BUG_ON from __tree_mod_log_insert All callers dereference the 'tm' parameter before it gets to this function, the NULL check does not make much sense here. Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1192bc7d2ee7..2c3c943bfcdc 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -453,8 +453,6 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) struct rb_node *parent = NULL; struct tree_mod_elem *cur; - BUG_ON(!tm); - tm->seq = btrfs_inc_tree_mod_seq(fs_info); tm_root = &fs_info->tree_mod_log; -- GitLab From fa2529923d3bc5f0b39ff7a7c52be74c4aea6f2a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 15 Feb 2017 09:35:01 +0100 Subject: [PATCH 0747/1084] btrfs: handle allocation error in update_dev_stat_item Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b443cc71830f..7c8c7bbee197 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6956,7 +6956,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, key.offset = device->devid; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { btrfs_warn_in_rcu(fs_info, -- GitLab From b9d04c607c21fafe0a346792d0d358e7ab9a768e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Feb 2017 19:42:43 +0100 Subject: [PATCH 0748/1084] btrfs: do proper error handling in btrfs_insert_xattr_item The space check in btrfs_insert_xattr_item is duplicated in it's caller (do_setxattr) so we won't hit the BUG_ON. Continuing without any check could be disasterous so turn it to a proper error handling. Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/dir-item.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b13d9536d4de..60a750678a82 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -80,7 +80,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; u32 data_size; - BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)); + if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) + return -ENOSPC; key.objectid = objectid; key.type = BTRFS_XATTR_ITEM_KEY; -- GitLab From c3988d630a4dfec5c09f2b6496734f320949ea9c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Feb 2017 15:18:32 +0100 Subject: [PATCH 0749/1084] btrfs: let writepage_end_io_hook return void There's no error path in any of the instances, always return 0. 
Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 9 +++------ fs/btrfs/extent_io.h | 2 +- fs/btrfs/inode.c | 6 ++---- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c3abf846a449..94beb758e8aa 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2437,12 +2437,9 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end) tree = &BTRFS_I(page->mapping->host)->io_tree; - if (tree->ops && tree->ops->writepage_end_io_hook) { - ret = tree->ops->writepage_end_io_hook(page, start, - end, NULL, uptodate); - if (ret) - uptodate = 0; - } + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, end, NULL, + uptodate); if (!uptodate) { ClearPageUptodate(page); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c16260c6c14f..78bb4d76127b 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -104,7 +104,7 @@ struct extent_io_ops { int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror); - int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, + void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); void (*set_bit_hook)(struct inode *inode, struct extent_state *state, unsigned *bits); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0d932b9594de..b904c74f31fd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2972,7 +2972,7 @@ static void finish_ordered_fn(struct btrfs_work *work) btrfs_finish_ordered_io(ordered_extent); } -static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, +static void btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { struct inode *inode = page->mapping->host; @@ -2986,7 +2986,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, ClearPagePrivate2(page); if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, end - start + 1, uptodate)) - return 0; + return; if (btrfs_is_free_space_inode(BTRFS_I(inode))) { wq = fs_info->endio_freespace_worker; @@ -2999,8 +2999,6 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL, NULL); btrfs_queue_work(wq, &ordered_extent->work); - - return 0; } static int __readpage_endio_check(struct inode *inode, -- GitLab From 4d53dddbec671bcb64a936a3d2b7bf1ce2252ed0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Feb 2017 15:27:44 +0100 Subject: [PATCH 0750/1084] btrfs: document existence of extent_io ops callbacks Some of the callbacks defined in btree_extent_io_ops and btrfs_extent_io_ops do always exist so we don't need to check the existence before each call. This patch just reorders the definition and documents which are mandatory/optional. 
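
(The payoff of the mandatory/optional split — call sites may invoke mandatory hooks unconditionally, while optional ones still need a NULL check; the following patches then drop the redundant checks and add a dummy for readpage_io_failed_hook — can be sketched outside the kernel like this. The struct and function names below are invented purely for illustration and are not part of the patch:)

#include <stdio.h>
#include <stddef.h>

struct io_ops {
	/* mandatory: always set, callers may invoke it unconditionally */
	int (*submit)(int data);
	/* optional: callers must check for NULL before calling */
	void (*on_error)(int data);
};

static int default_submit(int data)
{
	printf("submit %d\n", data);
	return 0;
}

static void run(const struct io_ops *ops, int data)
{
	ops->submit(data);		/* mandatory hook, no NULL check */
	if (ops->on_error)		/* optional hook, check first */
		ops->on_error(data);
}

int main(void)
{
	struct io_ops ops = { .submit = default_submit, .on_error = NULL };

	run(&ops, 42);
	return 0;
}
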
Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 7 +++++-- fs/btrfs/extent_io.h | 23 ++++++++++++++++------- fs/btrfs/inode.c | 7 +++++-- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 67d663ab658a..60d4f1b210d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4653,9 +4653,12 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) } static const struct extent_io_ops btree_extent_io_ops = { - .readpage_end_io_hook = btree_readpage_end_io_hook, - .readpage_io_failed_hook = btree_io_failed_hook, + /* mandatory callbacks */ .submit_bio_hook = btree_submit_bio_hook, + .readpage_end_io_hook = btree_readpage_end_io_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, + + /* optional callbacks */ + .readpage_io_failed_hook = btree_io_failed_hook, }; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 78bb4d76127b..1f8478dc9f8e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -92,18 +92,27 @@ typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags, u64 bio_offset); struct extent_io_ops { - int (*fill_delalloc)(struct inode *inode, struct page *locked_page, - u64 start, u64 end, int *page_started, - unsigned long *nr_written); - int (*writepage_start_hook)(struct page *page, u64 start, u64 end); + /* + * The following callbacks must be allways defined, the function + * pointer will be called unconditionally. + */ extent_submit_bio_hook_t *submit_bio_hook; + int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, + struct page *page, u64 start, u64 end, + int mirror); int (*merge_bio_hook)(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); + + /* + * Optional hooks, called if the pointer is not NULL + */ + int (*fill_delalloc)(struct inode *inode, struct page *locked_page, + u64 start, u64 end, int *page_started, + unsigned long *nr_written); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); - int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, - struct page *page, u64 start, u64 end, - int mirror); + + int (*writepage_start_hook)(struct page *page, u64 start, u64 end); void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); void (*set_bit_hook)(struct inode *inode, struct extent_state *state, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b904c74f31fd..3b327c8cfb16 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10547,10 +10547,13 @@ static const struct file_operations btrfs_dir_file_operations = { }; static const struct extent_io_ops btrfs_extent_io_ops = { - .fill_delalloc = run_delalloc_range, + /* mandatory callbacks */ .submit_bio_hook = btrfs_submit_bio_hook, - .merge_bio_hook = btrfs_merge_bio_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, + .merge_bio_hook = btrfs_merge_bio_hook, + + /* optional callbacks */ + .fill_delalloc = run_delalloc_range, .writepage_end_io_hook = btrfs_writepage_end_io_hook, .writepage_start_hook = btrfs_writepage_start_hook, .set_bit_hook = btrfs_set_bit_hook, -- GitLab From 20c9801d393d9f077c476f40440b481daaccb9d6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Feb 2017 15:59:35 +0100 Subject: [PATCH 0751/1084] btrfs: drop checks for mandatory extent_io_ops callbacks We know that eadpage_end_io_hook, submit_bio_hook and merge_bio_hook are 
always defined so we can drop the checks before we call them. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 94beb758e8aa..0786985a8cc5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2567,8 +2567,7 @@ static void end_bio_extent_readpage(struct bio *bio) len = bvec->bv_len; mirror = io_bio->mirror_num; - if (likely(uptodate && tree->ops && - tree->ops->readpage_end_io_hook)) { + if (likely(uptodate && tree->ops)) { ret = tree->ops->readpage_end_io_hook(io_bio, offset, page, start, end, mirror); @@ -2731,7 +2730,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, bio->bi_private = NULL; bio_get(bio); - if (tree->ops && tree->ops->submit_bio_hook) + if (tree->ops) ret = tree->ops->submit_bio_hook(page->mapping->host, bio, mirror_num, bio_flags, start); else @@ -2746,7 +2745,7 @@ static int merge_bio(struct extent_io_tree *tree, struct page *page, unsigned long bio_flags) { int ret = 0; - if (tree->ops && tree->ops->merge_bio_hook) + if (tree->ops) ret = tree->ops->merge_bio_hook(page, offset, size, bio, bio_flags); return ret; -- GitLab From 20a7db8ab3f2057a518448b1728d504ffadef65e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 Feb 2017 16:24:29 +0100 Subject: [PATCH 0752/1084] btrfs: add dummy callback for readpage_io_failed and drop checks Make extent_io_ops::readpage_io_failed_hook callback mandatory and define a dummy function for btrfs_extent_io_ops. As the failed IO callback is not performance critical, the branch vs extra trade off does not hurt. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent_io.c | 2 +- fs/btrfs/extent_io.h | 2 +- fs/btrfs/inode.c | 7 +++++++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60d4f1b210d7..cad47ece2fdf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4658,7 +4658,7 @@ static const struct extent_io_ops btree_extent_io_ops = { .readpage_end_io_hook = btree_readpage_end_io_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, + .readpage_io_failed_hook = btree_io_failed_hook, /* optional callbacks */ - .readpage_io_failed_hook = btree_io_failed_hook, }; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0786985a8cc5..28e81922a21c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2581,7 +2581,7 @@ static void end_bio_extent_readpage(struct bio *bio) if (likely(uptodate)) goto readpage_ok; - if (tree->ops && tree->ops->readpage_io_failed_hook) { + if (tree->ops) { ret = tree->ops->readpage_io_failed_hook(page, mirror); if (!ret && !bio->bi_error) uptodate = 1; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1f8478dc9f8e..3e4fad4a909d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -103,6 +103,7 @@ struct extent_io_ops { int (*merge_bio_hook)(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); + int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); /* * Optional hooks, called if the pointer is not NULL @@ -110,7 +111,6 @@ struct extent_io_ops { int (*fill_delalloc)(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written); - int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); int (*writepage_start_hook)(struct page *page, u64 start, u64 end); 
void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b327c8cfb16..ca1995cfd8e9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10509,6 +10509,12 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) } +__attribute__((const)) +static int dummy_readpage_io_failed_hook(struct page *page, int failed_mirror) +{ + return 0; +} + static const struct inode_operations btrfs_dir_inode_operations = { .getattr = btrfs_getattr, .lookup = btrfs_lookup, @@ -10551,6 +10557,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = { .submit_bio_hook = btrfs_submit_bio_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .merge_bio_hook = btrfs_merge_bio_hook, + .readpage_io_failed_hook = dummy_readpage_io_failed_hook, /* optional callbacks */ .fill_delalloc = run_delalloc_range, -- GitLab From 71321eb3f2d0df4e6c327e0b936eec4458a12054 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 14:49:07 +0100 Subject: [PATCH 0753/1084] ALSA: timer: Reject user params with too small ticks When a user sets a too small ticks with a fine-grained timer like hrtimer, the kernel tries to fire up the timer irq too frequently. This may lead to the condensed locks, eventually the kernel spinlock lockup with warnings. For avoiding such a situation, we define a lower limit of the resolution, namely 1ms. When the user passes a too small tick value that results in less than that, the kernel returns -EINVAL now. Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/core/timer.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index fc144f43faa6..ad153149b231 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1702,9 +1702,21 @@ static int snd_timer_user_params(struct file *file, return -EBADFD; if (copy_from_user(¶ms, _params, sizeof(params))) return -EFAULT; - if (!(t->hw.flags & SNDRV_TIMER_HW_SLAVE) && params.ticks < 1) { - err = -EINVAL; - goto _end; + if (!(t->hw.flags & SNDRV_TIMER_HW_SLAVE)) { + u64 resolution; + + if (params.ticks < 1) { + err = -EINVAL; + goto _end; + } + + /* Don't allow resolution less than 1ms */ + resolution = snd_timer_resolution(tu->timeri); + resolution *= params.ticks; + if (resolution < 1000000) { + err = -EINVAL; + goto _end; + } } if (params.queue_size > 0 && (params.queue_size < 32 || params.queue_size > 1024)) { -- GitLab From a7fdb6e648fb10a4174483b5fc4dac9c25bd2093 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 28 Feb 2017 13:57:18 +0100 Subject: [PATCH 0754/1084] iommu/vt-d: Fix crash when accessing VT-d sysfs entries The link between the iommu sysfs-device and the struct intel_iommu is no longer stored as driver-data. Update the code to use the new access method. 
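A minimal sketch of the container_of() pattern the new helper relies on (demo_iommu, dev_to_demo_iommu and version_show are illustrative names, not the VT-d driver's own): when a struct device is embedded in a larger object and registered with sysfs, the show callback can recover the containing object directly from the embedded device, so no driver-data needs to be stored at all.

/*
 * Illustrative sketch only, assuming a struct device embedded in a
 * larger structure.  The sysfs core hands the show callback the
 * embedded device; container_of() walks back to the structure that
 * embeds it.
 */
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/types.h>

struct demo_iommu {
	u32 version;
	struct device sysfs_dev;	/* embedded, registered with sysfs */
};

static inline struct demo_iommu *dev_to_demo_iommu(struct device *dev)
{
	return container_of(dev, struct demo_iommu, sysfs_dev);
}

static ssize_t version_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct demo_iommu *iommu = dev_to_demo_iommu(dev);

	return sprintf(buf, "%u\n", iommu->version);
}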
Reported-by: Dave Jones Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device') Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a8f7ae0eb7a4..238ad3447712 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4730,11 +4730,16 @@ static int intel_iommu_cpu_dead(unsigned int cpu) return 0; } +static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) +{ + return container_of(dev, struct intel_iommu, iommu.dev); +} + static ssize_t intel_iommu_show_version(struct device *dev, struct device_attribute *attr, char *buf) { - struct intel_iommu *iommu = dev_get_drvdata(dev); + struct intel_iommu *iommu = dev_to_intel_iommu(dev); u32 ver = readl(iommu->reg + DMAR_VER_REG); return sprintf(buf, "%d:%d\n", DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); @@ -4745,7 +4750,7 @@ static ssize_t intel_iommu_show_address(struct device *dev, struct device_attribute *attr, char *buf) { - struct intel_iommu *iommu = dev_get_drvdata(dev); + struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%llx\n", iommu->reg_phys); } static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL); @@ -4754,7 +4759,7 @@ static ssize_t intel_iommu_show_cap(struct device *dev, struct device_attribute *attr, char *buf) { - struct intel_iommu *iommu = dev_get_drvdata(dev); + struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%llx\n", iommu->cap); } static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL); @@ -4763,7 +4768,7 @@ static ssize_t intel_iommu_show_ecap(struct device *dev, struct device_attribute *attr, char *buf) { - struct intel_iommu *iommu = dev_get_drvdata(dev); + struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%llx\n", iommu->ecap); } static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL); @@ -4772,7 +4777,7 @@ static ssize_t intel_iommu_show_ndoms(struct device *dev, struct device_attribute *attr, char *buf) { - struct intel_iommu *iommu = dev_get_drvdata(dev); + struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap)); } static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL); @@ -4781,7 +4786,7 @@ static ssize_t intel_iommu_show_ndoms_used(struct device *dev, struct device_attribute *attr, char *buf) { - struct intel_iommu *iommu = dev_get_drvdata(dev); + struct intel_iommu *iommu = dev_to_intel_iommu(dev); return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))); } -- GitLab From b7a42b9d38063e58cdf201c575da75943f71051e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 28 Feb 2017 13:57:18 +0100 Subject: [PATCH 0755/1084] iommu/amd: Fix crash when accessing AMD-Vi sysfs entries The link between the iommu sysfs-device and the struct amd_iommu is no longer stored as driver-data. Update the code to the new correct way of getting from device to amd_iommu. 
Reported-by: Dave Jones Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device') Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 4 ++-- drivers/iommu/amd_iommu_types.h | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 04cdac7ab3e3..6130278c5d71 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1507,7 +1507,7 @@ static ssize_t amd_iommu_show_cap(struct device *dev, struct device_attribute *attr, char *buf) { - struct amd_iommu *iommu = dev_get_drvdata(dev); + struct amd_iommu *iommu = dev_to_amd_iommu(dev); return sprintf(buf, "%x\n", iommu->cap); } static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); @@ -1516,7 +1516,7 @@ static ssize_t amd_iommu_show_features(struct device *dev, struct device_attribute *attr, char *buf) { - struct amd_iommu *iommu = dev_get_drvdata(dev); + struct amd_iommu *iommu = dev_to_amd_iommu(dev); return sprintf(buf, "%llx\n", iommu->features); } static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index af00f381a7b1..003f3ceb2661 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -569,6 +569,11 @@ struct amd_iommu { volatile u64 __aligned(8) cmd_sem; }; +static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) +{ + return container_of(dev, struct amd_iommu, iommu.dev); +} + #define ACPIHID_UID_LEN 256 #define ACPIHID_HID_LEN 9 -- GitLab From 15c75b09f8d190f89ab4db463b87d411ca349dfe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 17:16:48 +0100 Subject: [PATCH 0756/1084] ALSA: ctxfi: Fallback DMA mask to 32bit Currently ctxfi driver tries to set only the 64bit DMA mask on 64bit architectures, and bails out if it fails. This causes a problem on some platforms since the 64bit DMA isn't always guaranteed. We should fall back to the default 32bit DMA when 64bit DMA fails. 
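A minimal sketch of that fallback idiom for a generic PCI device (demo_set_dma_mask is a hypothetical helper, not the ctxfi code): ask for the widest mask first, and only when the platform refuses it drop both the streaming and the coherent mask to 32 bit.

/*
 * Illustrative sketch only.  dma_set_mask() returns 0 on success, so a
 * non-zero return means the 64-bit mask is not usable and we fall back
 * to 32 bit; error handling of the final fallback is omitted.
 */
#include <linux/dma-mapping.h>
#include <linux/pci.h>

static void demo_set_dma_mask(struct pci_dev *pci)
{
	if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(64))) {
		dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(64));
	} else {
		dma_set_mask(&pci->dev, DMA_BIT_MASK(32));
		dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(32));
	}
}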
Fixes: 6d74b86d3c0f ("ALSA: ctxfi - Allow 64bit DMA") Cc: Signed-off-by: Takashi Iwai --- sound/pci/ctxfi/cthw20k1.c | 19 ++++++------------- sound/pci/ctxfi/cthw20k2.c | 19 ++++++------------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c index 9667cbfb0ca2..ab4cdab5cfa5 100644 --- a/sound/pci/ctxfi/cthw20k1.c +++ b/sound/pci/ctxfi/cthw20k1.c @@ -27,12 +27,6 @@ #include "cthw20k1.h" #include "ct20k1reg.h" -#if BITS_PER_LONG == 32 -#define CT_XFI_DMA_MASK DMA_BIT_MASK(32) /* 32 bit PTE */ -#else -#define CT_XFI_DMA_MASK DMA_BIT_MASK(64) /* 64 bit PTE */ -#endif - struct hw20k1 { struct hw hw; spinlock_t reg_20k1_lock; @@ -1904,19 +1898,18 @@ static int hw_card_start(struct hw *hw) { int err; struct pci_dev *pci = hw->pci; + const unsigned int dma_bits = BITS_PER_LONG; err = pci_enable_device(pci); if (err < 0) return err; /* Set DMA transfer mask */ - if (dma_set_mask(&pci->dev, CT_XFI_DMA_MASK) < 0 || - dma_set_coherent_mask(&pci->dev, CT_XFI_DMA_MASK) < 0) { - dev_err(hw->card->dev, - "architecture does not support PCI busmaster DMA with mask 0x%llx\n", - CT_XFI_DMA_MASK); - err = -ENXIO; - goto error1; + if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); + } else { + dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(32)); } if (!hw->io_base) { diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c index 6414ecf93efa..18ee7768b7c4 100644 --- a/sound/pci/ctxfi/cthw20k2.c +++ b/sound/pci/ctxfi/cthw20k2.c @@ -26,12 +26,6 @@ #include "cthw20k2.h" #include "ct20k2reg.h" -#if BITS_PER_LONG == 32 -#define CT_XFI_DMA_MASK DMA_BIT_MASK(32) /* 32 bit PTE */ -#else -#define CT_XFI_DMA_MASK DMA_BIT_MASK(64) /* 64 bit PTE */ -#endif - struct hw20k2 { struct hw hw; /* for i2c */ @@ -2029,19 +2023,18 @@ static int hw_card_start(struct hw *hw) int err = 0; struct pci_dev *pci = hw->pci; unsigned int gctl; + const unsigned int dma_bits = BITS_PER_LONG; err = pci_enable_device(pci); if (err < 0) return err; /* Set DMA transfer mask */ - if (dma_set_mask(&pci->dev, CT_XFI_DMA_MASK) < 0 || - dma_set_coherent_mask(&pci->dev, CT_XFI_DMA_MASK) < 0) { - dev_err(hw->card->dev, - "architecture does not support PCI busmaster DMA with mask 0x%llx\n", - CT_XFI_DMA_MASK); - err = -ENXIO; - goto error1; + if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); + } else { + dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(32)); } if (!hw->io_base) { -- GitLab From 493de342748cc6f52938096f5480cf291da58a0b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 17:27:57 +0100 Subject: [PATCH 0757/1084] ALSA: hda - Add subwoofer support for Dell Inspiron 17 7000 Gaming The Dell Inspiron 17 7000 Gaming laptop needs a quirk similar to the one for the Inspiron 7559 to support its subwoofer speaker.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=194191 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e2da176ac169..4e112221d825 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5606,6 +5606,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), + SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), -- GitLab From 900f736251c81886f3064c9d489c85eddee921b7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Feb 2017 21:28:53 -0800 Subject: [PATCH 0758/1084] f2fs: avoid to flush nat journal entries This patch adds a missing condition which flushes nat journal entries unnecessarily introduced by: f2fs: add bitmaps for empty or full NAT blocks Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b3aead4c5a0f..94967171dee8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2513,7 +2513,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and merge them * into nat entry set. */ - if (cpc->reason == CP_UMOUNT || + if (enabled_nat_bits(sbi, cpc) || !__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); -- GitLab From 2a7275a3d867b228216886aae35e1f64291180b1 Mon Sep 17 00:00:00 2001 From: Ley Foon Tan Date: Tue, 28 Feb 2017 18:37:16 +0800 Subject: [PATCH 0759/1084] PCI: altera: Fix TLP_CFG_DW0 for TLP write eb5767122feb ("PCI: altera: Simplify TLB_CFG_DW0 usage") used TLP_FMTTYPE_CFGRD* (instead of TLP_FMTTYPE_CFGWR*) for TLP writes, which causes writing to configuration space to fail. Fix it by using correct FMTTYPE for write operation. Fixes: eb5767122feb ("PCI: altera: Simplify TLB_CFG_DW0 usage") Signed-off-by: Ley Foon Tan Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.9+ --- drivers/pci/host/pcie-altera.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c index 5043b5f00ed8..75ec5cea26f6 100644 --- a/drivers/pci/host/pcie-altera.c +++ b/drivers/pci/host/pcie-altera.c @@ -57,10 +57,14 @@ #define TLP_WRITE_TAG 0x10 #define RP_DEVFN 0 #define TLP_REQ_ID(bus, devfn) (((bus) << 8) | (devfn)) -#define TLP_CFG_DW0(pcie, bus) \ +#define TLP_CFGRD_DW0(pcie, bus) \ ((((bus == pcie->root_bus_nr) ? TLP_FMTTYPE_CFGRD0 \ : TLP_FMTTYPE_CFGRD1) << 24) | \ TLP_PAYLOAD_SIZE) +#define TLP_CFGWR_DW0(pcie, bus) \ + ((((bus == pcie->root_bus_nr) ? 
TLP_FMTTYPE_CFGWR0 \ + : TLP_FMTTYPE_CFGWR1) << 24) | \ + TLP_PAYLOAD_SIZE) #define TLP_CFG_DW1(pcie, tag, be) \ (((TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN)) << 16) | (tag << 8) | (be)) #define TLP_CFG_DW2(bus, devfn, offset) \ @@ -222,7 +226,7 @@ static int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus, u32 devfn, { u32 headers[TLP_HDR_SIZE]; - headers[0] = TLP_CFG_DW0(pcie, bus); + headers[0] = TLP_CFGRD_DW0(pcie, bus); headers[1] = TLP_CFG_DW1(pcie, TLP_READ_TAG, byte_en); headers[2] = TLP_CFG_DW2(bus, devfn, where); @@ -237,7 +241,7 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn, u32 headers[TLP_HDR_SIZE]; int ret; - headers[0] = TLP_CFG_DW0(pcie, bus); + headers[0] = TLP_CFGWR_DW0(pcie, bus); headers[1] = TLP_CFG_DW1(pcie, TLP_WRITE_TAG, byte_en); headers[2] = TLP_CFG_DW2(bus, devfn, where); -- GitLab From f3ac9f737603da80c2da3e84b89e74429836bb6d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Feb 2017 22:15:51 +0100 Subject: [PATCH 0760/1084] ALSA: seq: Fix link corruption by event error handling The sequencer FIFO management has a bug that may lead to a corruption (shortage) of the cell linked list. When a sequencer client faces an error at the event delivery, it tries to put back the dequeued cell. When the first cell was put back, this forgot the tail pointer tracking, and the link will be screwed up. Although there is no memory corruption, the sequencer client may stall forever at exit while flushing the pending FIFO cells in snd_seq_pool_done(), as spotted by syzkaller. This patch addresses the missing tail pointer tracking at snd_seq_fifo_cell_putback(). Also the patch makes sure to clear the cell->next pointer at snd_seq_fifo_event_in() for avoiding a similar mess-up of the FIFO linked list. Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_fifo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 1d5acbe0c08b..86240d02b530 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -135,6 +135,7 @@ int snd_seq_fifo_event_in(struct snd_seq_fifo *f, f->tail = cell; if (f->head == NULL) f->head = cell; + cell->next = NULL; f->cells++; spin_unlock_irqrestore(&f->lock, flags); @@ -214,6 +215,8 @@ void snd_seq_fifo_cell_putback(struct snd_seq_fifo *f, spin_lock_irqsave(&f->lock, flags); cell->next = f->head; f->head = cell; + if (!f->tail) + f->tail = cell; f->cells++; spin_unlock_irqrestore(&f->lock, flags); } -- GitLab From ad47047220777460c6d7dc8333808591f29e5c17 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 28 Feb 2017 11:55:16 -0500 Subject: [PATCH 0761/1084] dm raid: fix raid "check" regression due to improper cleanup in raid_message() While cleaning up awkward branching in raid_message() a raid set "check" regression was introduced because "check" needs both MD_RECOVERY_SYNC and MD_RECOVERY_REQUESTED flags set. Fix this regression by explicitly setting both flags for the "check" case (as is also done for the "repair" case; the redundant set_bit()s are perfectly fine because they add clarity to what is needed in response to both messages -- in addition this isn't fast path code).
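A compact sketch of the flag combinations involved, assuming the usual MD recovery bits (demo_request_scrub is a hypothetical helper, not the target's code): both messages request a synchronised scan, and only "check" keeps it read-only.

/*
 * Illustrative sketch only.  "check" and "repair" both need
 * MD_RECOVERY_REQUESTED and MD_RECOVERY_SYNC; "check" additionally
 * sets MD_RECOVERY_CHECK so MD reports mismatches without rewriting
 * them.
 */
#include "md.h"	/* drivers/md internal header: struct mddev, MD_RECOVERY_* */

static void demo_request_scrub(struct mddev *mddev, bool repair)
{
	if (!repair)
		set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
	set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
}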
Fixes: 105db59912 ("dm raid: cleanup awkward branching in raid_message() option processing") Reported-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5c9e95d66f3b..0460cf84fd0e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3462,9 +3462,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv) else if (!strcasecmp(argv[0], "recover")) set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); else { - if (!strcasecmp(argv[0], "check")) + if (!strcasecmp(argv[0], "check")) { set_bit(MD_RECOVERY_CHECK, &mddev->recovery); - else if (!strcasecmp(argv[0], "repair")) { + set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); + set_bit(MD_RECOVERY_SYNC, &mddev->recovery); + } else if (!strcasecmp(argv[0], "repair")) { set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); } else -- GitLab From d36a19541fe8f392778ac137d60f9be8dfdd8f9d Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 28 Feb 2017 19:17:49 +0100 Subject: [PATCH 0762/1084] dm raid: fix data corruption on reshape request The lvm2 sequence to manage dm-raid constructor flags that trigger a rebuild or a reshape is defined as: 1) load table with flags (e.g. rebuild/delta_disks/data_offset) 2) clear out the flags in lvm2 metadata 3) store the lvm2 metadata, reload the table to reset the flags previously established during the initial load (1) -- in order to prevent repeatedly requesting a rebuild or a reshape on activation Currently, loading an inactive table with rebuild/reshape flags specified will cause dm-raid to rebuild/reshape on resume and thus start updating the raid metadata (about the progress). When the second table reload, to reset the flags, occurs the constructor accesses the volatile progress state kept in the raid superblocks. Because the active mapping is still processing the rebuild/reshape, that position will be stale by the time the device is resumed. In the reshape case, this causes data corruption by processing already reshaped stripes again. In the rebuild case, it does _not_ cause data corruption but instead involves superfluous rebuilds. Fix by keeping the raid set frozen during the first resume and then allow the rebuild/reshape during the second resume. Fixes: 9dbd1aa3a ("dm raid: add reshaping support to the target") Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 4.8+ --- drivers/md/dm-raid.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 0460cf84fd0e..350527f60834 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3756,6 +3756,8 @@ static int raid_preresume(struct dm_target *ti) return r; } +#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) + static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; @@ -3773,7 +3775,15 @@ static void raid_resume(struct dm_target *ti) mddev->ro = 0; mddev->in_sync = 0; - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + /* + * Keep the RAID set frozen if reshape/rebuild flags are set. + * The RAID set is unfrozen once the next table load/resume, + * which clears the reshape/rebuild flags, occurs. + * This ensures that the constructor for the inactive table + * retrieves an up-to-date reshape_position. 
+ */ + if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS)) + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->suspended) mddev_resume(mddev); -- GitLab From 2664f3c94abc7181171b7c05b2aaa76ea7d9d613 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 28 Feb 2017 15:31:44 -0500 Subject: [PATCH 0763/1084] dm raid: bump the target version This version bump reflects that the reshape corruption fix (commit 92a39f6cc "dm raid: fix data corruption on reshape request") is present. Done as a separate fix because the above referenced commit is marked for stable and target version bumps in a stable@ fix are a recipe for the fix to never get backported to stable@ kernels (because of target version number conflicts). Also, move RESUME_STAY_FROZEN_FLAGS up with the rest of the _FLAGS definitions now that we don't need to worry about stable@ conflicts as a result of missing context. Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 350527f60834..f8564d63982f 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -101,6 +101,8 @@ struct raid_dev { #define CTR_FLAG_RAID10_USE_NEAR_SETS (1 << __CTR_FLAG_RAID10_USE_NEAR_SETS) #define CTR_FLAG_JOURNAL_DEV (1 << __CTR_FLAG_JOURNAL_DEV) +#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) + /* * Definitions of various constructor flags to * be used in checks of valid / invalid flags @@ -3756,8 +3758,6 @@ static int raid_preresume(struct dm_target *ti) return r; } -#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET) - static void raid_resume(struct dm_target *ti) { struct raid_set *rs = ti->private; @@ -3791,7 +3791,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 10, 0}, + .version = {1, 10, 1}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, -- GitLab From de09cdd09fa12c3f837902680c3011d96e811821 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 28 Feb 2017 16:32:44 -0500 Subject: [PATCH 0764/1084] intel_idle: stop exposing platform acronyms in sysfs Cosmetic only -- no functional change in this patch. sysfs before: state4/desc:MWAIT 0x20 state4/name:C6-HSW sysfs after: state4/desc:MWAIT 0x20 state4/name:C6 We remove the platform acronyms from the end of the state name (-HSW in this case) for three reasons: 1. more consistency with acpi_idle, which prints C1, C2, C3 etc. 2. users already know what platform they are on; an acronym for the processor code name here seems to cause more confusion than clarity. 3. less clutter in "cpupower monitor" output, which truncates the names to 4 columns. The precise definition of the state continues to be available in "desc". Reported-by: Artem Bityutskiy Signed-off-by: Len Brown Signed-off-by: Rafael J.
Wysocki --- drivers/idle/intel_idle.c | 172 +++++++++++++++++++------------------- 1 file changed, 86 insertions(+), 86 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 7d8ea3d5fda6..e045a5caa5bb 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -125,7 +125,7 @@ static struct cpuidle_state *cpuidle_state_table; */ static struct cpuidle_state nehalem_cstates[] = { { - .name = "C1-NHM", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 3, @@ -133,7 +133,7 @@ static struct cpuidle_state nehalem_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-NHM", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -141,7 +141,7 @@ static struct cpuidle_state nehalem_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-NHM", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, @@ -149,7 +149,7 @@ static struct cpuidle_state nehalem_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-NHM", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, @@ -162,7 +162,7 @@ static struct cpuidle_state nehalem_cstates[] = { static struct cpuidle_state snb_cstates[] = { { - .name = "C1-SNB", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -170,7 +170,7 @@ static struct cpuidle_state snb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-SNB", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -178,7 +178,7 @@ static struct cpuidle_state snb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-SNB", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, @@ -186,7 +186,7 @@ static struct cpuidle_state snb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-SNB", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, @@ -194,7 +194,7 @@ static struct cpuidle_state snb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7-SNB", + .name = "C7", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, @@ -207,7 +207,7 @@ static struct cpuidle_state snb_cstates[] = { static struct cpuidle_state byt_cstates[] = { { - .name = "C1-BYT", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -215,7 +215,7 @@ static struct cpuidle_state byt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6N-BYT", + .name = "C6N", .desc = "MWAIT 0x58", .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, @@ -223,7 +223,7 @@ static struct cpuidle_state byt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6S-BYT", + .name = "C6S", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 500, @@ -231,7 +231,7 @@ static struct cpuidle_state byt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7-BYT", + .name = "C7", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1200, @@ -239,7 +239,7 @@ 
static struct cpuidle_state byt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7S-BYT", + .name = "C7S", .desc = "MWAIT 0x64", .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, @@ -252,7 +252,7 @@ static struct cpuidle_state byt_cstates[] = { static struct cpuidle_state cht_cstates[] = { { - .name = "C1-CHT", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -260,7 +260,7 @@ static struct cpuidle_state cht_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6N-CHT", + .name = "C6N", .desc = "MWAIT 0x58", .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, @@ -268,7 +268,7 @@ static struct cpuidle_state cht_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6S-CHT", + .name = "C6S", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, @@ -276,7 +276,7 @@ static struct cpuidle_state cht_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7-CHT", + .name = "C7", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1200, @@ -284,7 +284,7 @@ static struct cpuidle_state cht_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7S-CHT", + .name = "C7S", .desc = "MWAIT 0x64", .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, @@ -297,7 +297,7 @@ static struct cpuidle_state cht_cstates[] = { static struct cpuidle_state ivb_cstates[] = { { - .name = "C1-IVB", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -305,7 +305,7 @@ static struct cpuidle_state ivb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-IVB", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -313,7 +313,7 @@ static struct cpuidle_state ivb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-IVB", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, @@ -321,7 +321,7 @@ static struct cpuidle_state ivb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-IVB", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, @@ -329,7 +329,7 @@ static struct cpuidle_state ivb_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7-IVB", + .name = "C7", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 87, @@ -342,7 +342,7 @@ static struct cpuidle_state ivb_cstates[] = { static struct cpuidle_state ivt_cstates[] = { { - .name = "C1-IVT", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -350,7 +350,7 @@ static struct cpuidle_state ivt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-IVT", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -358,7 +358,7 @@ static struct cpuidle_state ivt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-IVT", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, @@ -366,7 +366,7 @@ static struct cpuidle_state ivt_cstates[] = { .enter = &intel_idle, .enter_freeze = 
intel_idle_freeze, }, { - .name = "C6-IVT", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 82, @@ -379,7 +379,7 @@ static struct cpuidle_state ivt_cstates[] = { static struct cpuidle_state ivt_cstates_4s[] = { { - .name = "C1-IVT-4S", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -387,7 +387,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-IVT-4S", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -395,7 +395,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-IVT-4S", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, @@ -403,7 +403,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-IVT-4S", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 84, @@ -416,7 +416,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { static struct cpuidle_state ivt_cstates_8s[] = { { - .name = "C1-IVT-8S", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -424,7 +424,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-IVT-8S", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -432,7 +432,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-IVT-8S", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, @@ -440,7 +440,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-IVT-8S", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 88, @@ -453,7 +453,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { static struct cpuidle_state hsw_cstates[] = { { - .name = "C1-HSW", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -461,7 +461,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-HSW", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -469,7 +469,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-HSW", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, @@ -477,7 +477,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-HSW", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, @@ -485,7 +485,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7s-HSW", + .name = "C7s", .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, @@ -493,7 +493,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C8-HSW", + .name = "C8", .desc = 
"MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, @@ -501,7 +501,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C9-HSW", + .name = "C9", .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, @@ -509,7 +509,7 @@ static struct cpuidle_state hsw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C10-HSW", + .name = "C10", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, @@ -521,7 +521,7 @@ static struct cpuidle_state hsw_cstates[] = { }; static struct cpuidle_state bdw_cstates[] = { { - .name = "C1-BDW", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -529,7 +529,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-BDW", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -537,7 +537,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-BDW", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 40, @@ -545,7 +545,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-BDW", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, @@ -553,7 +553,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7s-BDW", + .name = "C7s", .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, @@ -561,7 +561,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C8-BDW", + .name = "C8", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, @@ -569,7 +569,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C9-BDW", + .name = "C9", .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, @@ -577,7 +577,7 @@ static struct cpuidle_state bdw_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C10-BDW", + .name = "C10", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, @@ -590,7 +590,7 @@ static struct cpuidle_state bdw_cstates[] = { static struct cpuidle_state skl_cstates[] = { { - .name = "C1-SKL", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -598,7 +598,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-SKL", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -606,7 +606,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C3-SKL", + .name = "C3", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 70, @@ -614,7 +614,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-SKL", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | 
CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 85, @@ -622,7 +622,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7s-SKL", + .name = "C7s", .desc = "MWAIT 0x33", .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 124, @@ -630,7 +630,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C8-SKL", + .name = "C8", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, @@ -638,7 +638,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C9-SKL", + .name = "C9", .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 480, @@ -646,7 +646,7 @@ static struct cpuidle_state skl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C10-SKL", + .name = "C10", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 890, @@ -659,7 +659,7 @@ static struct cpuidle_state skl_cstates[] = { static struct cpuidle_state skx_cstates[] = { { - .name = "C1-SKX", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -667,7 +667,7 @@ static struct cpuidle_state skx_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-SKX", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -675,7 +675,7 @@ static struct cpuidle_state skx_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-SKX", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, @@ -688,7 +688,7 @@ static struct cpuidle_state skx_cstates[] = { static struct cpuidle_state atom_cstates[] = { { - .name = "C1E-ATM", + .name = "C1E", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 10, @@ -696,7 +696,7 @@ static struct cpuidle_state atom_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C2-ATM", + .name = "C2", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10), .exit_latency = 20, @@ -704,7 +704,7 @@ static struct cpuidle_state atom_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C4-ATM", + .name = "C4", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, @@ -712,7 +712,7 @@ static struct cpuidle_state atom_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-ATM", + .name = "C6", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, @@ -724,7 +724,7 @@ static struct cpuidle_state atom_cstates[] = { }; static struct cpuidle_state tangier_cstates[] = { { - .name = "C1-TNG", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -732,7 +732,7 @@ static struct cpuidle_state tangier_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C4-TNG", + .name = "C4", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, @@ -740,7 +740,7 @@ static struct cpuidle_state tangier_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-TNG", + .name = "C6", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, @@ -748,7 +748,7 @@ 
static struct cpuidle_state tangier_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7-TNG", + .name = "C7", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1200, @@ -756,7 +756,7 @@ static struct cpuidle_state tangier_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C9-TNG", + .name = "C9", .desc = "MWAIT 0x64", .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, @@ -768,7 +768,7 @@ static struct cpuidle_state tangier_cstates[] = { }; static struct cpuidle_state avn_cstates[] = { { - .name = "C1-AVN", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -776,7 +776,7 @@ static struct cpuidle_state avn_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-AVN", + .name = "C6", .desc = "MWAIT 0x51", .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 15, @@ -788,7 +788,7 @@ static struct cpuidle_state avn_cstates[] = { }; static struct cpuidle_state knl_cstates[] = { { - .name = "C1-KNL", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 1, @@ -796,7 +796,7 @@ static struct cpuidle_state knl_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze }, { - .name = "C6-KNL", + .name = "C6", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 120, @@ -809,7 +809,7 @@ static struct cpuidle_state knl_cstates[] = { static struct cpuidle_state bxt_cstates[] = { { - .name = "C1-BXT", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -817,7 +817,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C1E-BXT", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -825,7 +825,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-BXT", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, @@ -833,7 +833,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C7s-BXT", + .name = "C7s", .desc = "MWAIT 0x31", .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 155, @@ -841,7 +841,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C8-BXT", + .name = "C8", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1000, @@ -849,7 +849,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C9-BXT", + .name = "C9", .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2000, @@ -857,7 +857,7 @@ static struct cpuidle_state bxt_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C10-BXT", + .name = "C10", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, @@ -870,7 +870,7 @@ static struct cpuidle_state bxt_cstates[] = { static struct cpuidle_state dnv_cstates[] = { { - .name = "C1-DNV", + .name = "C1", .desc = "MWAIT 0x00", .flags = MWAIT2flg(0x00), .exit_latency = 2, @@ -878,7 +878,7 @@ static struct cpuidle_state dnv_cstates[] = { .enter = &intel_idle, .enter_freeze = 
intel_idle_freeze, }, { - .name = "C1E-DNV", + .name = "C1E", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, @@ -886,7 +886,7 @@ static struct cpuidle_state dnv_cstates[] = { .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, { - .name = "C6-DNV", + .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 50, -- GitLab From 70cd4c10b290dd77fff6dc702a9a2c8c679df121 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 27 Feb 2017 11:51:37 +1100 Subject: [PATCH 0765/1084] KVM: PPC: Book3S HV: Fix software walk of guest process page tables This fixes some bugs in the code that walks the guest's page tables. These bugs cause MMIO emulation to fail whenever the guest is in virtual mode (MMU on), leading to the guest hanging if it tried to access a virtio device. The first bug was that when reading the guest's process table, we were using the whole of arch->process_table, not just the field that contains the process table base address. The second bug was that the mask used when reading the process table entry to get the radix tree base address, RPDB_MASK, had the wrong value. Fixes: 9e04ba69beec ("KVM: PPC: Book3S HV: Add basic infrastructure for radix guests") Fixes: e99833448c5f ("powerpc/mm/radix: Add partition table format & callback") Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/book3s/64/mmu.h | 3 ++- arch/powerpc/kvm/book3s_64_mmu_radix.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index d73e9dfa5237..440f3423e213 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -46,7 +46,7 @@ extern struct patb_entry *partition_tb; /* Bits in patb0 field */ #define PATB_HR (1UL << 63) -#define RPDB_MASK 0x0ffffffffffff00fUL +#define RPDB_MASK 0x0fffffffffffff00UL #define RPDB_SHIFT (1UL << 8) #define RTS1_SHIFT 61 /* top 2 bits of radix tree size */ #define RTS1_MASK (3UL << RTS1_SHIFT) @@ -57,6 +57,7 @@ extern struct patb_entry *partition_tb; /* Bits in patb1 field */ #define PATB_GR (1UL << 63) /* guest uses radix; must match HR */ #define PRTS_MASK 0x1f /* process table size field */ +#define PRTB_MASK 0x0ffffffffffff000UL /* * Limit process table to PAGE_SIZE table. This diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 4344651f408c..f6b3e67c5762 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -32,6 +32,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, u32 pid; int ret, level, ps; __be64 prte, rpte; + unsigned long ptbl; unsigned long root, pte, index; unsigned long rts, bits, offset; unsigned long gpa; @@ -53,8 +54,8 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return -EINVAL; /* Read partition table to find root of tree for effective PID */ - ret = kvm_read_guest(kvm, kvm->arch.process_table + pid * 16, - &prte, sizeof(prte)); + ptbl = (kvm->arch.process_table & PRTB_MASK) + (pid * 16); + ret = kvm_read_guest(kvm, ptbl, &prte, sizeof(prte)); if (ret) return ret; -- GitLab From 4e5acdc23a3dcbd6ad6dc93a9783dd9c838987c8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 28 Feb 2017 11:05:47 +1100 Subject: [PATCH 0766/1084] KVM: PPC: Book3S HV: Don't use ASDR for real-mode HPT faults on POWER9 In HPT mode on POWER9, the ASDR register is supposed to record segment information for hypervisor page faults.
It turns out that POWER9 DD1 does not record the page size information in the ASDR for faults in guest real mode. We have the necessary information in memory already, so by moving the checks for real mode that already existed, we can use the in-memory copy. Since a load is likely to be faster than reading an SPR, we do this unconditionally (not just for POWER9 DD1). Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 47414a6fe2dd..7c6477d1840a 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1787,12 +1787,12 @@ kvmppc_hdsi: /* HPTE not found fault or protection fault? */ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h beq 1f /* if not, send it to the guest */ + andi. r0, r11, MSR_DR /* data relocation enabled? */ + beq 3f BEGIN_FTR_SECTION mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ b 4f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - andi. r0, r11, MSR_DR /* data relocation enabled? */ - beq 3f clrrdi r0, r4, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ li r0, BOOK3S_INTERRUPT_DATA_SEGMENT @@ -1879,12 +1879,12 @@ kvmppc_hisi: bne .Lradix_hisi /* for radix, just save ASDR */ andis. r0, r11, SRR1_ISI_NOPT@h beq 1f + andi. r0, r11, MSR_IR /* instruction relocation enabled? */ + beq 3f BEGIN_FTR_SECTION mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ b 4f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - andi. r0, r11, MSR_IR /* instruction relocation enabled? */ - beq 3f clrrdi r0, r10, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ li r0, BOOK3S_INTERRUPT_INST_SEGMENT -- GitLab From 40496c8ee73a5ca4fa581badf2247418980586b1 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:21:18 -0500 Subject: [PATCH 0767/1084] x86: msr-index.h: Define MSR_PKG_CST_CONFIG_CONTROL define MSR_PKG_CST_CONFIG_CONTROL (0xE2), which is the string used by Intel Documentation. We use this MSR in intel_idle and turbostat by a previous name, to be updated in the next patch. 
Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 710273c617b8..975f23eefe14 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -47,6 +47,7 @@ #define MSR_PLATFORM_INFO 0x000000ce #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 +#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) -- GitLab From 6cfb2374f83bc70b8acb0ddb989988ba3f140b61 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:23:25 -0500 Subject: [PATCH 0768/1084] intel_idle: use new name for MSR_PKG_CST_CONFIG_CONTROL previously known as MSR_NHM_SNB_PKG_CST_CFG_CTL Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 7d8ea3d5fda6..930537da76d4 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -961,9 +961,9 @@ static void auto_demotion_disable(void) { unsigned long long msr_bits; - rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); msr_bits &= ~(icpu->auto_demotion_disable_flags); - wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); + wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); } static void c1e_promotion_disable(void) { @@ -1273,7 +1273,7 @@ static void sklh_idle_state_table_update(void) if ((mwait_substates & (0xF << 28)) == 0) return; - rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr); + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); /* PC10 is not enabled in PKG C-state limit */ if ((msr & 0xF) != 8) -- GitLab From 1df2e55abce64e2f3117fac3968a9ac382fbc9c3 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:24:57 -0500 Subject: [PATCH 0769/1084] tools/power turbostat: use new name for MSR_PKG_CST_CONFIG_CONTROL Previously called MSR_NHM_SNB_PKG_CST_CFG_CTL Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index aedfaddbad30..d56f2b75dc58 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1761,12 +1761,12 @@ dump_nhm_cst_cfg(void) { unsigned long long msr; - get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", (msr & SNB_C3_AUTO_UNDEMOTE) ? 
"UNdemote-C3, " : "", @@ -2383,7 +2383,7 @@ void check_permissions() * MSR_SMI_COUNT 0x00000034 * * MSR_PLATFORM_INFO 0x000000ce - * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 + * MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 * * MSR_MISC_PWR_MGMT 0x000001aa * @@ -2393,7 +2393,7 @@ void check_permissions() * MSR_CORE_C6_RESIDENCY 0x000003fd * * Side effect: - * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL + * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL */ int probe_nhm_msrs(unsigned int family, unsigned int model) { @@ -2462,7 +2462,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) default: return 0; } - get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); -- GitLab From 419c9e986ea453d07140bffb9708d730b6317e8e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:26:22 -0500 Subject: [PATCH 0770/1084] x86: msr-index.h: Remove unused MSR_NHM_SNB_PKG_CST_CFG_CTL The two users, intel_idle driver and turbostat utility are using the new name, MSR_PKG_CST_CONFIG_CONTROL Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 975f23eefe14..12ecc6c1e7f8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -46,7 +46,6 @@ #define MSR_FSB_FREQ 0x000000cd #define MSR_PLATFORM_INFO 0x000000ce -#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) -- GitLab From 0539ba118fe241b0d03202fda0cd19cb758b7fbd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 00:27:20 -0500 Subject: [PATCH 0771/1084] tools/power turbostat: Baytrail c-state support The Baytrail SOC, with its Silvermont core, has some unique properties: 1. a hardware CC1 residency counter 2. a module-c6 residency counter 3. a package-c6 counter at traditional package-c7 counter address. The SOC does not support c3, pc3, c7 or pc7 counters. 
Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 2 ++ tools/power/x86/turbostat/turbostat.c | 46 +++++++++++++++++++++------ 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 12ecc6c1e7f8..079db99f6560 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -147,6 +147,7 @@ /* C-state Residency Counters */ #define MSR_PKG_C3_RESIDENCY 0x000003f8 #define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa #define MSR_PKG_C7_RESIDENCY 0x000003fa #define MSR_CORE_C3_RESIDENCY 0x000003fc #define MSR_CORE_C6_RESIDENCY 0x000003fd @@ -203,6 +204,7 @@ #define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B #define MSR_CORE_C1_RES 0x00000660 +#define MSR_MODULE_C6_RES_MS 0x00000664 #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d56f2b75dc58..19802be6dc02 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -167,6 +167,7 @@ struct core_data { unsigned long long c3; unsigned long long c6; unsigned long long c7; + unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ unsigned int core_temp_c; unsigned int core_id; unsigned long long counter[MAX_ADDED_COUNTERS]; @@ -375,6 +376,7 @@ struct msr_counter bic[] = { { 0x0, "RAM_J" }, { 0x0, "Core" }, { 0x0, "CPU" }, + { 0x0, "Mod%c6" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -412,6 +414,7 @@ struct msr_counter bic[] = { #define BIC_RAM_J (1ULL << 31) #define BIC_Core (1ULL << 32) #define BIC_CPU (1ULL << 33) +#define BIC_Mod_c6 (1ULL << 34) unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; unsigned long long bic_present; @@ -504,6 +507,8 @@ void print_header(void) if (DO_BIC(BIC_CPU_c7)) outp += sprintf(outp, "\tCPU%%c7"); + if (DO_BIC(BIC_Mod_c6)) + outp += sprintf(outp, "\tMod%%c6"); if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\tCoreTmp"); @@ -629,6 +634,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]); } + outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); } if (p) { @@ -774,6 +780,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_CPU_c7)) outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); + /* Mod%c6 */ + if (DO_BIC(BIC_Mod_c6)) + outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / t->tsc); + if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\t%d", c->core_temp_c); @@ -988,6 +998,7 @@ delta_core(struct core_data *new, struct core_data *old) old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; + old->mc6_us = new->mc6_us - old->mc6_us; for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -1109,6 +1120,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c3 = 0; c->c6 = 0; c->c7 = 0; + c->mc6_us = 0; c->core_temp_c = 0; p->pkg_wtd_core_c0 = 0; @@ -1173,6 +1185,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.c3 += c->c3; average.cores.c6 += c->c6; average.cores.c7 += c->c7; + average.cores.mc6_us += c->mc6_us; average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); @@ -1246,6 +1259,7 @@ void compute_average(struct thread_data *t, struct core_data 
*c, average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; + average.cores.mc6_us /= topo.num_cores; if (do_skl_residency) { average.packages.pkg_wtd_core_c0 /= topo.num_packages; @@ -1376,8 +1390,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -5; t->smi_count = msr & 0xFFFFFFFF; } - - if (use_c1_residency_msr) { + if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) { if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) return -6; } @@ -1409,6 +1422,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) return -8; + if (DO_BIC(BIC_Mod_c6)) + if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) + return -8; + if (DO_BIC(BIC_CoreTmp)) { if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) return -9; @@ -1437,9 +1454,16 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (do_pc3) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; - if (do_pc6) - if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) - return -10; + if (do_pc6) { + if (do_slm_cstates) { + if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } else { + if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } + } + if (do_pc2) if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) return -11; @@ -1533,7 +1557,7 @@ char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; @@ -3336,8 +3360,6 @@ int has_skl_msrs(unsigned int family, unsigned int model) return 0; } - - int is_slm(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -3731,6 +3753,12 @@ void process_cpuid() do_pc3 = (pkg_cstate_limit >= PCL__3); do_pc6 = (pkg_cstate_limit >= PCL__6); do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); + if (has_slv_msrs(family, model)) { + do_pc2 = do_pc3 = do_pc7 = 0; + do_pc6 = 1; + BIC_PRESENT(BIC_Mod_c6); + use_c1_residency_msr = 1; + } do_c8_c9_c10 = has_hsw_msrs(family, model); do_irtl_hsw = has_hsw_msrs(family, model); do_skl_residency = has_skl_msrs(family, model); @@ -4112,7 +4140,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.17 1 Jan 2017" + 
fprintf(outf, "turbostat version 4.17 10 Jan 2017" " - Len Brown \n"); } -- GitLab From 710f273ba96182fd93ee8540ae06583c7d889d7c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 11 Jan 2017 22:12:25 -0500 Subject: [PATCH 0772/1084] tools/power turbostat: add precision to --debug frequency output Add a digit of precision to the --debug output for frequency range. This is useful when BCLK is not an integer. old: 6 * 83 = 500 MHz max efficiency frequency 26 * 83 = 2166 MHz base frequency new: 6 * 83.3 = 499.8 MHz max efficiency frequency 26 * 83.3 = 2165.8 MHz base frequency Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 42 +++++++++++++-------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 19802be6dc02..762b81497c3b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1581,11 +1581,11 @@ dump_nhm_platform_info(void) fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 40) & 0xFF; - fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n", + fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; - fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n", + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); @@ -1607,12 +1607,12 @@ dump_hsw_turbo_ratio_limits(void) ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1629,42 +1629,42 @@ dump_ivt_turbo_ratio_limits(void) ratio = (msr >> 56) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 48) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 40) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 32) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 24) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", + fprintf(outf, "%d * 
%.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1681,42 +1681,42 @@ dump_nhm_turbo_ratio_limits(void) ratio = (msr >> 56) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 8 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 48) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 7 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 40) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 6 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 32) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 5 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 24) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1776,7 +1776,7 @@ dump_knl_turbo_ratio_limits(void) for (i = buckets_no - 1; i >= 0; i--) if (i > 0 ? ratio[i] != ratio[i - 1] : 1) fprintf(outf, - "%d * %.0f = %.0f MHz max turbo %d active cores\n", + "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio[i], bclk, ratio[i] * bclk, cores[i]); } -- GitLab From e651262477c6d8cba79dffc1a6039da43d9c96b0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 11 Jan 2017 23:17:24 -0500 Subject: [PATCH 0773/1084] tools/power turbostat: further decode MSR_IA32_MISC_ENABLE Decode MISC_ENABLE.NO_TURBO, also use the #defines in msr-index.h for decoding this register cpu0: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST MWAIT TURBO) Although it is not architectural, decode also MSR_IA32_MISC_ENABLE.prefetch-disable (bit-9). documented to be present on: Core, P4, Intel-Xeon reserved on: Atom, Silvermont, Nehalem, SNB, PHI ec. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 762b81497c3b..a1ec9d816dfa 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3509,11 +3509,13 @@ void decode_misc_enable_msr(void) unsigned long long msr; if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) - fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n", + fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", base_cpu, msr, - msr & (1 << 3) ? "TCC" : "", - msr & (1 << 16) ? "EIST" : "", - msr & (1 << 18) ? "MONITOR" : ""); + msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", + msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", + msr & MSR_IA32_MISC_ENABLE_MWAIT ? 
"No-" : "", + msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", + msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); } /* -- GitLab From 8a34fd0226eaae64d61ff9a113d276e28acb6b5c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 12 Jan 2017 23:22:28 -0500 Subject: [PATCH 0774/1084] x86 msr-index.h: Define Atom specific core ratio MSR locations These MSRs are currently used by the intel_pstate driver, using a local definition. Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 079db99f6560..83bc672c225c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -209,6 +209,12 @@ #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 +#define MSR_ATOM_CORE_RATIOS 0x0000066a +#define MSR_ATOM_CORE_VIDS 0x0000066b +#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c +#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d + + #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 -- GitLab From 92134bdbc6272da6e98e9242cc6f1576cedc0735 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 25 Feb 2017 16:55:17 -0500 Subject: [PATCH 0775/1084] intel_pstate: use MSR_ATOM_RATIOS definitions from msr-index.h Originally, these MSRs were locally defined in this driver. Now the definitions are in msr-index.h -- use them. Signed-off-by: Len Brown --- drivers/cpufreq/intel_pstate.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 50bd6d987fc3..6e68b556305a 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -39,11 +39,6 @@ #define INTEL_CPUFREQ_TRANSITION_LATENCY 20000 -#define ATOM_RATIOS 0x66a -#define ATOM_VIDS 0x66b -#define ATOM_TURBO_RATIOS 0x66c -#define ATOM_TURBO_VIDS 0x66d - #ifdef CONFIG_ACPI #include #include @@ -1258,7 +1253,7 @@ static int atom_get_min_pstate(void) { u64 value; - rdmsrl(ATOM_RATIOS, value); + rdmsrl(MSR_ATOM_CORE_RATIOS, value); return (value >> 8) & 0x7F; } @@ -1266,7 +1261,7 @@ static int atom_get_max_pstate(void) { u64 value; - rdmsrl(ATOM_RATIOS, value); + rdmsrl(MSR_ATOM_CORE_RATIOS, value); return (value >> 16) & 0x7F; } @@ -1274,7 +1269,7 @@ static int atom_get_turbo_pstate(void) { u64 value; - rdmsrl(ATOM_TURBO_RATIOS, value); + rdmsrl(MSR_ATOM_CORE_TURBO_RATIOS, value); return value & 0x7F; } @@ -1336,7 +1331,7 @@ static void atom_get_vid(struct cpudata *cpudata) { u64 value; - rdmsrl(ATOM_VIDS, value); + rdmsrl(MSR_ATOM_CORE_VIDS, value); cpudata->vid.min = int_tofp((value >> 8) & 0x7f); cpudata->vid.max = int_tofp((value >> 16) & 0x7f); cpudata->vid.ratio = div_fp( @@ -1344,7 +1339,7 @@ static void atom_get_vid(struct cpudata *cpudata) int_tofp(cpudata->pstate.max_pstate - cpudata->pstate.min_pstate)); - rdmsrl(ATOM_TURBO_VIDS, value); + rdmsrl(MSR_ATOM_CORE_TURBO_VIDS, value); cpudata->vid.turbo = value & 0x7f; } -- GitLab From 0f7887c49b0c454aef9936a6eadabe1c91b5af55 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 12 Jan 2017 23:49:18 -0500 Subject: [PATCH 0776/1084] tools/power turbostat: dump Atom P-states correctly Turbostat dumps MSR_TURBO_RATIO_LIMIT on Core Architecture. But Atom Architecture uses MSR_ATOM_CORE_RATIOS and MSR_ATOM_CORE_TURBO_RATIOS. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 103 ++++++++++++++++++++------ 1 file changed, 82 insertions(+), 21 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index a1ec9d816dfa..da6ec640caf7 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1721,6 +1721,54 @@ dump_nhm_turbo_ratio_limits(void) return; } +static void +dump_atom_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; + + get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr); + fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); + + ratio = (msr >> 0) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", + ratio, bclk, ratio * bclk); + + get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); + fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); + + ratio = (msr >> 24) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", + ratio, bclk, ratio * bclk); +} + static void dump_knl_turbo_ratio_limits(void) { @@ -2496,8 +2544,32 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) has_base_hz = 1; return 1; } +/* + * SLV client has supporet for unique MSRs: + * + * MSR_CC6_DEMOTION_POLICY_CONFIG + * MSR_MC6_DEMOTION_POLICY_CONFIG + */ + +int has_slv_msrs(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_MERRIFIELD: + case INTEL_FAM6_ATOM_MOOREFIELD: + return 1; + } + return 0; +} + int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) { + if (has_slv_msrs(family, model)) + return 0; + switch (model) { /* Nehalem compatible, but do not include turbo-ratio limit support */ case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ @@ -2509,6 +2581,13 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) return 1; } } +int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (has_slv_msrs(family, model)) + return 1; + + return 0; +} int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -2606,6 +2685,9 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model) if (has_nhm_turbo_ratio_limit(family, model)) dump_nhm_turbo_ratio_limits(); + if (has_atom_turbo_ratio_limit(family, model)) + dump_atom_turbo_ratio_limits(); + if (has_knl_turbo_ratio_limit(family, model)) dump_knl_turbo_ratio_limits(); @@ -3286,27 +3368,6 @@ int has_snb_msrs(unsigned int family, unsigned int model) return 0; } -/* - * SLV client has supporet for unique MSRs: - * - * MSR_CC6_DEMOTION_POLICY_CONFIG - * MSR_MC6_DEMOTION_POLICY_CONFIG - */ - -int has_slv_msrs(unsigned 
int family, unsigned int model) -{ - if (!genuine_intel) - return 0; - - switch (model) { - case INTEL_FAM6_ATOM_SILVERMONT1: - case INTEL_FAM6_ATOM_MERRIFIELD: - case INTEL_FAM6_ATOM_MOOREFIELD: - return 1; - } - return 0; -} - /* * HSW adds support for additional MSRs: * -- GitLab From b3a34e9382a4aacfa7c0b24f9548737bbb20338e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 00:50:08 -0500 Subject: [PATCH 0777/1084] tools/power turbostat: decode CPUID(6).TURBO show the CPUID feature for turbo to clarify the case when it may not be shown in MISC_ENABLE CPUID(6): APERF, TURBO, DTS, PTM, No-HWP, No-HWPnotify, No-HWPwindow, No-HWPepp, No-HWPpkg, EPB cpu4: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST MWAIT TURBO) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index da6ec640caf7..1011b5f7f21f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3626,6 +3626,7 @@ void process_cpuid() { unsigned int eax, ebx, ecx, edx, max_level, max_extended_level; unsigned int fms, family, model, stepping; + unsigned int has_turbo; eax = ebx = ecx = edx = 0; @@ -3696,6 +3697,7 @@ void process_cpuid() do_dts = eax & (1 << 0); if (do_dts) BIC_PRESENT(BIC_CoreTmp); + has_turbo = eax & (1 << 1); do_ptm = eax & (1 << 6); if (do_ptm) BIC_PRESENT(BIC_PkgTmp); @@ -3707,9 +3709,10 @@ void process_cpuid() has_epb = ecx & (1 << 3); if (debug) - fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, " + fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", has_aperf ? "" : "No-", + has_turbo ? "" : "No-", do_dts ? "" : "No-", do_ptm ? "" : "No-", has_hwp ? "" : "No-", -- GitLab From 98af74599ea0757098a5776ea29e581b661dcf6f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 01:15:09 -0500 Subject: [PATCH 0778/1084] x86 msr_index.h: Define MSR_MISC_FEATURE_CONTROL This non-architectural MSR has disable bits for various prefetchers on modern processors. While these bits are generally touched only by the BIOS, say, via BIOS SETUP, it is useful to dump them when examining options that can alter performance. Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83bc672c225c..312fb7e14cdd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -467,6 +467,7 @@ #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 +#define MSR_MISC_FEATURE_CONTROL 0x000001a4 #define MSR_MISC_PWR_MGMT 0x000001aa #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 -- GitLab From 33148d671cc191fceaca5017e1bb060e9f30fbf7 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 01:26:16 -0500 Subject: [PATCH 0779/1084] tools/power turbostat: decode MSR_MISC_FEATURE_CONTROL useful for observing if the BIOS disabled prefetch Not architectural, but docuemented as present on NHM, SNB and is present on others. 
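A per-bit decode might look like the sketch below. The bit assignments (bit 0: L2 hardware prefetcher, bit 1: L2 adjacent-cache-line prefetcher, bit 2: DCU streamer prefetcher, bit 3: DCU IP prefetcher; a set bit means that prefetcher is disabled) follow Intel's public description of this non-architectural MSR and are an assumption of this sketch, not something the patch itself states.

#include <stdio.h>

/* sketch: print one token per prefetcher, "No-" when its disable bit is set */
static void decode_prefetch_disable_bits(int cpu, unsigned long long msr)
{
	printf("cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-HW %sL2-Adjacent %sDCU-Streamer %sDCU-IP)\n",
	       cpu, msr,
	       msr & (1ULL << 0) ? "No-" : "",	/* L2 hardware prefetcher disabled */
	       msr & (1ULL << 1) ? "No-" : "",	/* L2 adjacent cache line prefetcher disabled */
	       msr & (1ULL << 2) ? "No-" : "",	/* DCU streamer prefetcher disabled */
	       msr & (1ULL << 3) ? "No-" : "");	/* DCU IP prefetcher disabled */
}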
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1011b5f7f21f..2f033f3d2a21 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -102,6 +102,7 @@ unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ +unsigned int has_misc_feature_control; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -2466,6 +2467,7 @@ void check_permissions() * * Side effect: * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL + * sets has_misc_feature_control */ int probe_nhm_msrs(unsigned int family, unsigned int model) { @@ -2496,6 +2498,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_IVYBRIDGE: /* IVB */ case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ pkg_cstate_limits = snb_pkg_cstate_limits; + has_misc_feature_control = 1; break; case INTEL_FAM6_HASWELL_CORE: /* HSW */ case INTEL_FAM6_HASWELL_X: /* HSX */ @@ -2510,9 +2513,11 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ pkg_cstate_limits = hsw_pkg_cstate_limits; + has_misc_feature_control = 1; break; case INTEL_FAM6_SKYLAKE_X: /* SKX */ pkg_cstate_limits = skx_pkg_cstate_limits; + has_misc_feature_control = 1; break; case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ no_MSR_MISC_PWR_MGMT = 1; @@ -3579,6 +3584,21 @@ void decode_misc_enable_msr(void) msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); } +void decode_misc_feature_control(void) +{ + unsigned long long msr; + + if (!has_misc_feature_control) + return; + + if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) + fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", + base_cpu, msr, + msr & (0 << 0) ? "No-" : "", + msr & (1 << 0) ? "No-" : "", + msr & (2 << 0) ? "No-" : "", + msr & (3 << 0) ? "No-" : ""); +} /* * Decode MSR_MISC_PWR_MGMT * @@ -3725,6 +3745,7 @@ void process_cpuid() if (debug) decode_misc_enable_msr(); + if (max_level >= 0x7 && debug) { int has_sgx; @@ -3852,6 +3873,9 @@ void process_cpuid() if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) BIC_PRESENT(BIC_GFXMHz); + if (debug) + decode_misc_feature_control(); + return; } -- GitLab From fee86541d28934da4eb235367f7e2137acb1b359 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 01:59:12 -0500 Subject: [PATCH 0780/1084] tools/power turbostat: show all columns, independent of --debug Some time ago, turbostat overflowed 80 columns. So on the assumption that a "casual" user would always want topology and frequency columns, we hid the rest of the columns and the system configuration decoding behind the --debug option. Not everybody liked that change -- including me. I use --debug 99% of the time... Well, now we have "-o file" to put turbostat output into a file, so unless you are watching real-time in a small window, column count is less frequently a factor. And more recently, we got the "--hide columnA,columnB" option to specify columns to skip. So now we "un-hide" the rest of the columns from behind --debug, and show them all, by default. 
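The columns involved here are gated by turbostat's built-in-counter (BIC) bitmasks. A condensed, self-contained sketch of that mechanism follows; the bit positions and column names are illustrative, but the DO_BIC()/BIC_PRESENT() idiom is the one used throughout these diffs.

#include <stdio.h>

#define BIC_Busy	(1ULL << 0)	/* illustrative bit positions only */
#define BIC_Bzy_MHz	(1ULL << 1)
#define BIC_TSC_MHz	(1ULL << 2)

unsigned long long bic_enabled = ~0ULL;	/* what the user asked to --show */
unsigned long long bic_present;		/* what this platform actually supports */

#define DO_BIC(bit)		(bic_enabled & bic_present & (bit))
#define BIC_PRESENT(bit)	(bic_present |= (bit))

int main(void)
{
	BIC_PRESENT(BIC_Busy);			/* feature probe found this counter */
	bic_enabled &= ~BIC_TSC_MHz;		/* e.g. user passed --hide TSC_MHz */

	if (DO_BIC(BIC_Busy))
		printf("Busy%% column shown\n");
	if (!DO_BIC(BIC_TSC_MHz))
		printf("TSC_MHz column hidden\n");
	return 0;
}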
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2f033f3d2a21..c50b452a404e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -479,9 +479,6 @@ void print_header(void) if (DO_BIC(BIC_TSC_MHz)) outp += sprintf(outp, "\tTSC_MHz"); - if (!debug) - goto done; - if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "\tIRQ"); if (DO_BIC(BIC_SMI)) @@ -593,7 +590,6 @@ void print_header(void) } } -done: outp += sprintf(outp, "\n"); } @@ -741,9 +737,6 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_TSC_MHz)) outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); - if (!debug) - goto done; - /* IRQ */ if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "\t%d", t->irq_count); -- GitLab From 96e4715857cf184536ef46e6ea92465f58a7b12d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 02:26:00 -0500 Subject: [PATCH 0781/1084] tools/power turbostat: print system config, unless --quiet Some users want turbostat to tell them everything, by default. Some users want turbostat to be quiet, by default. I find that I'm in the 1st camp, and so I've never liked needing to type the --debug parameter to decode the system configuration. So here we change the default and print the system configuration, by default. (The --debug option is now un-documented, though it does still exist for debugging turbostat internals) When you do not want to see the system configuration header, use the new "--quiet" option. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 7 +- tools/power/x86/turbostat/turbostat.c | 105 +++++++++++++------------- 2 files changed, 52 insertions(+), 60 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e8fb1e02d121..a08de27713e0 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -53,8 +53,7 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--Dump\fP displays the raw counter values. .PP -\fB--debug\fP displays additional system configuration information. Invoking this parameter -more than once may also enable internal turbostat debug information. +\fB--quiet\fP Do not decode and print the system configuration header information. .PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP @@ -124,7 +123,6 @@ Or a command may be specified as in "FORK EXAMPLE" below. .fi .SH DEBUG EXAMPLE -The "--debug" option prints additional system information before measurements: The first row of statistics is a summary for the entire system. For residency % columns, the summary is a weighted average. @@ -188,9 +186,6 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling. The remaining rows show what maximum turbo frequency is possible depending on the number of idle cores. Note that not all information is available on all processors. -.PP -The --debug option adds additional columns to the measurement ouput, including CPU idle power-state residency processor temperature sensor readinds. -See the field definitions above. .SH FORK EXAMPLE If turbostat is invoked with a command, it will fork that command and output the statistics gathered after the command exits. 
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c50b452a404e..6867557596af 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -49,6 +49,7 @@ FILE *outf; int *fd_percpu; struct timespec interval_ts = {5, 0}; unsigned int debug; +unsigned int quiet; unsigned int rapl_joules; unsigned int summary_only; unsigned int dump_only; @@ -3114,7 +3115,7 @@ void rapl_probe(unsigned int family, unsigned int model) tdp = get_tdp(model); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; - if (debug) + if (!quiet) fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); return; @@ -3239,11 +3240,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) return -1; - if (debug) { - fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " - "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, - rapl_power_units, rapl_energy_units, rapl_time_units); - } + fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr, + rapl_power_units, rapl_energy_units, rapl_time_units); + if (do_rapl & RAPL_PKG_POWER_INFO) { if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) @@ -3264,7 +3263,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -9; fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 63) & 1 ? "": "UN"); + cpu, msr, (msr >> 63) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "PKG Limit #1"); fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", @@ -3290,40 +3289,34 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "": "UN"); + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "DRAM Limit"); } if (do_rapl & RAPL_CORE_POLICY) { - if (debug) { - if (get_msr(cpu, MSR_PP0_POLICY, &msr)) - return -7; + if (get_msr(cpu, MSR_PP0_POLICY, &msr)) + return -7; - fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); - } + fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } if (do_rapl & RAPL_CORES_POWER_LIMIT) { - if (debug) { - if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) - return -9; - fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "": "UN"); - print_power_limit_msr(cpu, msr, "Cores Limit"); - } + if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) + return -9; + fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + print_power_limit_msr(cpu, msr, "Cores Limit"); } if (do_rapl & RAPL_GFX) { - if (debug) { - if (get_msr(cpu, MSR_PP1_POLICY, &msr)) - return -8; + if (get_msr(cpu, MSR_PP1_POLICY, &msr)) + return -8; - fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); + fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); - if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) - return -9; - fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "": "UN"); - print_power_limit_msr(cpu, msr, "GFX Limit"); - } + if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) + return -9; + fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? 
"" : "UN"); + print_power_limit_msr(cpu, msr, "GFX Limit"); } return 0; } @@ -3469,7 +3462,7 @@ double slm_bclk(void) } freq = slm_freq_table[i]; - if (debug) + if (!quiet) fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); return freq; @@ -3533,7 +3526,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk target_c_local = (msr >> 16) & 0xFF; - if (debug) + if (!quiet) fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, target_c_local); @@ -3648,7 +3641,7 @@ void process_cpuid() if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) genuine_intel = 1; - if (debug) + if (!quiet) fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); @@ -3659,7 +3652,7 @@ void process_cpuid() if (family == 6 || family == 0xf) model += ((fms >> 16) & 0xf) << 4; - if (debug) { + if (!quiet) { fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", @@ -3721,7 +3714,7 @@ void process_cpuid() has_hwp_pkg = eax & (1 << 11); has_epb = ecx & (1 << 3); - if (debug) + if (!quiet) fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", has_aperf ? "" : "No-", @@ -3735,11 +3728,11 @@ void process_cpuid() has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); - if (debug) + if (!quiet) decode_misc_enable_msr(); - if (max_level >= 0x7 && debug) { + if (max_level >= 0x7 && !quiet) { int has_sgx; ecx = 0; @@ -3765,7 +3758,7 @@ void process_cpuid() if (ebx_tsc != 0) { - if (debug && (ebx != 0)) + if (!quiet && (ebx != 0)) fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz); @@ -3790,7 +3783,7 @@ void process_cpuid() if (crystal_hz) { tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; - if (debug) + if (!quiet) fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); } @@ -3805,7 +3798,7 @@ void process_cpuid() base_mhz = max_mhz = bus_mhz = edx = 0; __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); - if (debug) + if (!quiet) fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz); } @@ -3845,16 +3838,16 @@ void process_cpuid() do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - if (debug) + if (!quiet) decode_misc_pwr_mgmt_msr(); - if (debug && has_slv_msrs(family, model)) + if (!quiet && has_slv_msrs(family, model)) decode_c6_demotion_policy_msr(); rapl_probe(family, model); perf_limit_reasons_probe(family, model); - if (debug) + if (!quiet) dump_cstate_pstate_config_info(family, model); if (has_skl_msrs(family, model)) @@ -3866,7 +3859,7 @@ void process_cpuid() if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) BIC_PRESENT(BIC_GFXMHz); - if (debug) + if (!quiet) decode_misc_feature_control(); return; @@ -3883,7 +3876,7 @@ void help() "to print statistics, until interrupted.\n" "--add add a counter\n" " eg. 
--add msr0x10,u64,cpu,delta,MY_TSC\n" - "--debug run in \"debug\" mode\n" + "--quiet skip decoding system configuration header\n" "--interval sec Override default 5-second measurement interval\n" "--help print this help message\n" "--out file create or truncate \"file\" for all output\n" @@ -4136,24 +4129,24 @@ void turbostat_init() process_cpuid(); - if (debug) + if (!quiet) for_all_cpus(print_hwp, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_epb, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_perf_limit, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_rapl, ODD_COUNTERS); for_all_cpus(set_temperature_target, ODD_COUNTERS); - if (debug) + if (!quiet) for_all_cpus(print_thermal, ODD_COUNTERS); - if (debug && do_irtl_snb) + if (!quiet && do_irtl_snb) print_irtl(); } @@ -4429,7 +4422,7 @@ void cmdline(int argc, char **argv) static struct option long_options[] = { {"add", required_argument, 0, 'a'}, {"Dump", no_argument, 0, 'D'}, - {"debug", no_argument, 0, 'd'}, + {"debug", no_argument, 0, 'd'}, /* internal, not documented */ {"interval", required_argument, 0, 'i'}, {"help", no_argument, 0, 'h'}, {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help @@ -4437,6 +4430,7 @@ void cmdline(int argc, char **argv) {"out", required_argument, 0, 'o'}, {"Package", no_argument, 0, 'p'}, {"processor", no_argument, 0, 'p'}, + {"quiet", no_argument, 0, 'q'}, {"show", required_argument, 0, 's'}, {"Summary", no_argument, 0, 'S'}, {"TCC", required_argument, 0, 'T'}, @@ -4446,7 +4440,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpqST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -4491,6 +4485,9 @@ void cmdline(int argc, char **argv) case 'p': show_core_only++; break; + case 'q': + quiet = 1; + break; case 's': parse_show_hide(optarg, SHOW_LIST); break; @@ -4514,7 +4511,7 @@ int main(int argc, char **argv) cmdline(argc, argv); - if (debug) + if (!quiet) print_version(); turbostat_init(); -- GitLab From 008d396eb219ee5a1c98c9ef01c35752d35f0d6c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 00:29:51 -0500 Subject: [PATCH 0782/1084] tools/power turbostat: use tsc_tweak everwhere it is needed The CPU ticks at a rate in the "bus clock" domain. eg. 100 MHz * bus_ratio. On newer processors, the TSC has been moved out of this BCLK domain and into a separate crystal-clock domain. While the TSC ticks "close to" the base frequency, those that look closely at the numbers will notice small errors in calculations that mix units of TSC clocks and bus clocks. "tsc_tweak" was introduced to address the most visible mixing -- the %Busy and the the Busy_MHz calculations. (A simplification as since removed TSC from the BusyMHz calculation) Here we apply the tsc_tweak to everyplace where BCLK and TSC units are mixed. The results is that on a system which is 100% idle, the sum of the C-states are now much more likely to be closer to 100%. 
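A minimal sketch of the corrected accounting follows; the variable names mirror the fields touched in the diff below and the one-second delta values are invented for illustration. tsc_tweak is the tool's base_hz / tsc_hz correction factor, so tsc * tsc_tweak expresses the TSC delta in bus-clock ticks, the same domain as MPERF and the C-state residency counters.

#include <stdio.h>

static void print_idle_summary(unsigned long long tsc, unsigned long long mperf,
			       unsigned long long c3, unsigned long long c6,
			       unsigned long long c7, double tsc_tweak)
{
	double tsc_bclk = tsc * tsc_tweak;		/* TSC delta in BCLK ticks */
	double c1 = tsc_bclk - mperf - c3 - c6 - c7;	/* derived CC1, same domain */

	printf("Busy%%  %6.2f\n", 100.0 * mperf / tsc_bclk);
	printf("CPU%%c1 %6.2f\n", 100.0 * c1 / tsc_bclk);
	printf("CPU%%c6 %6.2f\n", 100.0 * c6 / tsc_bclk);
}

int main(void)
{
	/* invented 1-second deltas on a mostly idle CPU */
	print_idle_summary(2208000000ULL, 22000000ULL, 0, 2156000000ULL, 0, 0.9964);
	return 0;
}

With the shared denominator, Busy%, CPU%c1 and CPU%c6 for the invented values above sum to roughly 100%, which is the behavior this patch restores on an idle system.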
Reported-by: Travis Downs Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 48 ++++++++++++++------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6867557596af..bbdf9ba9d41b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -680,7 +680,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { - double interval_float; + double interval_float, tsc; char *fmt8; int i; struct msr_counter *mp; @@ -695,6 +695,8 @@ int format_counters(struct thread_data *t, struct core_data *c, interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; + tsc = t->tsc * tsc_tweak; + /* topo columns, print blanks on 1st (average) line */ if (t == &average.threads) { if (DO_BIC(BIC_Package)) @@ -725,14 +727,14 @@ int format_counters(struct thread_data *t, struct core_data *c, 1.0 / units * t->aperf / interval_float); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); + outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/tsc); if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf); else outp += sprintf(outp, "\t%.0f", - 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); + tsc / units * t->aperf / t->mperf / interval_float); } if (DO_BIC(BIC_TSC_MHz)) @@ -748,7 +750,7 @@ int format_counters(struct thread_data *t, struct core_data *c, /* C1 */ if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/tsc); /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { @@ -760,7 +762,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } else if (mp->format == FORMAT_DELTA) { outp += sprintf(outp, "\t%lld", t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/tsc); } } @@ -769,15 +771,15 @@ int format_counters(struct thread_data *t, struct core_data *c, goto done; if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/tsc); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/tsc); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/tsc); /* Mod%c6 */ if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / tsc); if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\t%d", c->core_temp_c); @@ -791,7 +793,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } else if (mp->format == FORMAT_DELTA) { outp += sprintf(outp, "\t%lld", c->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/tsc); } } @@ -819,24 +821,24 @@ int format_counters(struct thread_data *t, struct core_data *c, /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (do_skl_residency) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * 
p->pkg_any_core_c0/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/tsc); } if (do_pc2) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/tsc); if (do_pc3) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/tsc); if (do_pc6) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/tsc); if (do_pc7) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/tsc); if (do_c8_c9_c10) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/tsc); } /* @@ -878,7 +880,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } else if (mp->format == FORMAT_DELTA) { outp += sprintf(outp, "\t%lld", p->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc); + outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/tsc); } } @@ -1048,7 +1050,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->c1 = 0; else { /* normal case, derive c1 */ - old->c1 = old->tsc - old->mperf - core_delta->c3 + old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - core_delta->c6 - core_delta->c7; } } -- GitLab From 0f47c08d8ccf8252a5c007502919bdc2126ffb1f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jan 2017 00:50:45 -0500 Subject: [PATCH 0783/1084] tools/power turbostat: bug fixes to --add, --show/--hide features Fix a bug with --add, where the title of the column is un-initialized if not specified by the user. The initial implementation of --show and --hide neglected to handle the pc8/pc9/pc10 counters. 
Fix a bug where "--show Core" only worked with --debug Reported-by: Wendy Wang Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 138 ++++++++++++++------------ 1 file changed, 77 insertions(+), 61 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bbdf9ba9d41b..4b04ba86e44f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -55,11 +55,6 @@ unsigned int summary_only; unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; -unsigned int do_pc2; -unsigned int do_pc3; -unsigned int do_pc6; -unsigned int do_pc7; -unsigned int do_c8_c9_c10; unsigned int do_skl_residency; unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; @@ -365,6 +360,9 @@ struct msr_counter bic[] = { { 0x0, "Pkg%pc3" }, { 0x0, "Pkg%pc6" }, { 0x0, "Pkg%pc7" }, + { 0x0, "Pkg%pc8" }, + { 0x0, "Pkg%pc9" }, + { 0x0, "Pkg%pc10" }, { 0x0, "PkgWatt" }, { 0x0, "CorWatt" }, { 0x0, "GFXWatt" }, @@ -403,26 +401,30 @@ struct msr_counter bic[] = { #define BIC_Pkgpc3 (1ULL << 18) #define BIC_Pkgpc6 (1ULL << 19) #define BIC_Pkgpc7 (1ULL << 20) -#define BIC_PkgWatt (1ULL << 21) -#define BIC_CorWatt (1ULL << 22) -#define BIC_GFXWatt (1ULL << 23) -#define BIC_PkgCnt (1ULL << 24) -#define BIC_RAMWatt (1ULL << 27) -#define BIC_PKG__ (1ULL << 28) -#define BIC_RAM__ (1ULL << 29) -#define BIC_Pkg_J (1ULL << 30) -#define BIC_Cor_J (1ULL << 31) -#define BIC_GFX_J (1ULL << 30) -#define BIC_RAM_J (1ULL << 31) -#define BIC_Core (1ULL << 32) -#define BIC_CPU (1ULL << 33) -#define BIC_Mod_c6 (1ULL << 34) +#define BIC_Pkgpc8 (1ULL << 21) +#define BIC_Pkgpc9 (1ULL << 22) +#define BIC_Pkgpc10 (1ULL << 23) +#define BIC_PkgWatt (1ULL << 24) +#define BIC_CorWatt (1ULL << 25) +#define BIC_GFXWatt (1ULL << 26) +#define BIC_PkgCnt (1ULL << 27) +#define BIC_RAMWatt (1ULL << 28) +#define BIC_PKG__ (1ULL << 29) +#define BIC_RAM__ (1ULL << 30) +#define BIC_Pkg_J (1ULL << 31) +#define BIC_Cor_J (1ULL << 32) +#define BIC_GFX_J (1ULL << 33) +#define BIC_RAM_J (1ULL << 34) +#define BIC_Core (1ULL << 35) +#define BIC_CPU (1ULL << 36) +#define BIC_Mod_c6 (1ULL << 37) unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; unsigned long long bic_present; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) +#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) /* * bic_lookup @@ -539,19 +541,20 @@ void print_header(void) outp += sprintf(outp, "\tCPUGFX%%"); } - if (do_pc2) + if (DO_BIC(BIC_Pkgpc2)) outp += sprintf(outp, "\tPkg%%pc2"); - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) outp += sprintf(outp, "\tPkg%%pc3"); - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) outp += sprintf(outp, "\tPkg%%pc6"); - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) outp += sprintf(outp, "\tPkg%%pc7"); - if (do_c8_c9_c10) { + if (DO_BIC(BIC_Pkgpc8)) outp += sprintf(outp, "\tPkg%%pc8"); + if (DO_BIC(BIC_Pkgpc9)) outp += sprintf(outp, "\tPkg%%pc9"); + if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "\tPk%%pc10"); - } if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) @@ -644,11 +647,11 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); outp += sprintf(outp, "pc2: %016llX\n", p->pc2); - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) outp += sprintf(outp, "pc3: %016llX\n", p->pc3); - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) outp += sprintf(outp, "pc6: %016llX\n", p->pc6); - if (do_pc7) + if 
(DO_BIC(BIC_Pkgpc7)) outp += sprintf(outp, "pc7: %016llX\n", p->pc7); outp += sprintf(outp, "pc8: %016llX\n", p->pc8); outp += sprintf(outp, "pc9: %016llX\n", p->pc9); @@ -827,19 +830,20 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/tsc); } - if (do_pc2) + if (DO_BIC(BIC_Pkgpc2)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/tsc); - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/tsc); - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/tsc); - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/tsc); - if (do_c8_c9_c10) { + if (DO_BIC(BIC_Pkgpc8)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/tsc); + if (DO_BIC(BIC_Pkgpc9)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/tsc); + if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/tsc); - } /* * If measurement interval exceeds minimum RAPL Joule Counter range, @@ -949,11 +953,11 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; } old->pc2 = new->pc2 - old->pc2; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) old->pc3 = new->pc3 - old->pc3; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) old->pc6 = new->pc6 - old->pc6; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) old->pc7 = new->pc7 - old->pc7; old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; @@ -1126,11 +1130,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->pkg_both_core_gfxe_c0 = 0; p->pc2 = 0; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) p->pc3 = 0; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) p->pc6 = 0; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) p->pc7 = 0; p->pc8 = 0; p->pc9 = 0; @@ -1204,11 +1208,11 @@ int sum_counters(struct thread_data *t, struct core_data *c, } average.packages.pc2 += p->pc2; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) average.packages.pc3 += p->pc3; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) average.packages.pc6 += p->pc6; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) average.packages.pc7 += p->pc7; average.packages.pc8 += p->pc8; average.packages.pc9 += p->pc9; @@ -1266,11 +1270,11 @@ void compute_average(struct thread_data *t, struct core_data *c, } average.packages.pc2 /= topo.num_packages; - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) average.packages.pc3 /= topo.num_packages; - if (do_pc6) + if (DO_BIC(BIC_Pkgpc6)) average.packages.pc6 /= topo.num_packages; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) average.packages.pc7 /= topo.num_packages; average.packages.pc8 /= topo.num_packages; @@ -1448,10 +1452,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) return -13; } - if (do_pc3) + if (DO_BIC(BIC_Pkgpc3)) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; - if (do_pc6) { + if (DO_BIC(BIC_Pkgpc6)) { if (do_slm_cstates) { if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6)) return -10; @@ -1461,20 +1465,22 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } } - if (do_pc2) + if (DO_BIC(BIC_Pkgpc2)) if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) return -11; - if (do_pc7) + if (DO_BIC(BIC_Pkgpc7)) if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) return -12; - if (do_c8_c9_c10) { + if (DO_BIC(BIC_Pkgpc8)) if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8)) return -13; + if (DO_BIC(BIC_Pkgpc9)) if 
(get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9)) return -13; + if (DO_BIC(BIC_Pkgpc10)) if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) return -13; - } + if (do_rapl & RAPL_PKG) { if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; @@ -3824,17 +3830,27 @@ void process_cpuid() BIC_PRESENT(BIC_CPU_c7); do_irtl_snb = has_snb_msrs(family, model); - do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); - do_pc3 = (pkg_cstate_limit >= PCL__3); - do_pc6 = (pkg_cstate_limit >= PCL__6); - do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); + if (do_snb_cstates && (pkg_cstate_limit >= PCL__2)) + BIC_PRESENT(BIC_Pkgpc2); + if (pkg_cstate_limit >= PCL__3) + BIC_PRESENT(BIC_Pkgpc3); + if (pkg_cstate_limit >= PCL__6) + BIC_PRESENT(BIC_Pkgpc6); + if (do_snb_cstates && (pkg_cstate_limit >= PCL__7)) + BIC_PRESENT(BIC_Pkgpc7); if (has_slv_msrs(family, model)) { - do_pc2 = do_pc3 = do_pc7 = 0; - do_pc6 = 1; + BIC_NOT_PRESENT(BIC_Pkgpc2); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_PRESENT(BIC_Pkgpc6); + BIC_NOT_PRESENT(BIC_Pkgpc7); BIC_PRESENT(BIC_Mod_c6); use_c1_residency_msr = 1; } - do_c8_c9_c10 = has_hsw_msrs(family, model); + if (has_hsw_msrs(family, model)) { + BIC_PRESENT(BIC_Pkgpc8); + BIC_PRESENT(BIC_Pkgpc9); + BIC_PRESENT(BIC_Pkgpc10); + } do_irtl_hsw = has_hsw_msrs(family, model); do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); @@ -3981,7 +3997,7 @@ void topology_probe() if (debug > 1) fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.num_cores_per_pkg); - if (debug && !summary_only && topo.num_cores_per_pkg > 1) + if (!summary_only && topo.num_cores_per_pkg > 1) BIC_PRESENT(BIC_Core); topo.num_packages = max_package_id + 1; @@ -4282,7 +4298,7 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width, void parse_add_command(char *add_command) { int msr_num = 0; - char name_buffer[NAME_BYTES]; + char name_buffer[NAME_BYTES] = ""; int width = 64; int fail = 0; enum counter_scope scope = SCOPE_CPU; -- GitLab From 311f77708f362c1af0a999ba5ebbbc062fdc5601 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 16 Feb 2017 23:48:50 -0500 Subject: [PATCH 0784/1084] x86: intel-family.h: Add GEMINI_LAKE SOC Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/intel-family.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 8167fdb67ae8..9814db42b790 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -59,6 +59,7 @@ #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ #define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ /* Xeon Phi */ -- GitLab From ac01ac1371d01beb02827f89fff9aac9d7941e91 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jan 2017 01:45:35 -0500 Subject: [PATCH 0785/1084] tools/power turbostat: initial Gemini Lake SOC support Gemini Lake is similar to Apollo Lake (Broxton/Goldmont) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4b04ba86e44f..819d67fbb6ca 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2535,6 +2535,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) pkg_cstate_limits = 
phi_pkg_cstate_limits; break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ pkg_cstate_limits = bxt_pkg_cstate_limits; break; @@ -3018,6 +3019,7 @@ void rapl_probe(unsigned int family, unsigned int model) } break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; if (rapl_joules) BIC_PRESENT(BIC_Pkg_J); @@ -3361,6 +3363,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ return 1; } @@ -3392,6 +3395,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: return 1; } return 0; @@ -3783,6 +3787,7 @@ void process_cpuid() crystal_hz = 25000000; /* 25.0 MHz */ break; case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GEMINI_LAKE: crystal_hz = 19200000; /* 19.2 MHz */ break; default: -- GitLab From 7170a374377d7c70d63a2d3eb38f8fe32e6ffadd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jan 2017 02:13:27 -0500 Subject: [PATCH 0786/1084] tools/power turbostat: Denverton: use HW CC1 counter, skip C3, C7 The CC1 column in tubostat can be computed by subtracting the core c-state residency countes from the total Cx residency. CC1 = (Idle_time_as_measured by MPERF) - (all core C-states with residency counters) However, as the underlying counter reads are not atomic, error can be noticed in this calculations, especially when the numbers are small. Denverton has a hardware CC1 residency counter to improve the accuracy of the cc1 statistic -- use it. At the same time, Denverton has no concept of CC3, PC3, CC7, PC7, so skip collecting and printing those columns. Finally, a note of clarification. Turbostat prints the standard PC2 residency counter, but on Denverton hardware, that actually means PC1E. Turbostat prints the standard PC6 residency counter, but on Denverton hardware, that actually means PC2. At this point, we document that differnce in this commit message, rather than adding a quirk to the software. 
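A condensed view of the two CC1 paths described above, as a sketch with illustrative names: when a hardware counter such as MSR_CORE_C1_RES is available its delta is used directly; otherwise CC1 is derived as the idle remainder, which inherits any skew between the separate, non-atomic counter reads.

/* sketch only: choose between the hardware CC1 counter and the derived value */
unsigned long long cc1_delta(int have_hw_cc1, unsigned long long hw_cc1,
			     unsigned long long tsc, unsigned long long mperf,
			     unsigned long long c3, unsigned long long c6,
			     unsigned long long c7)
{
	if (have_hw_cc1)
		return hw_cc1;			/* e.g. delta of MSR_CORE_C1_RES */
	return tsc - mperf - c3 - c6 - c7;	/* idle remainder; the individual
						 * reads are not atomic, so small
						 * residencies come out noisy */
}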
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 819d67fbb6ca..1010135ee973 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2572,6 +2572,18 @@ int has_slv_msrs(unsigned int family, unsigned int model) } return 0; } +int is_dnv(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_DENVERTON: + return 1; + } + return 0; +} int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) { @@ -3851,6 +3863,14 @@ void process_cpuid() BIC_PRESENT(BIC_Mod_c6); use_c1_residency_msr = 1; } + if (is_dnv(family, model)) { + BIC_PRESENT(BIC_CPU_c1); + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + use_c1_residency_msr = 1; + } if (has_hsw_msrs(family, model)) { BIC_PRESENT(BIC_Pkgpc8); BIC_PRESENT(BIC_Pkgpc9); -- GitLab From 34c7619762f7b4ebbd5157b312e6022b725c031e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 27 Jan 2017 02:36:41 -0500 Subject: [PATCH 0787/1084] tools/power turbostat: skip unused counters on SKX Skip these four counters on SKX, as they are always zero: cc3, pc3 cc7, pc7 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1010135ee973..8c437115d41b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2584,6 +2584,18 @@ int is_dnv(unsigned int family, unsigned int model) } return 0; } +int is_skx(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_SKYLAKE_X: + return 1; + } + return 0; +} int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) { @@ -3871,6 +3883,12 @@ void process_cpuid() BIC_NOT_PRESENT(BIC_Pkgpc7); use_c1_residency_msr = 1; } + if (is_skx(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } if (has_hsw_msrs(family, model)) { BIC_PRESENT(BIC_Pkgpc8); BIC_PRESENT(BIC_Pkgpc9); -- GitLab From 31e07522be566cd039ff7a770550076cc1707a0c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 31 Jan 2017 23:07:49 -0500 Subject: [PATCH 0788/1084] tools/power turbostat: fix decoding for GLM, DNV, SKX turbo-ratio limits Newer processors do not hard-code the the number of cpus in each bin to {1, 2, 3, 4, 5, 6, 7, 8} Rather, they can specify any number of CPUS in each of the 8 bins: eg. ... 37 * 100.0 = 3600.0 MHz max turbo 4 active cores 38 * 100.0 = 3700.0 MHz max turbo 3 active cores 39 * 100.0 = 3800.0 MHz max turbo 2 active cores 39 * 100.0 = 3900.0 MHz max turbo 1 active cores could now look something like this: ... 
37 * 100.0 = 3600.0 MHz max turbo 16 active cores 38 * 100.0 = 3700.0 MHz max turbo 8 active cores 39 * 100.0 = 3800.0 MHz max turbo 4 active cores 39 * 100.0 = 3900.0 MHz max turbo 2 active cores Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 90 ++++++++++++++++++++------- 1 file changed, 67 insertions(+), 23 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8c437115d41b..67a275882a8d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1671,56 +1671,84 @@ dump_ivt_turbo_ratio_limits(void) ratio, bclk, ratio * bclk); return; } +int has_turbo_ratio_group_limits(int family, int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ATOM_DENVERTON: + return 1; + } + return 0; +} static void -dump_nhm_turbo_ratio_limits(void) +dump_turbo_ratio_limits(int family, int model) { - unsigned long long msr; - unsigned int ratio; + unsigned long long msr, core_counts; + unsigned int ratio, group_size; get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); - fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); + if (has_turbo_ratio_group_limits(family, model)) { + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); + } else { + core_counts = 0x0807060504030201; + } + ratio = (msr >> 56) & 0xFF; + group_size = (core_counts >> 56) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 8 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 48) & 0xFF; + group_size = (core_counts >> 48) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 7 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 40) & 0xFF; + group_size = (core_counts >> 40) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 6 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 32) & 0xFF; + group_size = (core_counts >> 32) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 5 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 24) & 0xFF; + group_size = (core_counts >> 24) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 16) & 0xFF; + group_size = (core_counts >> 16) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 8) & 0xFF; + group_size = (core_counts >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); ratio = (msr >> 0) & 0xFF; + 
group_size = (core_counts >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); return; } @@ -2597,7 +2625,7 @@ int is_skx(unsigned int family, unsigned int model) return 0; } -int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) +int has_turbo_ratio_limit(unsigned int family, unsigned int model) { if (has_slv_msrs(family, model)) return 0; @@ -2668,6 +2696,22 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } } +int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_SKYLAKE_X: + return 1; + default: + return 0; + } +} int has_config_tdp(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -2714,8 +2758,8 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model) if (has_ivt_turbo_ratio_limit(family, model)) dump_ivt_turbo_ratio_limits(); - if (has_nhm_turbo_ratio_limit(family, model)) - dump_nhm_turbo_ratio_limits(); + if (has_turbo_ratio_limit(family, model)) + dump_turbo_ratio_limits(family, model); if (has_atom_turbo_ratio_limit(family, model)) dump_atom_turbo_ratio_limits(); -- GitLab From ade0ebacdf03591b3dab642e6e92da60c20ebdb3 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 01:56:47 -0500 Subject: [PATCH 0789/1084] tools/power turbostat: skip unused counters on BDX Skip these two counters on BDX, as they are always zero: cc7, pc7 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 67a275882a8d..334c4c29d4b5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2612,6 +2612,19 @@ int is_dnv(unsigned int family, unsigned int model) } return 0; } +int is_bdx(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_BROADWELL_XEON_D: + return 1; + } + return 0; +} int is_skx(unsigned int family, unsigned int model) { @@ -3933,6 +3946,10 @@ void process_cpuid() BIC_NOT_PRESENT(BIC_CPU_c7); BIC_NOT_PRESENT(BIC_Pkgpc7); } + if (is_bdx(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } if (has_hsw_msrs(family, model)) { BIC_PRESENT(BIC_Pkgpc8); BIC_PRESENT(BIC_Pkgpc9); -- GitLab From 495c7654ccfb771d19ce1b9fbc7be21e45b14636 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 8 Feb 2017 02:41:51 -0500 Subject: [PATCH 0790/1084] tools/power turbostat: extend --add option to accept /sys path Previously, the --add option could specify only an MSR. Here it is extended to accept an arbitrary /sys attribute, specified by an absolute file path name.
sudo ./turbostat --add /sys/devices/system/cpu/cpu0/cpuidle/state5/usage Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 92 ++++++++++++++++++++------- 1 file changed, 69 insertions(+), 23 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 334c4c29d4b5..841f837e317f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -138,6 +138,7 @@ unsigned int has_misc_feature_control; * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters */ #define NAME_BYTES 20 +#define PATH_BYTES 128 int backwards_count; char *progname; @@ -213,6 +214,7 @@ enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT}; struct msr_counter { unsigned int msr_num; char name[NAME_BYTES]; + char path[PATH_BYTES]; unsigned int width; enum counter_type type; enum counter_format format; @@ -344,7 +346,7 @@ struct msr_counter bic[] = { { 0x0, "Bzy_MHz" }, { 0x0, "TSC_MHz" }, { 0x0, "IRQ" }, - { 0x0, "SMI", 32, 0, FORMAT_DELTA, NULL}, + { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL}, { 0x0, "Busy%" }, { 0x0, "CPU%c1" }, { 0x0, "CPU%c3" }, @@ -1307,6 +1309,51 @@ static unsigned long long rdtsc(void) return low | ((unsigned long long)high) << 32; } +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, mode); + + if (!filep) + err(1, "%s: open failed", path); + return filep; +} +/* + * snapshot_sysfs_counter() + * + * return snapshot of given counter + */ +unsigned long long snapshot_sysfs_counter(char *path) +{ + FILE *fp; + int retval; + unsigned long long counter; + + fp = fopen_or_die(path, "r"); + + retval = fscanf(fp, "%lld", &counter); + if (retval != 1) + err(1, "snapshot_sysfs_counter(%s)", path); + + fclose(fp); + + return counter; +} + +int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) +{ + if (mp->msr_num != 0) { + if (get_msr(cpu, mp->msr_num, counterp)) + return -1; + } else { + *counterp = snapshot_sysfs_counter(mp->path); + } + + return 0; +} + /* * get_counters(...) * migrate to cpu @@ -1397,11 +1444,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (get_msr(cpu, mp->msr_num, &t->counter[i])) + if (get_mp(cpu, mp, &t->counter[i])) return -10; } - /* collect core counters only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -1434,7 +1480,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) } for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (get_msr(cpu, mp->msr_num, &c->counter[i])) + if (get_mp(cpu, mp, &c->counter[i])) return -10; } @@ -1524,7 +1570,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) p->gfx_mhz = gfx_cur_mhz; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (get_msr(cpu, mp->msr_num, &p->counter[i])) + if (get_mp(cpu, mp, &p->counter[i])) return -10; } @@ -2013,16 +2059,6 @@ void free_all_buffers(void) free(irqs_per_cpu); } -/* - * Open a file, and exit on failure - */ -FILE *fopen_or_die(const char *path, const char *mode) -{ - FILE *filep = fopen(path, mode); - if (!filep) - err(1, "%s: open failed", path); - return filep; -} /* * Parse a file containing a single int. 
@@ -2581,7 +2617,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) return 1; } /* - * SLV client has supporet for unique MSRs: + * SLV client has support for unique MSRs: * * MSR_CC6_DEMOTION_POLICY_CONFIG * MSR_MC6_DEMOTION_POLICY_CONFIG @@ -4342,9 +4378,9 @@ void print_version() { " - Len Brown \n"); } -int add_counter(unsigned int msr_num, char *name, unsigned int width, - enum counter_scope scope, enum counter_type type, - enum counter_format format) +int add_counter(unsigned int msr_num, char *path, char *name, + unsigned int width, enum counter_scope scope, + enum counter_type type, enum counter_format format) { struct msr_counter *msrp; @@ -4356,6 +4392,8 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width, msrp->msr_num = msr_num; strncpy(msrp->name, name, NAME_BYTES); + if (path) + strncpy(msrp->path, path, PATH_BYTES); msrp->width = width; msrp->type = type; msrp->format = format; @@ -4402,6 +4440,7 @@ int add_counter(unsigned int msr_num, char *name, unsigned int width, void parse_add_command(char *add_command) { int msr_num = 0; + char *path = NULL; char name_buffer[NAME_BYTES] = ""; int width = 64; int fail = 0; @@ -4417,6 +4456,11 @@ void parse_add_command(char *add_command) if (sscanf(add_command, "msr%d", &msr_num) == 1) goto next; + if (*add_command == '/') { + path = add_command; + goto next; + } + if (sscanf(add_command, "u%d", &width) == 1) { if ((width == 32) || (width == 64)) goto next; @@ -4466,12 +4510,14 @@ void parse_add_command(char *add_command) next: add_command = strchr(add_command, ','); - if (add_command) + if (add_command) { + *add_command = '\0'; add_command++; + } } - if (msr_num == 0) { - fprintf(stderr, "--add: (msrDDD | msr0xXXX) required\n"); + if ((msr_num == 0) && (path == NULL)) { + fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n"); fail++; } @@ -4495,7 +4541,7 @@ void parse_add_command(char *add_command) } } - if (add_counter(msr_num, name_buffer, width, scope, type, format)) + if (add_counter(msr_num, path, name_buffer, width, scope, type, format)) fail++; if (fail) { -- GitLab From 41618e63f2a869902f8534f0db337e85d6bd04c8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 9 Feb 2017 18:25:22 -0500 Subject: [PATCH 0791/1084] tools/power turbostat: print sysfs C-state stats When turbostat shows % of time in a CPU idle power state, it has always been showing information from underlying hardware residency counters. While this reflects what the hardware is doing, and is thus useful for understanding the hardware, it doesn't directly tell us what Linux requested -- which is useful for tuning Linux itself. Here we add columns to turbostat to show the Linux cpuidle sub-system statistics: /sys/devices/system/cpu/cpu*/cpuidle/state*/* The first group of columns are the "usage", which is the number of times software requested that C-state in the measurement interval. eg C1 below. The second group of columns are the "time", which is the percentage of the measurement interval time that software has requested the specified C-state. eg C1% below. These software counters can be compared to the underlying hardware residency counters (eg CPU%c1 CPU%c3 CPU%c6 CPU%c7) to compare what software requested to what the hardware delivered. These sysfs attributes are discovered when turbostat starts, rather than being "built in". So the --show and --hide parameters do not know about these dynamic column names.
However "--show sysfs" and "--hide sysfs" act on the entire group of columns: turbostat --show sysfs ... cpu4: POLL: CPUIDLE CORE POLL IDLE cpu4: C1: MWAIT 0x00 cpu4: C1E: MWAIT 0x01 cpu4: C3: MWAIT 0x10 cpu4: C6: MWAIT 0x20 cpu4: C7s: MWAIT 0x32 ... C1 C1E C3 C6 C7s C1% C1E% C3% C6% C7s% 3 6 5 1 188 0.00 0.02 0.00 0.00 99.93 0 6 5 0 58 0.00 0.16 0.02 0.00 99.70 0 0 0 0 9 0.00 0.00 0.00 0.00 99.96 0 0 0 1 24 0.00 0.00 0.00 0.02 99.93 0 0 0 0 9 0.00 0.00 0.00 0.00 99.97 0 0 0 0 32 0.00 0.00 0.00 0.00 99.96 0 0 0 0 7 0.00 0.00 0.00 0.00 99.98 2 0 0 0 36 0.00 0.00 0.00 0.00 99.97 1 0 0 0 13 0.00 0.00 0.00 0.00 99.98 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 4 +- tools/power/x86/turbostat/turbostat.c | 161 +++++++++++++++++++++++--- 2 files changed, 147 insertions(+), 18 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index a08de27713e0..e31b7213fd45 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -47,9 +47,9 @@ name as necessary to disambiguate it from others is necessary. Note that option default: delta .fi .PP -\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. +\fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. .PP -\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. +\fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. .PP \fB--Dump\fP displays the raw counter values. 
.PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 841f837e317f..eb6cc8ccef06 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -208,7 +208,7 @@ struct pkg_data { #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; -enum counter_type {COUNTER_CYCLES, COUNTER_SECONDS}; +enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC}; enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT}; struct msr_counter { @@ -222,6 +222,7 @@ struct msr_counter { unsigned int flags; #define FLAGS_HIDE (1 << 0) #define FLAGS_SHOW (1 << 1) +#define SYSFS_PERCPU (1 << 1) }; struct sys_counters { @@ -379,6 +380,7 @@ struct msr_counter bic[] = { { 0x0, "Core" }, { 0x0, "CPU" }, { 0x0, "Mod%c6" }, + { 0x0, "sysfs" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -420,9 +422,10 @@ struct msr_counter bic[] = { #define BIC_Core (1ULL << 35) #define BIC_CPU (1ULL << 36) #define BIC_Mod_c6 (1ULL << 37) +#define BIC_sysfs (1ULL << 38) unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; -unsigned long long bic_present; +unsigned long long bic_present = BIC_sysfs; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) @@ -489,9 +492,6 @@ void print_header(void) if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "\tSMI"); - if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "\tCPU%%c1"); - for (mp = sys.tp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) @@ -499,10 +499,12 @@ void print_header(void) else outp += sprintf(outp, "\t%10.10s", mp->name); } else { - outp += sprintf(outp, "\t%-7.7s", mp->name); + outp += sprintf(outp, "\t%s", mp->name); } } + if (DO_BIC(BIC_CPU_c1)) + outp += sprintf(outp, "\tCPU%%c1"); if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, "\tCPU%%c3"); if (DO_BIC(BIC_CPU_c6)) @@ -523,7 +525,7 @@ void print_header(void) else outp += sprintf(outp, "\t%10.10s", mp->name); } else { - outp += sprintf(outp, "\t%-7.7s", mp->name); + outp += sprintf(outp, "\t%s", mp->name); } } @@ -592,7 +594,7 @@ void print_header(void) else outp += sprintf(outp, "\t%10.10s", mp->name); } else { - outp += sprintf(outp, "\t%-7.7s", mp->name); + outp += sprintf(outp, "\t%s", mp->name); } } @@ -753,10 +755,6 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "\t%d", t->smi_count); - /* C1 */ - if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/tsc); - /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { @@ -767,10 +765,18 @@ int format_counters(struct thread_data *t, struct core_data *c, } else if (mp->format == FORMAT_DELTA) { outp += sprintf(outp, "\t%lld", t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/tsc); + if (mp->type == COUNTER_USEC) + outp += sprintf(outp, "\t%.2f", t->counter[i]/interval_float/10000); + else + outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/tsc); } } + /* C1 */ + if (DO_BIC(BIC_CPU_c1)) + outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/tsc); + + /* print per-core data only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; @@ -1286,6 +1292,8 @@ void compute_average(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.tp; 
mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; + if (mp->flags & SYSFS_PERCPU && mp->type == COUNTER_ITEMS) + continue; average.threads.counter[i] /= topo.num_cpus; } for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { @@ -1348,7 +1356,16 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) if (get_msr(cpu, mp->msr_num, counterp)) return -1; } else { - *counterp = snapshot_sysfs_counter(mp->path); + char path[128]; + + if (mp->flags & SYSFS_PERCPU) { + sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", + cpu, mp->path); + + *counterp = snapshot_sysfs_counter(path); + } else { + *counterp = snapshot_sysfs_counter(mp->path); + } } return 0; @@ -2822,6 +2839,48 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model) dump_nhm_cst_cfg(); } +static void +dump_sysfs_cstate_config(void) +{ + char path[64]; + char name_buf[16]; + char desc[64]; + FILE *input; + int state; + char *sp; + + if (!DO_BIC(BIC_sysfs)) + return; + + for (state = 0; state < 10; ++state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(name_buf, sizeof(name_buf), input); + + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '\0'; + + fclose(input); + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(desc, sizeof(desc), input); + + fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); + fclose(input); + } +} + /* * print_epb() @@ -4008,6 +4067,9 @@ void process_cpuid() if (!quiet) dump_cstate_pstate_config_info(family, model); + if (!quiet) + dump_sysfs_cstate_config(); + if (has_skl_msrs(family, model)) calculate_tsc_tweak(); @@ -4380,7 +4442,7 @@ void print_version() { int add_counter(unsigned int msr_num, char *path, char *name, unsigned int width, enum counter_scope scope, - enum counter_type type, enum counter_format format) + enum counter_type type, enum counter_format format, int flags) { struct msr_counter *msrp; @@ -4397,6 +4459,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp->width = width; msrp->type = type; msrp->format = format; + msrp->flags = flags; switch (scope) { @@ -4486,6 +4549,10 @@ void parse_add_command(char *add_command) type = COUNTER_SECONDS; goto next; } + if (!strncmp(add_command, "usec", strlen("usec"))) { + type = COUNTER_USEC; + goto next; + } if (!strncmp(add_command, "raw", strlen("raw"))) { format = FORMAT_RAW; goto next; @@ -4541,7 +4608,7 @@ void parse_add_command(char *add_command) } } - if (add_counter(msr_num, path, name_buffer, width, scope, type, format)) + if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0)) fail++; if (fail) { @@ -4549,6 +4616,65 @@ void parse_add_command(char *add_command) exit(1); } } + +void probe_sysfs(void) +{ + char path[64]; + char name_buf[16]; + FILE *input; + int state; + char *sp; + + if (!DO_BIC(BIC_sysfs)) + return; + + for (state = 10; state > 0; --state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(name_buf, sizeof(name_buf), input); + + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '%'; + *(sp + 1) = '\0'; + + 
fclose(input); + + sprintf(path, "cpuidle/state%d/time", state); + + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, + FORMAT_PERCENT, SYSFS_PERCPU); + } + + for (state = 10; state > 0; --state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", + base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + fgets(name_buf, sizeof(name_buf), input); + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '\0'; + fclose(input); + + sprintf(path, "cpuidle/state%d/usage", state); + + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, + FORMAT_DELTA, SYSFS_PERCPU); + } + +} + /* * HIDE_LIST - hide this list of counters, show the rest [default] * SHOW_LIST - show this list of counters, hide the rest @@ -4581,6 +4707,7 @@ void parse_show_hide(char *optarg, enum show_hide_mode new_mode) * multiple invocations simply clear more bits in enabled mask */ bic_enabled &= ~bic_lookup(optarg); + } void cmdline(int argc, char **argv) @@ -4682,6 +4809,8 @@ int main(int argc, char **argv) if (!quiet) print_version(); + probe_sysfs(); + turbostat_init(); /* dump counters and exit */ -- GitLab From 1ef7d21afe2197013aefe0e93641aa2c5a9ac3db Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 23:54:15 -0500 Subject: [PATCH 0792/1084] tools/power turbostat: add --cpu parameter With the --cpu parameter, turbostat prints only lines for the specified set of CPUs: sudo ./turbostat --quiet --show Core,CPU --cpu 0,1,3..5,6-7 Core CPU - - 0 0 0 4 1 1 1 5 2 6 3 3 3 7 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 + tools/power/x86/turbostat/turbostat.c | 95 ++++++++++++++++++++++++++- 2 files changed, 95 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e31b7213fd45..efe6a7147ff2 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -47,6 +47,8 @@ name as necessary to disambiguate it from others is necessary. Note that option default: delta .fi .PP +\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. cpu-set is a comma delimited list of cpu ranges. cpu ranges can be individual cpu numbers or start and end numbers, separated by ".." or '-'. eg. 1,2,8,14..17,21-44 +.PP \fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. .PP \fB--show column\fP show only the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index eb6cc8ccef06..8c965bb2f461 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -143,8 +143,9 @@ unsigned int has_misc_feature_control; int backwards_count; char *progname; -cpu_set_t *cpu_present_set, *cpu_affinity_set; -size_t cpu_present_setsize, cpu_affinity_setsize; +#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... 
*/ +cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; +size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_COUNTERS 16 struct thread_data { @@ -700,6 +701,11 @@ int format_counters(struct thread_data *t, struct core_data *c, if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; + /*if not summary line and --cpu is used */ + if ((t != &average.threads) && + (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) + return 0; + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; tsc = t->tsc * tsc_tweak; @@ -4096,6 +4102,7 @@ void help() "to print statistics, until interrupted.\n" "--add add a counter\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + "--cpu cpu-set limit output to summary plus cpu-set cpu-set\n" "--quiet skip decoding system configuration header\n" "--interval sec Override default 5-second measurement interval\n" "--help print this help message\n" @@ -4158,6 +4165,15 @@ void topology_probe() CPU_ZERO_S(cpu_present_setsize, cpu_present_set); for_all_proc_cpus(mark_cpu_present); + /* + * Validate that all cpus in cpu_subset are also in cpu_present_set + */ + for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { + if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) + if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) + err(1, "cpu%d not present", i); + } + /* * Allocate and initialize cpu_affinity_set */ @@ -4675,6 +4691,77 @@ void probe_sysfs(void) } + +/* + * parse cpuset with following syntax + * 1,2,4..6,8-10 and set bits in cpu_subset + */ +void parse_cpu_command(char *optarg) +{ + unsigned int start, end; + char *next; + + cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); + if (cpu_subset == NULL) + err(3, "CPU_ALLOC"); + cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS); + + CPU_ZERO_S(cpu_subset_size, cpu_subset); + + next = optarg; + + while (next && *next) { + + if (*next == '-') /* no negative cpu numbers */ + goto error; + + start = strtoul(next, &next, 10); + + if (start >= CPU_SUBSET_MAXCPUS) + goto error; + CPU_SET_S(start, cpu_subset_size, cpu_subset); + + if (*next == '\0') + break; + + if (*next == ',') { + next += 1; + continue; + } + + if (*next == '-') { + next += 1; /* start range */ + } else if (*next == '.') { + next += 1; + if (*next == '.') + next += 1; /* start range */ + else + goto error; + } + + end = strtoul(next, &next, 10); + if (end <= start) + goto error; + + while (++start <= end) { + if (start >= CPU_SUBSET_MAXCPUS) + goto error; + CPU_SET_S(start, cpu_subset_size, cpu_subset); + } + + if (*next == ',') + next += 1; + else if (*next != '\0') + goto error; + } + + return; + +error: + fprintf(stderr, "'--cpu %s' malformed\n", optarg); + exit(-1); +} + /* * HIDE_LIST - hide this list of counters, show the rest [default] * SHOW_LIST - show this list of counters, hide the rest @@ -4716,6 +4803,7 @@ void cmdline(int argc, char **argv) int option_index = 0; static struct option long_options[] = { {"add", required_argument, 0, 'a'}, + {"cpu", required_argument, 0, 'c'}, {"Dump", no_argument, 0, 'D'}, {"debug", no_argument, 0, 'd'}, /* internal, not documented */ {"interval", required_argument, 0, 'i'}, @@ -4741,6 +4829,9 @@ void cmdline(int argc, char **argv) case 'a': parse_add_command(optarg); break; + case 'c': + parse_cpu_command(optarg); + break; case 'D': dump_only++; break; -- GitLab From 218f0e8d5c388767be9c78fd2c5bc0a6f416d6d0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 14 Feb 2017 22:07:52 -0500 Subject: [PATCH 0793/1084] tools/power turbostat: fix zero 
IRQ count shown in one-shot command mode The IRQ column has been working in periodic mode, but not in one-shot command mode, where it showed only 0 until now. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8c965bb2f461..48b540a2fe81 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -619,9 +619,9 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "c1: %016llX\n", t->c1); if (DO_BIC(BIC_IRQ)) - outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); + outp += sprintf(outp, "IRQ: %d\n", t->irq_count); if (DO_BIC(BIC_SMI)) - outp += sprintf(outp, "SMI: %08X\n", t->smi_count); + outp += sprintf(outp, "SMI: %d\n", t->smi_count); for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", @@ -2410,8 +2410,9 @@ int snapshot_gfx_mhz(void) */ int snapshot_proc_sysfs_files(void) { - if (snapshot_proc_interrupts()) - return 1; + if (DO_BIC(BIC_IRQ)) + if (snapshot_proc_interrupts()) + return 1; if (DO_BIC(BIC_GFX_rc6)) snapshot_gfx_rc6_ms(); @@ -4391,6 +4392,7 @@ int fork_it(char **argv) pid_t child_pid; int status; + snapshot_proc_sysfs_files(); status = for_all_cpus(get_counters, EVEN_COUNTERS); if (status) exit(status); @@ -4417,6 +4419,7 @@ int fork_it(char **argv) * n.b. fork_it() does not check for errors from for_all_cpus() * because re-starting is problematic when forking */ + snapshot_proc_sysfs_files(); for_all_cpus(get_counters, ODD_COUNTERS); gettimeofday(&tv_odd, (struct timezone *)NULL); timersub(&tv_odd, &tv_even, &tv_delta); @@ -4438,6 +4441,7 @@ int get_and_dump_counters(void) { int status; + snapshot_proc_sysfs_files(); status = for_all_cpus(get_counters, ODD_COUNTERS); if (status) return status; -- GitLab From c8ade3616a1a5cf7767c0338d2b02007796f5d88 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 15 Feb 2017 17:15:11 -0500 Subject: [PATCH 0794/1084] tools/power turbostat: Add --list option to show available header names It is handy to know the list of column header names, so that they can be used with --show and --hide. The new --list option shows them: sudo ./turbostat --list --hide sysfs ,Core,CPU,Avg_MHz,Busy%,Bzy_MHz,TSC_MHz,IRQ,SMI,CPU%c1,CPU%c3,CPU%c6,CPU%c7,CoreTmp,PkgTmp,GFX%rc6,GFXMHz,PkgWatt,CorWatt,GFXWatt Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 118 ++++++++++++++------------ 1 file changed, 66 insertions(+), 52 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 48b540a2fe81..7b02fbb65893 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -52,6 +52,7 @@ unsigned int debug; unsigned int quiet; unsigned int rapl_joules; unsigned int summary_only; +unsigned int list_header_only; unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; @@ -469,133 +470,133 @@ unsigned long long bic_lookup(char *name_list) return retval; } -void print_header(void) +void print_header(char *delim) { struct msr_counter *mp; if (DO_BIC(BIC_Package)) - outp += sprintf(outp, "\tPackage"); + outp += sprintf(outp, "%sPackage", delim); if (DO_BIC(BIC_Core)) - outp += sprintf(outp, "\tCore"); + outp += sprintf(outp, "%sCore", delim); if (DO_BIC(BIC_CPU)) - outp += sprintf(outp, "\tCPU"); + outp += sprintf(outp, "%sCPU", delim); if
(DO_BIC(BIC_Avg_MHz)) - outp += sprintf(outp, "\tAvg_MHz"); + outp += sprintf(outp, "%sAvg_MHz", delim); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "\tBusy%%"); + outp += sprintf(outp, "%sBusy%%", delim); if (DO_BIC(BIC_Bzy_MHz)) - outp += sprintf(outp, "\tBzy_MHz"); + outp += sprintf(outp, "%sBzy_MHz", delim); if (DO_BIC(BIC_TSC_MHz)) - outp += sprintf(outp, "\tTSC_MHz"); + outp += sprintf(outp, "%sTSC_MHz", delim); if (DO_BIC(BIC_IRQ)) - outp += sprintf(outp, "\tIRQ"); + outp += sprintf(outp, "%sIRQ", delim); if (DO_BIC(BIC_SMI)) - outp += sprintf(outp, "\tSMI"); + outp += sprintf(outp, "%sSMI", delim); for (mp = sys.tp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "\t%18.18s", mp->name); + outp += sprintf(outp, "%s%18.18s", delim, mp->name); else - outp += sprintf(outp, "\t%10.10s", mp->name); + outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "\t%s", mp->name); + outp += sprintf(outp, "%s%s", delim, mp->name); } } if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "\tCPU%%c1"); + outp += sprintf(outp, "%sCPU%%c1", delim); if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\tCPU%%c3"); + outp += sprintf(outp, "%sCPU%%c3", delim); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "\tCPU%%c6"); + outp += sprintf(outp, "%sCPU%%c6", delim); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "\tCPU%%c7"); + outp += sprintf(outp, "%sCPU%%c7", delim); if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "\tMod%%c6"); + outp += sprintf(outp, "%sMod%%c6", delim); if (DO_BIC(BIC_CoreTmp)) - outp += sprintf(outp, "\tCoreTmp"); + outp += sprintf(outp, "%sCoreTmp", delim); for (mp = sys.cp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "\t%18.18s", mp->name); + outp += sprintf(outp, "%s%18.18s", delim, mp->name); else - outp += sprintf(outp, "\t%10.10s", mp->name); + outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "\t%s", mp->name); + outp += sprintf(outp, "%s%s", delim, mp->name); } } if (DO_BIC(BIC_PkgTmp)) - outp += sprintf(outp, "\tPkgTmp"); + outp += sprintf(outp, "%sPkgTmp", delim); if (DO_BIC(BIC_GFX_rc6)) - outp += sprintf(outp, "\tGFX%%rc6"); + outp += sprintf(outp, "%sGFX%%rc6", delim); if (DO_BIC(BIC_GFXMHz)) - outp += sprintf(outp, "\tGFXMHz"); + outp += sprintf(outp, "%sGFXMHz", delim); if (do_skl_residency) { - outp += sprintf(outp, "\tTotl%%C0"); - outp += sprintf(outp, "\tAny%%C0"); - outp += sprintf(outp, "\tGFX%%C0"); - outp += sprintf(outp, "\tCPUGFX%%"); + outp += sprintf(outp, "%sTotl%%C0", delim); + outp += sprintf(outp, "%sAny%%C0", delim); + outp += sprintf(outp, "%sGFX%%C0", delim); + outp += sprintf(outp, "%sCPUGFX%%", delim); } if (DO_BIC(BIC_Pkgpc2)) - outp += sprintf(outp, "\tPkg%%pc2"); + outp += sprintf(outp, "%sPkg%%pc2", delim); if (DO_BIC(BIC_Pkgpc3)) - outp += sprintf(outp, "\tPkg%%pc3"); + outp += sprintf(outp, "%sPkg%%pc3", delim); if (DO_BIC(BIC_Pkgpc6)) - outp += sprintf(outp, "\tPkg%%pc6"); + outp += sprintf(outp, "%sPkg%%pc6", delim); if (DO_BIC(BIC_Pkgpc7)) - outp += sprintf(outp, "\tPkg%%pc7"); + outp += sprintf(outp, "%sPkg%%pc7", delim); if (DO_BIC(BIC_Pkgpc8)) - outp += sprintf(outp, "\tPkg%%pc8"); + outp += sprintf(outp, "%sPkg%%pc8", delim); if (DO_BIC(BIC_Pkgpc9)) - outp += sprintf(outp, "\tPkg%%pc9"); + outp += sprintf(outp, "%sPkg%%pc9", delim); if (DO_BIC(BIC_Pkgpc10)) - outp += sprintf(outp, "\tPk%%pc10"); + outp += sprintf(outp, "%sPk%%pc10", 
delim); if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) - outp += sprintf(outp, "\tPkgWatt"); + outp += sprintf(outp, "%sPkgWatt", delim); if (DO_BIC(BIC_CorWatt)) - outp += sprintf(outp, "\tCorWatt"); + outp += sprintf(outp, "%sCorWatt", delim); if (DO_BIC(BIC_GFXWatt)) - outp += sprintf(outp, "\tGFXWatt"); + outp += sprintf(outp, "%sGFXWatt", delim); if (DO_BIC(BIC_RAMWatt)) - outp += sprintf(outp, "\tRAMWatt"); + outp += sprintf(outp, "%sRAMWatt", delim); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, "\tPKG_%%"); + outp += sprintf(outp, "%sPKG_%%", delim); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, "\tRAM_%%"); + outp += sprintf(outp, "%sRAM_%%", delim); } else if (do_rapl && rapl_joules) { if (DO_BIC(BIC_Pkg_J)) - outp += sprintf(outp, "\tPkg_J"); + outp += sprintf(outp, "%sPkg_J", delim); if (DO_BIC(BIC_Cor_J)) - outp += sprintf(outp, "\tCor_J"); + outp += sprintf(outp, "%sCor_J", delim); if (DO_BIC(BIC_GFX_J)) - outp += sprintf(outp, "\tGFX_J"); + outp += sprintf(outp, "%sGFX_J", delim); if (DO_BIC(BIC_RAM_J)) - outp += sprintf(outp, "\tRAM_J"); + outp += sprintf(outp, "%sRAM_J", delim); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, "\tPKG_%%"); + outp += sprintf(outp, "%sPKG_%%", delim); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, "\tRAM_%%"); + outp += sprintf(outp, "%sRAM_%%", delim); } for (mp = sys.pp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "\t%18.18s", mp->name); + outp += sprintf(outp, "%s%18.18s", delim, mp->name); else - outp += sprintf(outp, "\t%10.10s", mp->name); + outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "\t%s", mp->name); + outp += sprintf(outp, "%s%s", delim, mp->name); } } @@ -933,7 +934,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ static int printed; if (!printed || !summary_only) - print_header(); + print_header("\t"); if (topo.num_cpus > 1) format_counters(&average.threads, &average.cores, @@ -4107,6 +4108,7 @@ void help() "--quiet skip decoding system configuration header\n" "--interval sec Override default 5-second measurement interval\n" "--help print this help message\n" + "--list list column headers only\n" "--out file create or truncate \"file\" for all output\n" "--version print version information\n" "\n" @@ -4814,6 +4816,7 @@ void cmdline(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help {"Joules", no_argument, 0, 'J'}, + {"list", no_argument, 0, 'l'}, {"out", required_argument, 0, 'o'}, {"Package", no_argument, 0, 'p'}, {"processor", no_argument, 0, 'p'}, @@ -4866,6 +4869,10 @@ void cmdline(int argc, char **argv) case 'J': rapl_joules++; break; + case 'l': + list_header_only++; + quiet++; + break; case 'o': outf = fopen_or_die(optarg, "w"); break; @@ -4912,6 +4919,13 @@ int main(int argc, char **argv) if (dump_only) return get_and_dump_counters(); + /* list header and exit */ + if (list_header_only) { + print_header(","); + flush_output_stdout(); + return 0; + } + /* * if any params left, it must be a command to fork */ -- GitLab From 0de6c0df4ecc32ffaf064fea3a43846ba4474bd0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 15 Feb 2017 21:45:40 -0500 Subject: [PATCH 0795/1084] tools/power turbostat: use wide columns to display large numbers When a counter overflows 7 columns, it shifts the remaining columns to the right, so they no longer line up under their column header.
Update turbostat to detect when it is handling large numbers, and switch to wider columns where necessary. Reported-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 68 ++++++++++++++++++++----- 1 file changed, 55 insertions(+), 13 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7b02fbb65893..cafc6bba6539 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -50,6 +50,7 @@ int *fd_percpu; struct timespec interval_ts = {5, 0}; unsigned int debug; unsigned int quiet; +unsigned int sums_need_wide_columns; unsigned int rapl_joules; unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; unsigned int do_snb_cstates; unsigned int do_knl_cstates; @@ -154,7 +155,7 @@ struct thread_data { unsigned long long aperf; unsigned long long mperf; unsigned long long c1; - unsigned int irq_count; + unsigned long long irq_count; unsigned int smi_count; unsigned int cpu_id; unsigned int flags; @@ -489,8 +490,13 @@ void print_header(char *delim) if (DO_BIC(BIC_TSC_MHz)) outp += sprintf(outp, "%sTSC_MHz", delim); - if (DO_BIC(BIC_IRQ)) - outp += sprintf(outp, "%sIRQ", delim); + if (DO_BIC(BIC_IRQ)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "%s IRQ", delim); + else + outp += sprintf(outp, "%sIRQ", delim); + } + if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%sSMI", delim); for (mp = sys.tp; mp; mp = mp->next) { @@ -501,7 +507,10 @@ void print_header(char *delim) else outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "%s%s", delim, mp->name); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", delim, mp->name); + else + outp += sprintf(outp, "%s%s", delim, mp->name); } } @@ -527,7 +536,10 @@ void print_header(char *delim) else outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "%s%s", delim, mp->name); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", delim, mp->name); + else + outp += sprintf(outp, "%s%s", delim, mp->name); } } @@ -596,7 +608,10 @@ void print_header(char *delim) else outp += sprintf(outp, "%s%10.10s", delim, mp->name); } else { - outp += sprintf(outp, "%s%s", delim, mp->name); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", delim, mp->name); + else + outp += sprintf(outp, "%s%s", delim, mp->name); } } @@ -620,7 +635,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "c1: %016llX\n", t->c1); if (DO_BIC(BIC_IRQ)) - outp += sprintf(outp, "IRQ: %d\n", t->irq_count); + outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "SMI: %d\n", t->smi_count); @@ -755,8 +770,12 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); /* IRQ */ - if (DO_BIC(BIC_IRQ)) - outp += sprintf(outp, "\t%d", t->irq_count); + if (DO_BIC(BIC_IRQ)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "\t%8lld", t->irq_count); + else + outp += sprintf(outp, "\t%lld", t->irq_count); + } /* SMI */ if (DO_BIC(BIC_SMI)) @@ -770,7 +789,10 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", t->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%lld", t->counter[i]); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "\t%8lld", t->counter[i]); + else + outp += sprintf(outp, "\t%lld",
t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/tsc); } @@ -897,7 +922,10 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "\t0x%016llx", p->counter[i]); } else if (mp->format == FORMAT_DELTA) { - outp += sprintf(outp, "\t%lld", p->counter[i]); + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "\t%8lld", p->counter[i]); + else + outp += sprintf(outp, "\t%lld", p->counter[i]); } else if (mp->format == FORMAT_PERCENT) { outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/tsc); } @@ -1272,6 +1300,9 @@ void compute_average(struct thread_data *t, struct core_data *c, average.threads.mperf /= topo.num_cpus; average.threads.c1 /= topo.num_cpus; + if (average.threads.irq_count > 9999999) + sums_need_wide_columns = 1; + average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; @@ -1299,18 +1330,29 @@ void compute_average(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; - if (mp->flags & SYSFS_PERCPU && mp->type == COUNTER_ITEMS) + if (mp->type == COUNTER_ITEMS) { + if (average.threads.counter[i] > 9999999) + sums_need_wide_columns = 1; continue; + } average.threads.counter[i] /= topo.num_cpus; } for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; + if (mp->type == COUNTER_ITEMS) { + if (average.cores.counter[i] > 9999999) + sums_need_wide_columns = 1; + } average.cores.counter[i] /= topo.num_cores; } for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) continue; + if (mp->type == COUNTER_ITEMS) { + if (average.packages.counter[i] > 9999999) + sums_need_wide_columns = 1; + } average.packages.counter[i] /= topo.num_packages; } } -- GitLab From 6168c2e0fb5084d187aa8f3ec4093db5e161d4dc Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 16 Feb 2017 23:07:51 -0500 Subject: [PATCH 0796/1084] tools/power turbostat: update --list feature Make it possible to take the entire un-edited output from `turbostat --list` and feed it to "turbostat --show" or "turbostat --hide". To do this, the leading comma was removed (no matter what columns are active) and also the dynamic C-state columns "C1, C2, C3" etc. are replaced by the string "sysfs", which refers to them as a group.
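The key to dropping the leading separator "no matter what columns are active" is the small print-state idiom used throughout the patch below. As a standalone illustration (a sketch only, not the patch itself; the names print_list and cols are made up for the example), the first item is printed with an empty prefix and every later item is prefixed with the delimiter:

	#include <stdio.h>

	/* Print items separated by delim, with no leading separator. */
	static void print_list(const char **names, int n, const char *delim)
	{
		int printed = 0;
		int i;

		for (i = 0; i < n; i++)
			printf("%s%s", printed++ ? delim : "", names[i]);
		printf("\n");
	}

	int main(void)
	{
		const char *cols[] = { "Core", "CPU", "Avg_MHz" };

		print_list(cols, 3, ",");	/* prints: Core,CPU,Avg_MHz */
		return 0;
	}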
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 219 +++++++++++++------------- 1 file changed, 113 insertions(+), 106 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index cafc6bba6539..851eaf06f358 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -474,33 +474,38 @@ unsigned long long bic_lookup(char *name_list) void print_header(char *delim) { struct msr_counter *mp; + int printed = 0; if (DO_BIC(BIC_Package)) - outp += sprintf(outp, "%sPackage", delim); + outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) - outp += sprintf(outp, "%sCore", delim); + outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU)) - outp += sprintf(outp, "%sCPU", delim); + outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); if (DO_BIC(BIC_Avg_MHz)) - outp += sprintf(outp, "%sAvg_MHz", delim); + outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "%sBusy%%", delim); + outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); if (DO_BIC(BIC_Bzy_MHz)) - outp += sprintf(outp, "%sBzy_MHz", delim); + outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); if (DO_BIC(BIC_TSC_MHz)) - outp += sprintf(outp, "%sTSC_MHz", delim); + outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); if (DO_BIC(BIC_IRQ)) { if (sums_need_wide_columns) - outp += sprintf(outp, "%s IRQ", delim); + outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); else - outp += sprintf(outp, "%sIRQ", delim); + outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); } if (DO_BIC(BIC_SMI)) - outp += sprintf(outp, "%sSMI", delim); + outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); for (mp = sys.tp; mp; mp = mp->next) { + if (*delim == ',') { + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), "sysfs"); + break; + } if (mp->format == FORMAT_RAW) { if (mp->width == 64) outp += sprintf(outp, "%s%18.18s", delim, mp->name); @@ -515,19 +520,19 @@ void print_header(char *delim) } if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "%sCPU%%c1", delim); + outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "%sCPU%%c3", delim); + outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "%sCPU%%c6", delim); + outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "%sCPU%%c7", delim); + outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "%sMod%%c6", delim); + outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); if (DO_BIC(BIC_CoreTmp)) - outp += sprintf(outp, "%sCoreTmp", delim); + outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); for (mp = sys.cp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { @@ -544,62 +549,62 @@ void print_header(char *delim) } if (DO_BIC(BIC_PkgTmp)) - outp += sprintf(outp, "%sPkgTmp", delim); + outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); if (DO_BIC(BIC_GFX_rc6)) - outp += sprintf(outp, "%sGFX%%rc6", delim); + outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); if (DO_BIC(BIC_GFXMHz)) - outp += sprintf(outp, "%sGFXMHz", delim); + outp += sprintf(outp, "%sGFXMHz", (printed++ ? 
delim : "")); if (do_skl_residency) { - outp += sprintf(outp, "%sTotl%%C0", delim); - outp += sprintf(outp, "%sAny%%C0", delim); - outp += sprintf(outp, "%sGFX%%C0", delim); - outp += sprintf(outp, "%sCPUGFX%%", delim); + outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); + outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); + outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); + outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); } if (DO_BIC(BIC_Pkgpc2)) - outp += sprintf(outp, "%sPkg%%pc2", delim); + outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc3)) - outp += sprintf(outp, "%sPkg%%pc3", delim); + outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc6)) - outp += sprintf(outp, "%sPkg%%pc6", delim); + outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc7)) - outp += sprintf(outp, "%sPkg%%pc7", delim); + outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc8)) - outp += sprintf(outp, "%sPkg%%pc8", delim); + outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc9)) - outp += sprintf(outp, "%sPkg%%pc9", delim); + outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc10)) - outp += sprintf(outp, "%sPk%%pc10", delim); + outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); if (do_rapl && !rapl_joules) { if (DO_BIC(BIC_PkgWatt)) - outp += sprintf(outp, "%sPkgWatt", delim); + outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_CorWatt)) - outp += sprintf(outp, "%sCorWatt", delim); + outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_GFXWatt)) - outp += sprintf(outp, "%sGFXWatt", delim); + outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_RAMWatt)) - outp += sprintf(outp, "%sRAMWatt", delim); + outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, "%sPKG_%%", delim); + outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, "%sRAM_%%", delim); + outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); } else if (do_rapl && rapl_joules) { if (DO_BIC(BIC_Pkg_J)) - outp += sprintf(outp, "%sPkg_J", delim); + outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); if (DO_BIC(BIC_Cor_J)) - outp += sprintf(outp, "%sCor_J", delim); + outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); if (DO_BIC(BIC_GFX_J)) - outp += sprintf(outp, "%sGFX_J", delim); + outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); if (DO_BIC(BIC_RAM_J)) - outp += sprintf(outp, "%sRAM_J", delim); + outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, "%sPKG_%%", delim); + outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, "%sRAM_%%", delim); + outp += sprintf(outp, "%sRAM_%%", (printed++ ? 
delim : "")); } for (mp = sys.pp; mp; mp = mp->next) { if (mp->format == FORMAT_RAW) { @@ -708,6 +713,8 @@ int format_counters(struct thread_data *t, struct core_data *c, char *fmt8; int i; struct msr_counter *mp; + char *delim = "\t"; + int printed = 0; /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) @@ -729,81 +736,81 @@ int format_counters(struct thread_data *t, struct core_data *c, /* topo columns, print blanks on 1st (average) line */ if (t == &average.threads) { if (DO_BIC(BIC_Package)) - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_Core)) - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU)) - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } else { if (DO_BIC(BIC_Package)) { if (p) - outp += sprintf(outp, "\t%d", p->package_id); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id); else - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } if (DO_BIC(BIC_Core)) { if (c) - outp += sprintf(outp, "\t%d", c->core_id); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); else - outp += sprintf(outp, "\t-"); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } if (DO_BIC(BIC_CPU)) - outp += sprintf(outp, "\t%d", t->cpu_id); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); } if (DO_BIC(BIC_Avg_MHz)) - outp += sprintf(outp, "\t%.0f", + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc); if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) - outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); else - outp += sprintf(outp, "\t%.0f", + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), tsc / units * t->aperf / t->mperf / interval_float); } if (DO_BIC(BIC_TSC_MHz)) - outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float); /* IRQ */ if (DO_BIC(BIC_IRQ)) { if (sums_need_wide_columns) - outp += sprintf(outp, "\t%8lld", t->irq_count); + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count); else - outp += sprintf(outp, "\t%lld", t->irq_count); + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); } /* SMI */ if (DO_BIC(BIC_SMI)) - outp += sprintf(outp, "\t%d", t->smi_count); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); /* Added counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]); + outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) t->counter[i]); else - outp += sprintf(outp, "\t0x%016llx", t->counter[i]); + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); } else if (mp->format == FORMAT_DELTA) { if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "\t%8lld", t->counter[i]); + outp += sprintf(outp, "%s%8lld", (printed++ ? 
delim : ""), t->counter[i]); else - outp += sprintf(outp, "\t%lld", t->counter[i]); + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { if (mp->type == COUNTER_USEC) - outp += sprintf(outp, "\t%.2f", t->counter[i]/interval_float/10000); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000); else - outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc); } } /* C1 */ if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc); /* print per-core data only for 1st thread in core */ @@ -811,32 +818,32 @@ int format_counters(struct thread_data *t, struct core_data *c, goto done; if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc); /* Mod%c6 */ if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); if (DO_BIC(BIC_CoreTmp)) - outp += sprintf(outp, "\t%d", c->core_temp_c); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]); + outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) c->counter[i]); else - outp += sprintf(outp, "\t0x%016llx", c->counter[i]); + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); } else if (mp->format == FORMAT_DELTA) { if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "\t%8lld", c->counter[i]); + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); else - outp += sprintf(outp, "\t%lld", c->counter[i]); + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc); } } @@ -846,88 +853,88 @@ int format_counters(struct thread_data *t, struct core_data *c, /* PkgTmp */ if (DO_BIC(BIC_PkgTmp)) - outp += sprintf(outp, "\t%d", p->pkg_temp_c); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); /* GFXrc6 */ if (DO_BIC(BIC_GFX_rc6)) { if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ - outp += sprintf(outp, "\t**.**"); + outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); } else { - outp += sprintf(outp, "\t%.2f", + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->gfx_rc6_ms / 10.0 / interval_float); } } /* GFXMHz */ if (DO_BIC(BIC_GFXMHz)) - outp += sprintf(outp, "\t%d", p->gfx_mhz); + outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), p->gfx_mhz); /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (do_skl_residency) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/tsc); - outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc); } if (DO_BIC(BIC_Pkgpc2)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc); if (DO_BIC(BIC_Pkgpc3)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc); if (DO_BIC(BIC_Pkgpc6)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc); if (DO_BIC(BIC_Pkgpc7)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc); if (DO_BIC(BIC_Pkgpc8)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc); if (DO_BIC(BIC_Pkgpc9)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc); if (DO_BIC(BIC_Pkgpc10)) - outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc); /* * If measurement interval exceeds minimum RAPL Joule Counter range, * indicate that results are suspect by printing "**" in fraction place. */ if (interval_float < rapl_joule_counter_range) - fmt8 = "\t%.2f"; + fmt8 = "%s%.2f"; else fmt8 = "%6.0f**"; if (DO_BIC(BIC_PkgWatt)) - outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); if (DO_BIC(BIC_CorWatt)) - outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); if (DO_BIC(BIC_GFXWatt)) - outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); if (DO_BIC(BIC_RAMWatt)) - outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float); if (DO_BIC(BIC_Pkg_J)) - outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); if (DO_BIC(BIC_Cor_J)) - outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); if (DO_BIC(BIC_GFX_J)) - outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), p->energy_gfx * rapl_energy_units); if (DO_BIC(BIC_RAM_J)) - outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]); + outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) p->counter[i]); else - outp += sprintf(outp, "\t0x%016llx", p->counter[i]); + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); } else if (mp->format == FORMAT_DELTA) { if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "\t%8lld", p->counter[i]); + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); else - outp += sprintf(outp, "\t%lld", p->counter[i]); + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc); } } -- GitLab From da67e2b9fd1d846a41978690da0a899d8e4378ec Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 15 Feb 2017 00:30:22 -0500 Subject: [PATCH 0797/1084] tools/power turbostat: turbostat.8 update update examples to show recently updated features. In particular --add --show --hide --cpu --list Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 238 +++++++++++++++----------- 1 file changed, 140 insertions(+), 98 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index efe6a7147ff2..b1b1ab80102c 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -16,9 +16,9 @@ idle power-state statistics, temperature and power on X86 processors. There are two ways to invoke turbostat. The first method is to supply a \fBcommand\fP, which is forked and statistics are printed -upon its completion. +in one-shot upon its completion. The second method is to omit the command, -and turbostat displays statistics every 5 seconds. +and turbostat displays statistics every 5 seconds interval. The 5-second interval can be changed using the --interval option. .PP Some information is not available on older processors. @@ -28,9 +28,10 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional. .nf - location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP} + location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP} msrDDD is a decimal offset, eg. msr16 msr0xXXX is a hex offset, eg. msr0x10 + /sys/path... is an absolute path to a sysfs attribute scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP} sample and print the counter for every cpu, core, or package. 
@@ -45,6 +46,10 @@ name as necessary to disambiguate it from others is necessary. Note that option 'delta' shows the difference in values during the measurement interval. 'percent' shows the delta as a percentage of the cycles elapsed. default: delta + + name: "name_string" + Any string that does not match a key-word above is used + as the column header. .fi .PP \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. cpu-set is a comma delimited list of cpu ranges. cpu ranges can be individual cpu numbers or start and end numbers, separated by ".." or '-'. eg. 1,2,8,14..17,21-44 @@ -68,6 +73,8 @@ The file is truncated if it already exists, and it is created if it does not exi .PP \fB--Package\fP limits output to the system summary plus the 1st thread in each Package. .PP +\fB--list\fP display column header names available for use by --show and --hide, then exit. +.PP \fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings. .PP \fB--Summary\fP limits output to a 1-line System Summary for each interval. @@ -79,24 +86,25 @@ The file is truncated if it already exists, and it is created if it does not exi The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, displays the statistics gathered since it was forked. .PP -.SH DEFAULT FIELD DESCRIPTIONS +.SH ROW DESCRIPTIONS +The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. +.SH COLUMN DESCRIPTIONS .nf +\fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. -\fBAVG_MHz\fP number of cycles executed divided by time elapsed. -\fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. -\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). +\fBPackage\fP processor package number -- not present on systems with a single processor package. +\fBAvg_MHz\fP number of cycles executed divided by time elapsed. Note that this includes idle-time when 0 instructions are executed. +\fBBusy%\fP percent of the measurement interval that the CPU executes instructions, aka. % of time in "C0" state. +\fBBzy_MHz\fP average clock rate while the CPU was not idle (ie. in "c0" state). \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. -.fi -.PP -.SH DEBUG FIELD DESCRIPTIONS -.nf -\fBPackage\fP processor package number. -\fBCore\fP processor core number. 
-Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). -\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. +\fBIRQ\fP The number of interrupts serviced by that CPU during the measurement interval. The system total line is the sum of interrupts serviced across all CPUs. turbostat parses /proc/interrupts to generate this summary. +\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. \fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. -\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. +\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. \fBPkgWatt\fP Watts consumed by the whole package. \fBCorWatt\fP Watts consumed by the core part of the package. \fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors. @@ -104,50 +112,110 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. .fi +.SH TOO MUCH INFORMATION EXAMPLE +By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. +This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug. .PP -.SH PERIODIC EXAMPLE -Without any parameters, turbostat displays statistics ever 5 seconds. -Periodic output goes to stdout, by default, unless --out is used to specify an output file. -The 5-second interval can be changed with th "-i sec" option. -Or a command may be specified as in "FORK EXAMPLE" below. +When you are not interested in all that information, and there are several ways to see only what you want. First the "--quiet" option will skip the configuration information, and turbostat will show only the counter columns. Second, you can reduce the columns with the "--hide" and "--show" options. If you use the "--show" option, then turbostat will show only the columns you list. If you use the "--hide" option, turbostat will show all columns, except the ones you list. 
+.PP +To find out what columns are available for --show and --hide, the "--list" option is available. Note, however, there is an exception. The C-state columns collected from sysfs "C1,C2,C3,C1%,C2%,C3%" are not built-in counters, but are discovered after --show and --hide are processed. You can use the special counter name "sysfs" to refer to all of them at the same time. +.nf +sudo ./turbostat --show sysfs --quiet sleep 10 +10.003837 sec + C1 C1E C3 C6 C7s C1% C1E% C3% C6% C7s% + 4 21 2 2 459 0.14 0.82 0.00 0.00 98.93 + 1 17 2 2 130 0.00 0.02 0.00 0.00 99.80 + 0 0 0 0 31 0.00 0.00 0.00 0.00 99.95 + 2 1 0 0 52 1.14 6.49 0.00 0.00 92.21 + 1 2 0 0 52 0.00 0.08 0.00 0.00 99.86 + 0 0 0 0 71 0.00 0.00 0.00 0.00 99.89 + 0 0 0 0 25 0.00 0.00 0.00 0.00 99.96 + 0 0 0 0 74 0.00 0.00 0.00 0.00 99.94 + 0 1 0 0 24 0.00 0.00 0.00 0.00 99.84 +.fi +.PP +.SH ONE SHOT COMMAND EXAMPLE +If turbostat is invoked with a command, it will fork that command +and output the statistics gathered after the command exits. +In this case, turbostat output goes to stderr, by default. +Output can instead be saved to a file using the --out option. +In this example, the "sleep 10" command is forked, and turbostat waits for it to complete before saving all statistics into "ts.out". Note that "sleep 10" is not part of turbostat, but is simply an example of a command that turbostat can fork. The "ts.out" file is what you want to edit in a very wide window, paste into a spreadsheet, or attach to a bugzilla entry. + .nf -[root@hsw]# ./turbostat - CPU Avg_MHz Busy% Bzy_MHz TSC_MHz - - 488 12.51 3898 3498 - 0 0 0.01 3885 3498 - 4 3897 99.99 3898 3498 - 1 0 0.00 3861 3498 - 5 0 0.00 3882 3498 - 2 1 0.02 3894 3498 - 6 2 0.06 3898 3498 - 3 0 0.00 3849 3498 - 7 0 0.00 3877 3498 +[root@hsw]# ./turbostat -o ts.out sleep 10 +[root@hsw]# +.fi +.SH PERIODIC INTERVAL EXAMPLE +Without a command to fork, turbostat displays statistics ever 5 seconds. +Periodic output goes to stdout, by default, unless --out is used to specify an output file. +The 5-second interval can be changed with the "-i sec" option. +.nf +sudo ./turbostat --quiet --hide sysfs,IRQ,SMI,CoreTmp,PkgTmp,GFX%rc6,GFXMHz,PkgWatt,CorWatt,GFXWatt + Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7 + - - 488 12.52 3900 3498 12.50 0.00 0.00 74.98 + 0 0 5 0.13 3900 3498 99.87 0.00 0.00 0.00 + 0 4 3897 99.99 3900 3498 0.01 + 1 1 0 0.00 3856 3498 0.01 0.00 0.00 99.98 + 1 5 0 0.00 3861 3498 0.01 + 2 2 1 0.02 3889 3498 0.03 0.00 0.00 99.95 + 2 6 0 0.00 3863 3498 0.05 + 3 3 0 0.01 3869 3498 0.02 0.00 0.00 99.97 + 3 7 0 0.00 3878 3498 0.03 + Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7 + - - 491 12.59 3900 3498 12.42 0.00 0.00 74.99 + 0 0 27 0.69 3900 3498 99.31 0.00 0.00 0.00 + 0 4 3898 99.99 3900 3498 0.01 + 1 1 0 0.00 3883 3498 0.01 0.00 0.00 99.99 + 1 5 0 0.00 3898 3498 0.01 + 2 2 0 0.01 3889 3498 0.02 0.00 0.00 99.98 + 2 6 0 0.00 3889 3498 0.02 + 3 3 0 0.00 3856 3498 0.01 0.00 0.00 99.99 + 3 7 0 0.00 3897 3498 0.01 .fi -.SH DEBUG EXAMPLE +This example also shows the use of the --hide option to skip columns that are not wanted. +Note that cpu4 in this example is 99.99% busy, while the other CPUs are all under 1% busy. +Notice that cpu4's HT sibling is cpu0, which is under 1% busy, but can get into CPU%c1 only, +because its cpu4's activity on shared hardware keeps it from entering a deeper C-state. + +.SH SYSTEM CONFIGURATION INFORMATION EXAMPLE -The first row of statistics is a summary for the entire system. 
-For residency % columns, the summary is a weighted average. -For Temperature columns, the summary is the column maximum. -For Watts columns, the summary is a system total. -Subsequent rows show per-CPU statistics. +By default, turbostat always dumps system configuration information +before taking measurements. In the example above, "--quiet" is used +to suppress that output. Here is an example of the configuration information: .nf -turbostat version 4.1 10-Feb, 2015 - Len Brown +turbostat version 2017.02.15 - Len Brown CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3) -CPUID(6): APERF, DTS, PTM, EPB +CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM TM +CPUID(6): APERF, TURBO, DTS, PTM, No-HWP, No-HWPnotify, No-HWPwindow, No-HWPepp, No-HWPpkg, EPB +cpu4: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST No-MWAIT PREFETCH TURBO) +CPUID(7): No-SGX +cpu4: MSR_MISC_PWR_MGMT: 0x00400000 (ENable-EIST_Coordination DISable-EPB DISable-OOB) RAPL: 3121 sec. Joule Counter Range, at 84 Watts -cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300 -8 * 100 = 800 MHz max efficiency -35 * 100 = 3500 MHz TSC frequency -cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) -cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) -cpu0: MSR_TURBO_RATIO_LIMIT: 0x25262727 -37 * 100 = 3700 MHz max turbo 4 active cores -38 * 100 = 3800 MHz max turbo 3 active cores -39 * 100 = 3900 MHz max turbo 2 active cores -39 * 100 = 3900 MHz max turbo 1 active cores +cpu4: MSR_PLATFORM_INFO: 0x80838f3012300 +8 * 100.0 = 800.0 MHz max efficiency frequency +35 * 100.0 = 3500.0 MHz base frequency +cpu4: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) +cpu4: MSR_TURBO_RATIO_LIMIT: 0x25262727 +37 * 100.0 = 3700.0 MHz max turbo 4 active cores +38 * 100.0 = 3800.0 MHz max turbo 3 active cores +39 * 100.0 = 3900.0 MHz max turbo 2 active cores +39 * 100.0 = 3900.0 MHz max turbo 1 active cores +cpu4: MSR_CONFIG_TDP_NOMINAL: 0x00000023 (base_ratio=35) +cpu4: MSR_CONFIG_TDP_LEVEL_1: 0x00000000 () +cpu4: MSR_CONFIG_TDP_LEVEL_2: 0x00000000 () +cpu4: MSR_CONFIG_TDP_CONTROL: 0x80000000 ( lock=1) +cpu4: MSR_TURBO_ACTIVATION_RATIO: 0x00000000 (MAX_NON_TURBO_RATIO=0 lock=0) +cpu4: MSR_PKG_CST_CONFIG_CONTROL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) +cpu4: POLL: CPUIDLE CORE POLL IDLE +cpu4: C1: MWAIT 0x00 +cpu4: C1E: MWAIT 0x01 +cpu4: C3: MWAIT 0x10 +cpu4: C6: MWAIT 0x20 +cpu4: C7s: MWAIT 0x32 +cpu4: MSR_MISC_FEATURE_CONTROL: 0x00000000 (L2-Prefetch L2-Prefetch-pair L1-Prefetch L1-IP-Prefetch) cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) -cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, ) +cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Transitions, MultiCoreTurbo, Amps, Auto-HWP, ) cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: ) cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, ) cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.) 
@@ -162,23 +230,14 @@ cpu0: MSR_PP1_POLICY: 0 cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C) -cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C) -cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1) -cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) -cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) -cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) - Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt - - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 - 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 - 0 4 3897 99.98 3898 3498 0 0.02 - 1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32 - 1 5 0 0.00 3885 3498 0 0.21 - 2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32 - 2 6 2 0.06 3896 3498 0 0.80 - 3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28 - 3 7 0 0.00 3879 3498 0 0.04 -^C - +cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884c0800 (24 C) +cpu0: MSR_IA32_THERM_STATUS: 0x884c0000 (24 C +/- 1) +cpu1: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1) +cpu2: MSR_IA32_THERM_STATUS: 0x884e0000 (22 C +/- 1) +cpu3: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1) +cpu4: MSR_PKGC3_IRTL: 0x00008842 (valid, 67584 ns) +cpu4: MSR_PKGC6_IRTL: 0x00008873 (valid, 117760 ns) +cpu4: MSR_PKGC7_IRTL: 0x00008891 (valid, 148480 ns) .fi The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency available at the minimum package voltage. The \fBTSC frequency\fP is the base @@ -188,39 +247,22 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling. The remaining rows show what maximum turbo frequency is possible depending on the number of idle cores. Note that not all information is available on all processors. -.SH FORK EXAMPLE -If turbostat is invoked with a command, it will fork that command -and output the statistics gathered after the command exits. -In this case, turbostat output goes to stderr, by default. -Output can instead be saved to a file using the --out option. -eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds -until ^C while the other CPUs are mostly idle: - +.SH ADD COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. +We add a counter showing the 32-bit raw value of MSR 0x199 (MSR_IA32_PERF_CTL), +labeling it with the column header, "PRF_CTRL", and display it only once, +afte the conclusion of a 0.1 second sleep. .nf -root@hsw: turbostat cat /dev/zero > /dev/null -^C - CPU Avg_MHz Busy% Bzy_MHz TSC_MHz - - 482 12.51 3854 3498 - 0 0 0.01 1960 3498 - 4 0 0.00 2128 3498 - 1 0 0.00 3003 3498 - 5 3854 99.98 3855 3498 - 2 0 0.01 3504 3498 - 6 3 0.08 3884 3498 - 3 0 0.00 2553 3498 - 7 0 0.00 2126 3498 -10.783983 sec +sudo ./turbostat --quiet --cpu 0-3 --show CPU --add msr0x199,u32,raw,PRF_CTRL sleep .1 +0.101604 sec +CPU PRF_CTRL +- 0x00000000 +0 0x00000c00 +1 0x00000800 +2 0x00000a00 +3 0x00000800 .fi -Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit. -The first row shows the average MHz and Busy% across all the processors in the system. - -Note that the Avg_MHz column reflects the total number of cycles executed -divided by the measurement interval. If the Busy% column is 100%, -then the processor was running at that speed the entire interval. 
-The Avg_MHz multiplied by the Busy% results in the Bzy_MHz -- -which is the average frequency while the processor was executing -- -not including any non-busy idle time. .SH NOTES -- GitLab From 4e4e1e7c6eaf387f8ec803f37f154fdd60e303c0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 21 Feb 2017 22:33:42 -0500 Subject: [PATCH 0798/1084] tools/power turbostat: move --Package and --processor into the --cpu option --Package is now "--cpu package", which will display just the 1st CPU in each package --processor is not "--cpu core" which will display just the 1st CPU in each core Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 6 +----- tools/power/x86/turbostat/turbostat.c | 31 +++++++++++++++++---------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index b1b1ab80102c..5189d9d982fe 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -52,7 +52,7 @@ name as necessary to disambiguate it from others is necessary. Note that option as the column header. .fi .PP -\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. cpu-set is a comma delimited list of cpu ranges. cpu ranges can be individual cpu numbers or start and end numbers, separated by ".." or '-'. eg. 1,2,8,14..17,21-44 +\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 .PP \fB--hide column\fP do not show the specified columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. .PP @@ -71,12 +71,8 @@ The file is truncated if it already exists, and it is created if it does not exi .PP \fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts. .PP -\fB--Package\fP limits output to the system summary plus the 1st thread in each Package. -.PP \fB--list\fP display column header names available for use by --show and --hide, then exit. .PP -\fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings. -.PP \fB--Summary\fP limits output to a 1-line System Summary for each interval. .PP \fB--TCC temperature\fP sets the Thermal Control Circuit temperature for systems which do not export that value. This is used for making sense of the Digital Thermal Sensor outputs, as they return degrees Celsius below the TCC activation temperature. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 851eaf06f358..c005d9052679 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4153,7 +4153,8 @@ void help() "to print statistics, until interrupted.\n" "--add add a counter\n" " eg. 
--add msr0x10,u64,cpu,delta,MY_TSC\n" - "--cpu cpu-set limit output to summary plus cpu-set cpu-set\n" + "--cpu cpu-set limit output to summary plus cpu-set:\n" + " {core | package | j,k,l..m,n-p }\n" "--quiet skip decoding system configuration header\n" "--interval sec Override default 5-second measurement interval\n" "--help print this help message\n" @@ -4756,6 +4757,21 @@ void parse_cpu_command(char *optarg) unsigned int start, end; char *next; + if (!strcmp(optarg, "core")) { + if (cpu_subset) + goto error; + show_core_only++; + return; + } + if (!strcmp(optarg, "package")) { + if (cpu_subset) + goto error; + show_pkg_only++; + return; + } + if (show_core_only || show_pkg_only) + goto error; + cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); if (cpu_subset == NULL) err(3, "CPU_ALLOC"); @@ -4813,7 +4829,8 @@ void parse_cpu_command(char *optarg) return; error: - fprintf(stderr, "'--cpu %s' malformed\n", optarg); + fprintf(stderr, "\"--cpu %s\" malformed\n", optarg); + help(); exit(-1); } @@ -4867,8 +4884,6 @@ void cmdline(int argc, char **argv) {"Joules", no_argument, 0, 'J'}, {"list", no_argument, 0, 'l'}, {"out", required_argument, 0, 'o'}, - {"Package", no_argument, 0, 'p'}, - {"processor", no_argument, 0, 'p'}, {"quiet", no_argument, 0, 'q'}, {"show", required_argument, 0, 's'}, {"Summary", no_argument, 0, 'S'}, @@ -4879,7 +4894,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpqST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': @@ -4925,12 +4940,6 @@ void cmdline(int argc, char **argv) case 'o': outf = fopen_or_die(optarg, "w"); break; - case 'P': - show_pkg_only++; - break; - case 'p': - show_core_only++; - break; case 'q': quiet = 1; break; -- GitLab From dd778a5e6bbd0f52f34c61ae9b42b725c5f22398 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 21 Feb 2017 23:21:13 -0500 Subject: [PATCH 0799/1084] tools/power turbostat: support "--hide C1" etc. Originally, the only way to hide the sysfs C-state statistics columns was with "--hide sysfs". This was because we process "--hide" before we probe for those columns. hack --hide to remember deferred hide requests, and apply them when sysfs is probed. "--hide sysfs" is still available as short-hand to refer to the entire group of counters. The down-side of this change is that we no longer error check for bogus --hide column names. But the user will quickly figure that out if a column they mean to hide is still there... Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 +- tools/power/x86/turbostat/turbostat.c | 115 ++++++++++++++++---------- 2 files changed, 73 insertions(+), 44 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 5189d9d982fe..fedca3285326 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -114,7 +114,7 @@ This is ideal for remote debugging, use the "--out" option to save everything to .PP When you are not interested in all that information, and there are several ways to see only what you want. First the "--quiet" option will skip the configuration information, and turbostat will show only the counter columns. Second, you can reduce the columns with the "--hide" and "--show" options. If you use the "--show" option, then turbostat will show only the columns you list. 
If you use the "--hide" option, turbostat will show all columns, except the ones you list. .PP -To find out what columns are available for --show and --hide, the "--list" option is available. Note, however, there is an exception. The C-state columns collected from sysfs "C1,C2,C3,C1%,C2%,C3%" are not built-in counters, but are discovered after --show and --hide are processed. You can use the special counter name "sysfs" to refer to all of them at the same time. +To find out what columns are available for --show and --hide, the "--list" option is available. For convenience, the special strings "sysfs" can be used to refer to all of the sysfs C-state counters at once: .nf sudo ./turbostat --show sysfs --quiet sleep 10 10.003837 sec diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c005d9052679..596259f48f50 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -434,12 +434,45 @@ unsigned long long bic_present = BIC_sysfs; #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) +#define MAX_DEFERRED 16 +char *deferred_skip_names[MAX_DEFERRED]; +int deferred_skip_index; + +/* + * HIDE_LIST - hide this list of counters, show the rest [default] + * SHOW_LIST - show this list of counters, hide the rest + */ +enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; + +void help(void) +{ + fprintf(outf, + "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" + "\n" + "Turbostat forks the specified COMMAND and prints statistics\n" + "when COMMAND completes.\n" + "If no COMMAND is specified, turbostat wakes every 5-seconds\n" + "to print statistics, until interrupted.\n" + "--add add a counter\n" + " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + "--cpu cpu-set limit output to summary plus cpu-set:\n" + " {core | package | j,k,l..m,n-p }\n" + "--quiet skip decoding system configuration header\n" + "--interval sec Override default 5-second measurement interval\n" + "--help print this help message\n" + "--list list column headers only\n" + "--out file create or truncate \"file\" for all output\n" + "--version print version information\n" + "\n" + "For more help, run \"man turbostat\"\n"); +} + /* * bic_lookup * for all the strings in comma separate name_list, * set the approprate bit in return value. */ -unsigned long long bic_lookup(char *name_list) +unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) { int i; unsigned long long retval = 0; @@ -459,8 +492,19 @@ unsigned long long bic_lookup(char *name_list) } } if (i == MAX_BIC) { - fprintf(stderr, "Invalid counter name: %s\n", name_list); - exit(-1); + if (mode == SHOW_LIST) { + fprintf(stderr, "Invalid counter name: %s\n", name_list); + exit(-1); + } + deferred_skip_names[deferred_skip_index++] = name_list; + if (debug) + fprintf(stderr, "deferred \"%s\"\n", name_list); + if (deferred_skip_index >= MAX_DEFERRED) { + fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", + MAX_DEFERRED, name_list); + help(); + exit(1); + } } name_list = comma; @@ -471,6 +515,7 @@ unsigned long long bic_lookup(char *name_list) return retval; } + void print_header(char *delim) { struct msr_counter *mp; @@ -502,20 +547,17 @@ void print_header(char *delim) outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); for (mp = sys.tp; mp; mp = mp->next) { - if (*delim == ',') { - outp += sprintf(outp, "%s%s", (printed++ ? 
delim : ""), "sysfs"); - break; - } + if (mp->format == FORMAT_RAW) { if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", delim, mp->name); + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); else - outp += sprintf(outp, "%s%10.10s", delim, mp->name); + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); } else { if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", delim, mp->name); + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); else - outp += sprintf(outp, "%s%s", delim, mp->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); } } @@ -4142,29 +4184,6 @@ void process_cpuid() return; } -void help() -{ - fprintf(outf, - "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" - "\n" - "Turbostat forks the specified COMMAND and prints statistics\n" - "when COMMAND completes.\n" - "If no COMMAND is specified, turbostat wakes every 5-seconds\n" - "to print statistics, until interrupted.\n" - "--add add a counter\n" - " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" - "--cpu cpu-set limit output to summary plus cpu-set:\n" - " {core | package | j,k,l..m,n-p }\n" - "--quiet skip decoding system configuration header\n" - "--interval sec Override default 5-second measurement interval\n" - "--help print this help message\n" - "--list list column headers only\n" - "--out file create or truncate \"file\" for all output\n" - "--version print version information\n" - "\n" - "For more help, run \"man turbostat\"\n"); -} - /* * in /dev/cpu/ return success for names that are numbers @@ -4689,6 +4708,16 @@ void parse_add_command(char *add_command) } } +int is_deferred_skip(char *name) +{ + int i; + + for (i = 0; i < deferred_skip_index; ++i) + if (!strcmp(name, deferred_skip_names[i])) + return 1; + return 0; +} + void probe_sysfs(void) { char path[64]; @@ -4720,6 +4749,9 @@ void probe_sysfs(void) sprintf(path, "cpuidle/state%d/time", state); + if (is_deferred_skip(name_buf)) + continue; + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU); } @@ -4741,6 +4773,9 @@ void probe_sysfs(void) sprintf(path, "cpuidle/state%d/usage", state); + if (is_deferred_skip(name_buf)) + continue; + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU); } @@ -4834,12 +4869,6 @@ void parse_cpu_command(char *optarg) exit(-1); } -/* - * HIDE_LIST - hide this list of counters, show the rest [default] - * SHOW_LIST - show this list of counters, hide the rest - */ -enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; - int shown; /* * parse_show_hide() - process cmdline to set default counter action @@ -4853,9 +4882,9 @@ void parse_show_hide(char *optarg, enum show_hide_mode new_mode) */ if (new_mode == SHOW_LIST) { if (shown == 0) - bic_enabled = bic_lookup(optarg); + bic_enabled = bic_lookup(optarg, new_mode); else - bic_enabled |= bic_lookup(optarg); + bic_enabled |= bic_lookup(optarg, new_mode); shown = 1; return; @@ -4865,7 +4894,7 @@ void parse_show_hide(char *optarg, enum show_hide_mode new_mode) * --hide: do not show those specified * multiple invocations simply clear more bits in enabled mask */ - bic_enabled &= ~bic_lookup(optarg); + bic_enabled &= ~bic_lookup(optarg, new_mode); } -- GitLab From 7da6e3e2125d24040b3648ddc61edf70eb533849 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 21 Feb 2017 23:43:41 -0500 Subject: [PATCH 0800/1084] tools/power turbostat: show package number, even 
without --debug On multi-package systems, the "Package" column was being displayed only if --debug was used. Show it always. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 596259f48f50..2c674ad5ab80 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4294,7 +4294,7 @@ void topology_probe() if (debug > 1) fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); - if (debug && !summary_only && topo.num_packages > 1) + if (!summary_only && topo.num_packages > 1) BIC_PRESENT(BIC_Package); topo.num_threads_per_core = max_siblings; -- GitLab From 7293fccdffdec0ab0c36c4e4cffacb3ff114928e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 22 Feb 2017 00:11:12 -0500 Subject: [PATCH 0801/1084] tools/power turbostat: dump p-state software config cpu1: cpufreq driver: acpi-cpufreq cpu1: cpufreq governor: ondemand cpufreq boost: 1 or cpu0: cpufreq driver: intel_pstate cpu0: cpufreq governor: powersave cpufreq intel_pstate no_turbo: 0 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2c674ad5ab80..7af5f42a9792 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2979,6 +2979,54 @@ dump_sysfs_cstate_config(void) fclose(input); } } +static void +dump_sysfs_pstate_config(void) +{ + char path[64]; + char driver_buf[64]; + char governor_buf[64]; + FILE *input; + int turbo; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", + base_cpu); + input = fopen(path, "r"); + if (input == NULL) { + fprintf(stderr, "NSFOD %s\n", path); + return; + } + fgets(driver_buf, sizeof(driver_buf), input); + fclose(input); + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", + base_cpu); + input = fopen(path, "r"); + if (input == NULL) { + fprintf(stderr, "NSFOD %s\n", path); + return; + } + fgets(governor_buf, sizeof(governor_buf), input); + fclose(input); + + fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); + fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf); + + sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); + input = fopen(path, "r"); + if (input != NULL) { + fscanf(input, "%d", &turbo); + fprintf(outf, "cpufreq boost: %d\n", turbo); + fclose(input); + } + + sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); + input = fopen(path, "r"); + if (input != NULL) { + fscanf(input, "%d", &turbo); + fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); + fclose(input); + } +} /* @@ -4168,6 +4216,8 @@ void process_cpuid() if (!quiet) dump_sysfs_cstate_config(); + if (!quiet) + dump_sysfs_pstate_config(); if (has_skl_msrs(family, model)) calculate_tsc_tweak(); -- GitLab From 0815a3d09baf2cd330f75020bdaad0f1adac0ecb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 23 Feb 2017 17:00:51 -0500 Subject: [PATCH 0802/1084] tools/power turbostat: show error on exec When turbostat is run in one-shot command mode, the parent takes the 'before' counter snapshot, fork/exec/wait for the child to exit, takes the 'after' counter snapshot, and prints the results. 
however, if the child fails to exec the command, it immediately returns, without indicating that anythign was wrong. Add an error message showing that exec failed: sudo turbostat sleeeep 4 ... turbostat: exec sleeeep: No such file or directory ... Note that the parent will still print out the statistics, because it can't tell the difference between the failed exec and a command that is purposefully returning the same status. Unfortunately, this may obscure the error message. However, if the --out parameter is used, the error message is evident on stderr. Reported-by: Wendy Wang Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7af5f42a9792..2d758abecd56 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4525,6 +4525,7 @@ int fork_it(char **argv) if (!child_pid) { /* child */ execvp(argv[0], argv); + err(errno, "exec %s", argv[0]); } else { /* parent */ -- GitLab From 5f3aea57773dc7f788e374994636ffc0234a355f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 23 Feb 2017 18:10:27 -0500 Subject: [PATCH 0803/1084] tools/power turbostat: bugfix: --add u32 was printed as u64 When the "u32" keyword is used with --add, it means that the output should be truncated to 32-bits. This was not happening and all 64-bits were printed. Also, when no column name was used for an added MSR, The default column name was in deximal, eg. MSR16. Users report that they tend to use hex MSR numbers, so print them in hex. To always fit into the columns, use the syntax M0x10. Note that the user can always supply any column header that they want. eg --add msr0x10,MY_TSC Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2d758abecd56..5216549957f4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -834,7 +834,7 @@ int format_counters(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) t->counter[i]); + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]); else outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); } else if (mp->format == FORMAT_DELTA) { @@ -876,7 +876,7 @@ int format_counters(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) c->counter[i]); + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]); else outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); } else if (mp->format == FORMAT_DELTA) { @@ -967,7 +967,7 @@ int format_counters(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "%s0x%08lx", (printed++ ? delim : ""), (unsigned long) p->counter[i]); + outp += sprintf(outp, "%s0x%08x", (printed++ ? 
delim : ""), (unsigned int) p->counter[i]); else outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); } else if (mp->format == FORMAT_DELTA) { @@ -4732,22 +4732,10 @@ void parse_add_command(char *add_command) /* generate default column header */ if (*name_buffer == '\0') { - if (format == FORMAT_RAW) { - if (width == 32) - sprintf(name_buffer, "msr%d", msr_num); - else - sprintf(name_buffer, "MSR%d", msr_num); - } else if (format == FORMAT_DELTA) { - if (width == 32) - sprintf(name_buffer, "cnt%d", msr_num); - else - sprintf(name_buffer, "CNT%d", msr_num); - } else if (format == FORMAT_PERCENT) { - if (width == 32) - sprintf(name_buffer, "msr%d%%", msr_num); - else - sprintf(name_buffer, "MSR%d%%", msr_num); - } + if (width == 32) + sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + else + sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); } if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0)) -- GitLab From e3942ed8c66bcff496abee5182422cd542962d9e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 02:33:23 -0500 Subject: [PATCH 0804/1084] tools/power turbostat: version 17.02.24 The turbostat before this last set of changes is obsolete. This new version can do a lot more, but it also has some different defaults, that might catch some off-guard. So it seems a good time to give a new version number. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5216549957f4..828dccd3f01e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4578,7 +4578,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.17 10 Jan 2017" + fprintf(outf, "turbostat version 17.02.24" " - Len Brown \n"); } -- GitLab From df06a2d57711a1472ced72207373eeb6422d4721 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 1 Mar 2017 00:03:37 -0800 Subject: [PATCH 0805/1084] tools/testing/nvdimm: make iset cookie predictable For testing changes to the iset cookie algorithm we need a value that is constant from run-to-run. Stop including dynamic data in the emulated region_offset values. Also, pick values that sort in a different order depending on whether the comparison is a memcmp() of two 8-byte arrays or subtraction of two 64-bit values. 
Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 45be8b55a663..798f17655433 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -887,7 +887,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 0+1; memdev->region_index = 4+1; memdev->region_size = SPA0_SIZE/2; - memdev->region_offset = t->spa_set_dma[0]; + memdev->region_offset = 1; memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; @@ -902,7 +902,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 0+1; memdev->region_index = 5+1; memdev->region_size = SPA0_SIZE/2; - memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2; + memdev->region_offset = (1 << 8); memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; @@ -917,7 +917,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 4+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1]; + memdev->region_offset = (1 << 16); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -932,7 +932,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 5+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4; + memdev->region_offset = (1 << 24); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -947,7 +947,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 6+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4; + memdev->region_offset = (1ULL << 32); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -962,7 +962,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 1+1; memdev->region_index = 7+1; memdev->region_size = SPA1_SIZE/4; - memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4; + memdev->region_offset = (1ULL << 40); memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; @@ -1380,7 +1380,7 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->range_index = 11+1; memdev->region_index = 9+1; memdev->region_size = SPA0_SIZE; - memdev->region_offset = t->spa_set_dma[2]; + memdev->region_offset = (1ULL << 48); memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; -- GitLab From 87bfbddd0935f7a3acf36b883707aee816d4bb49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Sun, 26 Feb 2017 15:44:42 +0100 Subject: [PATCH 0806/1084] =?UTF-8?q?MAINTAINERS:=20Remove=20Noralf=20Tr?= =?UTF-8?q?=C3=B8nnes=20as=20fbtft=20maintainer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to personal reasons I'm unable to continue as fbtft maintainer. 
Signed-off-by: Noralf Trønnes Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ad4f2ce991ca..30ff892e1e2f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4979,7 +4979,6 @@ F: lib/fault-inject.c FBTFT Framebuffer drivers M: Thomas Petazzoni -M: Noralf Trønnes S: Maintained F: drivers/staging/fbtft/ -- GitLab From a45e47f4b342884dcf9e40a530033f64c379ffc7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2017 21:42:16 +0100 Subject: [PATCH 0807/1084] staging: fsl-mc: fix warning in DT ranges parser The fsl-mc-bus driver in staging contains a copy of the standard 'ranges' property parsing algorithm with a hack to treat a missing property the same way as an empty one. This code produces false-positive warnings for me in an allmodconfig build: drivers/staging/fsl-mc/bus/fsl-mc-bus.c: In function 'fsl_mc_bus_probe': drivers/staging/fsl-mc/bus/fsl-mc-bus.c:645:6: error: 'mc_size_cells' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/staging/fsl-mc/bus/fsl-mc-bus.c:682:8: error: 'mc_addr_cells' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/staging/fsl-mc/bus/fsl-mc-bus.c:644:6: note: 'mc_addr_cells' was declared here drivers/staging/fsl-mc/bus/fsl-mc-bus.c:684:8: error: 'paddr_cells' may be used uninitialized in this function [-Werror=maybe-uninitialized] drivers/staging/fsl-mc/bus/fsl-mc-bus.c:643:6: note: 'paddr_cells' was declared here To avoid the warnings, I'm simplifying the argument handling to pass the number of valid ranges in the property as the function return code rather than passing it by reference. With this change, gcc can see that we don't evaluate the cell numbers for an missing ranges property. 
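[Editorial note] The calling pattern the patch moves to can be sketched in isolation. The code below is illustrative only; the names and values are made up and are not taken from the driver. The count comes back as the return value, negative values are errors, and the cell count is only consumed on the path where at least one range was parsed, which is generally easier for the compiler's flow analysis than tracking a count written through a pointer out-parameter.

#include <stdio.h>

/* Stand-in for parse_mc_ranges() after the change: return the number of
 * ranges (or a negative errno), and only touch *cells when there is
 * something to translate. */
static int parse_ranges(int have_prop, int *cells)
{
	if (!have_prop)
		return 0;	/* nothing to translate, *cells untouched */
	*cells = 2;		/* stand-in for the parsed #address-cells */
	return 4;		/* stand-in for ranges_len / tuple_len */
}

/* Stand-in for the caller: the guard is the plain local return value. */
static int get_ranges(int have_prop)
{
	int cells;
	int ret = parse_ranges(have_prop, &cells);

	if (ret < 0)
		return ret;
	if (ret == 0)
		return 0;	/* identity mapping, cells never read */
	return cells * ret;	/* cells is only used on the ret > 0 path */
}

int main(void)
{
	printf("%d %d\n", get_ranges(0), get_ranges(1));
	return 0;
}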
Signed-off-by: Arnd Bergmann Acked-by: Laurentiu Tudor Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fsl-mc/bus/fsl-mc-bus.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/staging/fsl-mc/bus/fsl-mc-bus.c b/drivers/staging/fsl-mc/bus/fsl-mc-bus.c index 47acb0a29842..3be5f25ff113 100644 --- a/drivers/staging/fsl-mc/bus/fsl-mc-bus.c +++ b/drivers/staging/fsl-mc/bus/fsl-mc-bus.c @@ -588,8 +588,7 @@ static int parse_mc_ranges(struct device *dev, int *paddr_cells, int *mc_addr_cells, int *mc_size_cells, - const __be32 **ranges_start, - u8 *num_ranges) + const __be32 **ranges_start) { const __be32 *prop; int range_tuple_cell_count; @@ -602,8 +601,6 @@ static int parse_mc_ranges(struct device *dev, dev_warn(dev, "missing or empty ranges property for device tree node '%s'\n", mc_node->name); - - *num_ranges = 0; return 0; } @@ -630,8 +627,7 @@ static int parse_mc_ranges(struct device *dev, return -EINVAL; } - *num_ranges = ranges_len / tuple_len; - return 0; + return ranges_len / tuple_len; } static int get_mc_addr_translation_ranges(struct device *dev, @@ -639,7 +635,7 @@ static int get_mc_addr_translation_ranges(struct device *dev, **ranges, u8 *num_ranges) { - int error; + int ret; int paddr_cells; int mc_addr_cells; int mc_size_cells; @@ -647,16 +643,16 @@ static int get_mc_addr_translation_ranges(struct device *dev, const __be32 *ranges_start; const __be32 *cell; - error = parse_mc_ranges(dev, + ret = parse_mc_ranges(dev, &paddr_cells, &mc_addr_cells, &mc_size_cells, - &ranges_start, - num_ranges); - if (error < 0) - return error; + &ranges_start); + if (ret < 0) + return ret; - if (!(*num_ranges)) { + *num_ranges = ret; + if (!ret) { /* * Missing or empty ranges property ("ranges;") for the * 'fsl,qoriq-mc' node. In this case, identity mapping -- GitLab From 86ef58a4e35e8fa66afb5898cf6dec6a3bb29f67 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 28 Feb 2017 18:32:48 -0800 Subject: [PATCH 0808/1084] nfit, libnvdimm: fix interleave set cookie calculation The interleave-set cookie is a sum that sanity checks the composition of an interleave set has not changed from when the namespace was initially created. The checksum is calculated by sorting the DIMMs by their location in the interleave-set. The comparison for the sort must be 64-bit wide, not byte-by-byte as performed by memcmp() in the broken case. Fix the implementation to accept correct cookie values in addition to the Linux "memcmp" order cookies, but only allow correct cookies to be generated going forward. It does mean that namespaces created by third-party-tooling, or created by newer kernels with this fix, will not validate on older kernels. However, there are a couple mitigating conditions: 1/ platforms with namespace-label capable NVDIMMs are not widely available. 2/ interleave-sets with a single-dimm are by definition not affected (nothing to sort). This covers the QEMU-KVM NVDIMM emulation case. The cookie stored in the namespace label will be fixed by any write the namespace label, the most straightforward way to achieve this is to write to the "alt_name" attribute of a namespace in sysfs. 
Cc: Fixes: eaf961536e16 ("libnvdimm, nfit: add interleave-set state-tracking infrastructure") Reported-by: Nicholas Moulin Tested-by: Nicholas Moulin Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 16 +++++++++++++++- drivers/nvdimm/namespace_devs.c | 18 ++++++++++++++---- drivers/nvdimm/nd.h | 1 + drivers/nvdimm/region_devs.c | 9 +++++++++ include/linux/libnvdimm.h | 2 ++ 5 files changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 7361d00818e2..662036bdc65e 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1603,7 +1603,7 @@ static size_t sizeof_nfit_set_info(int num_mappings) + num_mappings * sizeof(struct nfit_set_info_map); } -static int cmp_map(const void *m0, const void *m1) +static int cmp_map_compat(const void *m0, const void *m1) { const struct nfit_set_info_map *map0 = m0; const struct nfit_set_info_map *map1 = m1; @@ -1612,6 +1612,14 @@ static int cmp_map(const void *m0, const void *m1) sizeof(u64)); } +static int cmp_map(const void *m0, const void *m1) +{ + const struct nfit_set_info_map *map0 = m0; + const struct nfit_set_info_map *map1 = m1; + + return map0->region_offset - map1->region_offset; +} + /* Retrieve the nth entry referencing this spa */ static struct acpi_nfit_memory_map *memdev_from_spa( struct acpi_nfit_desc *acpi_desc, u16 range_index, int n) @@ -1667,6 +1675,12 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), cmp_map, NULL); nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + + /* support namespaces created with the wrong sort order */ + sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), + cmp_map_compat, NULL); + nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + ndr_desc->nd_set = nd_set; devm_kfree(dev, info); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index ce3e8dfa10ad..1b481a5fb966 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1700,6 +1700,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) struct device *create_namespace_pmem(struct nd_region *nd_region, struct nd_namespace_label *nd_label) { + u64 altcookie = nd_region_interleave_set_altcookie(nd_region); u64 cookie = nd_region_interleave_set_cookie(nd_region); struct nd_label_ent *label_ent; struct nd_namespace_pmem *nspm; @@ -1718,7 +1719,11 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, if (__le64_to_cpu(nd_label->isetcookie) != cookie) { dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n", nd_label->uuid); - return ERR_PTR(-EAGAIN); + if (__le64_to_cpu(nd_label->isetcookie) != altcookie) + return ERR_PTR(-EAGAIN); + + dev_dbg(&nd_region->dev, "valid altcookie in label: %pUb\n", + nd_label->uuid); } nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); @@ -1733,9 +1738,14 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, res->name = dev_name(&nd_region->dev); res->flags = IORESOURCE_MEM; - for (i = 0; i < nd_region->ndr_mappings; i++) - if (!has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i)) - break; + for (i = 0; i < nd_region->ndr_mappings; i++) { + if (has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i)) + continue; + if (has_uuid_at_pos(nd_region, nd_label->uuid, altcookie, i)) + continue; + break; + } + if (i < nd_region->ndr_mappings) { struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]); diff --git 
a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 35dd75057e16..2a99c83aa19f 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -328,6 +328,7 @@ struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); +u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region); void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_unlock(struct device *dev); bool is_nvdimm_bus_locked(struct device *dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 7cd705f3247c..b7cb5066d961 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -505,6 +505,15 @@ u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) return 0; } +u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region) +{ + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (nd_set) + return nd_set->altcookie; + return 0; +} + void nd_mapping_free_labels(struct nd_mapping *nd_mapping) { struct nd_label_ent *label_ent, *e; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 8458c5351e56..77e7af32543f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -70,6 +70,8 @@ struct nd_cmd_desc { struct nd_interleave_set { u64 cookie; + /* compatibility with initial buggy Linux implementation */ + u64 altcookie; }; struct nd_mapping_desc { -- GitLab From 1fb1683cb343d80736625f3048de2107cf5bbf79 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Feb 2017 14:36:56 +0000 Subject: [PATCH 0809/1084] crypto: arm/crc32 - fix build error with outdated binutils Annotate a vmov instruction with an explicit element size of 32 bits. This is inferred by recent toolchains, but apparently, older versions need some help figuring this out. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/crc32-ce-core.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/crypto/crc32-ce-core.S index e63d400dc5c1..5cbd4a6fedad 100644 --- a/arch/arm/crypto/crc32-ce-core.S +++ b/arch/arm/crypto/crc32-ce-core.S @@ -135,7 +135,7 @@ ENTRY(crc32c_pmull_le) vld1.8 {q3-q4}, [BUF, :128]! vmov.i8 qzr, #0 vmov.i8 qCONSTANT, #0 - vmov dCONSTANTl[0], CRC + vmov.32 dCONSTANTl[0], CRC veor.8 d2, d2, dCONSTANTl sub LEN, LEN, #0x40 cmp LEN, #0x40 -- GitLab From efa7cebdbfde8506b54acd8947822394768cd476 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 28 Feb 2017 14:36:57 +0000 Subject: [PATCH 0810/1084] crypto: arm/crc32 - add build time test for CRC instruction support The accelerated CRC32 module for ARM may use either the scalar CRC32 instructions, the NEON 64x64 to 128 bit polynomial multiplication (vmull.p64) instruction, or both, depending on what the current CPU supports. However, this also requires support in binutils, and as it turns out, versions of binutils exist that support the vmull.p64 instruction but not the crc32 instructions. So refactor the Makefile logic so that this module only gets built if binutils has support for both. 
Signed-off-by: Ard Biesheuvel Acked-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Herbert Xu --- arch/arm/crypto/Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 1822c4697278..f2215fbeed13 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -15,7 +15,17 @@ ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o +crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o + +ifneq ($(crc-obj-y)$(crc-obj-m),) +ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y) +ce-obj-y += $(crc-obj-y) +ce-obj-m += $(crc-obj-m) +else +$(warning These CRC Extensions modules need binutils 2.23 or higher) +$(warning $(crc-obj-y) $(crc-obj-m)) +endif +endif ifneq ($(ce-obj-y)$(ce-obj-m),) ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) -- GitLab From 1c68bb0f62bf8de8bb30123ea840d5168f25abea Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 28 Feb 2017 14:07:25 -0800 Subject: [PATCH 0811/1084] crypto: testmgr - Pad aes_ccm_enc_tv_template vector Running with KASAN and crypto tests currently gives BUG: KASAN: global-out-of-bounds in __test_aead+0x9d9/0x2200 at addr ffffffff8212fca0 Read of size 16 by task cryptomgr_test/1107 Address belongs to variable 0xffffffff8212fca0 CPU: 0 PID: 1107 Comm: cryptomgr_test Not tainted 4.10.0+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014 Call Trace: dump_stack+0x63/0x8a kasan_report.part.1+0x4a7/0x4e0 ? __test_aead+0x9d9/0x2200 ? crypto_ccm_init_crypt+0x218/0x3c0 [ccm] kasan_report+0x20/0x30 check_memory_region+0x13c/0x1a0 memcpy+0x23/0x50 __test_aead+0x9d9/0x2200 ? kasan_unpoison_shadow+0x35/0x50 ? alg_test_akcipher+0xf0/0xf0 ? crypto_skcipher_init_tfm+0x2e3/0x310 ? crypto_spawn_tfm2+0x37/0x60 ? crypto_ccm_init_tfm+0xa9/0xd0 [ccm] ? crypto_aead_init_tfm+0x7b/0x90 ? crypto_alloc_tfm+0xc4/0x190 test_aead+0x28/0xc0 alg_test_aead+0x54/0xd0 alg_test+0x1eb/0x3d0 ? alg_find_test+0x90/0x90 ? __sched_text_start+0x8/0x8 ? __wake_up_common+0x70/0xb0 cryptomgr_test+0x4d/0x60 kthread+0x173/0x1c0 ? crypto_acomp_scomp_free_ctx+0x60/0x60 ? kthread_create_on_node+0xa0/0xa0 ret_from_fork+0x2c/0x40 Memory state around the buggy address: ffffffff8212fb80: 00 00 00 00 01 fa fa fa fa fa fa fa 00 00 00 00 ffffffff8212fc00: 00 01 fa fa fa fa fa fa 00 00 00 00 01 fa fa fa >ffffffff8212fc80: fa fa fa fa 00 05 fa fa fa fa fa fa 00 00 00 00 ^ ffffffff8212fd00: 01 fa fa fa fa fa fa fa 00 00 00 00 01 fa fa fa ffffffff8212fd80: fa fa fa fa 00 00 00 00 00 05 fa fa fa fa fa fa This always happens on the same IV which is less than 16 bytes. Per Ard, "CCM IVs are 16 bytes, but due to the way they are constructed internally, the final couple of bytes of input IV are dont-cares. Apparently, we do read all 16 bytes, which triggers the KASAN errors." Fix this by padding the IV with null bytes to be at least 16 bytes. 
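A minimal sketch of the padding idea (assumed helper name, not the kernel's testmgr code): copy the short test-vector IV into a zeroed 16-byte buffer so that code which unconditionally reads 16 bytes stays inside the object:

    #include <string.h>

    #define CCM_IV_LEN 16

    /* Assumed helper: zero-pad a short IV out to the full 16 bytes. */
    static void load_padded_iv(unsigned char dst[CCM_IV_LEN],
                               const unsigned char *src, size_t len)
    {
            memset(dst, 0, CCM_IV_LEN);
            memcpy(dst, src, len < CCM_IV_LEN ? len : CCM_IV_LEN);
    }

In the patch itself the same effect is achieved directly in the test vector by appending four NUL bytes to the 12-byte IV string.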
Cc: stable@vger.kernel.org Fixes: 0bc5a6c5c79a ("crypto: testmgr - Disable rfc4309 test and convert test vectors") Acked-by: Ard Biesheuvel Signed-off-by: Laura Abbott Signed-off-by: Herbert Xu --- crypto/testmgr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/testmgr.h b/crypto/testmgr.h index f85e51cf7dcc..663f034c89b9 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -22691,7 +22691,7 @@ static struct aead_testvec aes_ccm_enc_tv_template[] = { "\x09\x75\x9a\x9b\x3c\x9b\x27\x39", .klen = 32, .iv = "\x03\xf9\xd9\x4e\x63\xb5\x3d\x9d" - "\x43\xf6\x1e\x50", + "\x43\xf6\x1e\x50\0\0\0\0", .assoc = "\x57\xf5\x6b\x8b\x57\x5c\x3d\x3b" "\x13\x02\x01\x0c\x83\x4c\x96\x35" "\x8e\xd6\x39\xcf\x7d\x14\x9b\x94" -- GitLab From bba82fd75694832b59433bf4e9d7ede8de1dfd42 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Wed, 1 Feb 2017 17:06:11 +1300 Subject: [PATCH 0812/1084] KVM: x86: never specify a sample period for virtualized in_tx_cp counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pmc_reprogram_counter() always sets a sample period based on the value of pmc->counter. However, hsw_hw_config() rejects sample periods less than 2^31 - 1. So for example, if a KVM guest does struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); attr.type = PERF_TYPE_RAW; attr.size = sizeof(attr); attr.config = 0x2005101c4; // conditional branches retired IN_TXCP attr.sample_period = 0; int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); the guest kernel counts some conditional branch events, then updates the virtual PMU register with a nonzero count. The host reaches pmc_reprogram_counter() with nonzero pmc->counter, triggers EOPNOTSUPP in hsw_hw_config(), prints "kvm_pmu: event creation failed" in pmc_reprogram_counter(), and silently (from the guest's point of view) stops counting events. We fix event counting by forcing attr.sample_period to always be zero for in_tx_cp counters. Sampling doesn't work, but it already didn't work and can't be fixed without major changes to the approach in hsw_hw_config(). Signed-off-by: Robert O'Callahan Signed-off-by: Radim Krčmář --- arch/x86/kvm/pmu.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 06ce377dcbc9..026db42a86c3 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -113,12 +113,19 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, .config = config, }; + attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); + if (in_tx) attr.config |= HSW_IN_TX; - if (in_tx_cp) + if (in_tx_cp) { + /* + * HSW_IN_TX_CHECKPOINTED is not supported with nonzero + * period. Just clear the sample period so at least + * allocating the counter doesn't fail. + */ + attr.sample_period = 0; attr.config |= HSW_IN_TX_CHECKPOINTED; - - attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); + } event = perf_event_create_kernel_counter(&attr, -1, current, intr ? 
kvm_perf_overflow_intr : -- GitLab From 55149d06534ae2a7ba5f7a078353deb89b3fe891 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Mar 2017 00:05:04 -0600 Subject: [PATCH 0813/1084] objtool, compiler.h: Fix __unreachable section relocation size Linus reported the following commit broke module loading on his laptop: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") It showed errors like the following: module: overflow in relocation type 10 val ffffffffc02afc81 module: 'nvme' likely not compiled with -mcmodel=kernel The problem is that the __unreachable section addresses are stored using the '.long' asm directive, which isn't big enough for .text section kernel addresses. Use relative addresses instead: ".long %c0b - .\t\n" Suggested-by: Linus Torvalds Reported-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/20170301060504.oltm3iws6fmubnom@treble Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 76e28c229805..b6bb9019d87f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -201,7 +201,7 @@ #define annotate_unreachable() ({ \ asm("%c0:\t\n" \ ".pushsection __unreachable, \"a\"\t\n" \ - ".long %c0b\t\n" \ + ".long %c0b - .\t\n" \ ".popsection\t\n" : : "i" (__LINE__)); \ }) #else -- GitLab From d0e32fba85d5b7902db2715a9d5540fb0efdba83 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:18 +0100 Subject: [PATCH 0814/1084] watchdog: wm831x watchdog really needs mfd The wm831x watchdog driver can now be built without the wm831x mfd driver, which results in a link error: (.text+0x1a95c): undefined reference to `wm831x_set_bits' (.text+0x1a95c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `wm831x_set_bits' (.text+0x1a968): undefined reference to `wm831x_reg_lock' (.text+0x1a968): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `wm831x_reg_lock' (.text+0x1a9dc): undefined reference to `wm831x_reg_unlock' (.text+0x1a9dc): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `wm831x_reg_unlock' This adds back the dependency that was removed. We can still build test this driver on all architectures by enabling the MFD driver for it first. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index c831b7967bf9..fda6f3d24780 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -176,7 +176,7 @@ config WDAT_WDT config WM831X_WATCHDOG tristate "WM831x watchdog" - depends on MFD_WM831X || COMPILE_TEST + depends on MFD_WM831X select WATCHDOG_CORE help Support for the watchdog in the WM831x AudioPlus PMICs. 
When -- GitLab From 0369fdf2e1169896529bd53c3b5fb15511768276 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:19 +0100 Subject: [PATCH 0815/1084] watchdog: geode: restore hard CS5535_MFGPT dependency Wtihout CONFIG_CS5535_MFGPT, the driver does not link right: drivers/watchdog/built-in.o: In function `geodewdt_probe': geodewdt.c:(.init.text+0xca3): undefined reference to `cs5535_mfgpt_alloc_timer' geodewdt.c:(.init.text+0xcd4): undefined reference to `cs5535_mfgpt_write' geodewdt.c:(.init.text+0xcef): undefined reference to `cs5535_mfgpt_toggle_event' This adds back the dependency on this base driver. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index fda6f3d24780..5b4bb1b6dd68 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -851,7 +851,7 @@ config SP5100_TCO config GEODE_WDT tristate "AMD Geode CS5535/CS5536 Watchdog" - depends on CS5535_MFGPT || (X86 && COMPILE_TEST) + depends on CS5535_MFGPT help This driver enables a watchdog capability built into the CS5535/CS5536 companion chips for the AMD Geode GX and LX -- GitLab From 6fb303a81a0b106e7f5522df929bbf4596780c48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:20 +0100 Subject: [PATCH 0816/1084] watchdog: menf21bmc: add I2C dependency This driver fails to link when CONFIG_I2C is disabled or a loadable module while the watchdog is built-in: drivers/watchdog/built-in.o: In function `menf21bmc_wdt_shutdown': menf21bmc_wdt.c:(.text+0x9b44): undefined reference to `i2c_smbus_write_word_data' menf21bmc_wdt.c:(.text+0x9b44): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `i2c_smbus_write_word_data' This adds a Kconfig dependency for it, to enforce a valid configuration. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 5b4bb1b6dd68..4d2c336daa39 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -142,6 +142,7 @@ config GPIO_WATCHDOG_ARCH_INITCALL config MENF21BMC_WATCHDOG tristate "MEN 14F021P00 BMC Watchdog" depends on MFD_MENF21BMC || COMPILE_TEST + depends on I2C select WATCHDOG_CORE help Say Y here to include support for the MEN 14F021P00 BMC Watchdog. -- GitLab From 3eafee95643360d22c6725ea2188bd700c09c986 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:21 +0100 Subject: [PATCH 0817/1084] watchdog: sp805: add back AMBA dependency The driver fails to link if ARM_AMBA is disabled: drivers/watchdog/sp805_wdt.o: In function `sp805_wdt_driver_init': sp805_wdt.c:(.init.text+0x4): undefined reference to `amba_driver_register' It seems that the COMPILE_TEST was added in the wrong place, as there is no architecture dependency, but a bus dependency. This moves the dependency accordingly. 
Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 4d2c336daa39..41a75b8ab213 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -218,7 +218,7 @@ config ZIIRAVE_WATCHDOG config ARM_SP805_WATCHDOG tristate "ARM SP805 Watchdog" - depends on (ARM || ARM64) && (ARM_AMBA || COMPILE_TEST) + depends on (ARM || ARM64 || COMPILE_TEST) && ARM_AMBA select WATCHDOG_CORE help ARM Primecell SP805 Watchdog timer. This will reboot your system when -- GitLab From 2672b7e01abbe147a0a391216bc0dbd656608e0a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:22 +0100 Subject: [PATCH 0818/1084] watchdog: bcm2835: add CONFIG_OF dependency Without CONFIG_OF, the driver fails to link: drivers/watchdog/built-in.o: In function `bcm2835_power_off': bcm2835_wdt.c:(.text+0x1946): undefined reference to `of_find_device_by_node' This adds a new dependency, to allow the COMPILE_TEST check to succeed. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 41a75b8ab213..13390b00ec47 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1496,7 +1496,7 @@ config BCM63XX_WDT config BCM2835_WDT tristate "Broadcom BCM2835 hardware watchdog" - depends on ARCH_BCM2835 || COMPILE_TEST + depends on ARCH_BCM2835 || (OF && COMPILE_TEST) select WATCHDOG_CORE help Watchdog driver for the built in watchdog hardware in Broadcom -- GitLab From ed4a9eca66720366d117d9b39f2335ca9b0a7aae Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:01:23 +0100 Subject: [PATCH 0819/1084] watchdog: kempld: revert to full dependency The kempld watchdog driver requires the respective MFD driver: drivers/watchdog/built-in.o: In function `kempld_wdt_probe': kempld_wdt.c:(.text+0x5c78): undefined reference to `kempld_get_mutex' kempld_wdt.c:(.text+0x5c84): undefined reference to `kempld_read8' kempld_wdt.c:(.text+0x5c8e): undefined reference to `kempld_release_mutex' kempld_wdt.c:(.text+0x5d1c): undefined reference to `kempld_read8' kempld_wdt.c:(.text+0x5d2c): undefined reference to `kempld_write8' This adds the Kconfig dependency back. Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 13390b00ec47..47dbacf3bfb4 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1064,7 +1064,7 @@ config HP_WATCHDOG config KEMPLD_WDT tristate "Kontron COM Watchdog Timer" - depends on MFD_KEMPLD || COMPILE_TEST + depends on MFD_KEMPLD select WATCHDOG_CORE help Support for the PLD watchdog on some Kontron ETX and COMexpress -- GitLab From 8d5755b3f77b57447ce5de253ef704ad028474d3 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 27 Feb 2017 13:49:09 +0100 Subject: [PATCH 0820/1084] watchdog: softdog: fire watchdog even if softirqs do not get to run Checking for timer expiration is done from the softirq TIMER_SOFTIRQ. 
Since commit 4cd13c21b207 ("softirq: Let ksoftirqd do its job"), pending softirqs are no longer always handled immediately, instead, if there are pending softirqs, and ksoftirqd is in state TASK_RUNNING, the handling of the softirqs are deferred, and are instead supposed to be handled by ksoftirqd, when ksoftirqd gets scheduled. If a user space process with a real-time policy starts to misbehave by never relinquishing the CPU while ksoftirqd is in state TASK_RUNNING, what will happen is that all softirqs will get deferred, while ksoftirqd, which is supposed to handle the deferred softirqs, will never get to run. To make sure that the watchdog is able to fire even when we do not get to run softirqs, replace the timers with hrtimers. Signed-off-by: Niklas Cassel Signed-off-by: Guenter Roeck --- drivers/watchdog/softdog.c | 44 +++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c index 7983029852ab..060740625485 100644 --- a/drivers/watchdog/softdog.c +++ b/drivers/watchdog/softdog.c @@ -21,13 +21,12 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include -#include #include #include #include #include -#include #include #include @@ -54,7 +53,10 @@ module_param(soft_panic, int, 0); MODULE_PARM_DESC(soft_panic, "Softdog action, set to 1 to panic, 0 to reboot (default=0)"); -static void softdog_fire(unsigned long data) +static struct hrtimer softdog_ticktock; +static struct hrtimer softdog_preticktock; + +static enum hrtimer_restart softdog_fire(struct hrtimer *timer) { module_put(THIS_MODULE); if (soft_noboot) { @@ -67,32 +69,33 @@ static void softdog_fire(unsigned long data) emergency_restart(); pr_crit("Reboot didn't ?????\n"); } -} -static struct timer_list softdog_ticktock = - TIMER_INITIALIZER(softdog_fire, 0, 0); + return HRTIMER_NORESTART; +} static struct watchdog_device softdog_dev; -static void softdog_pretimeout(unsigned long data) +static enum hrtimer_restart softdog_pretimeout(struct hrtimer *timer) { watchdog_notify_pretimeout(&softdog_dev); -} -static struct timer_list softdog_preticktock = - TIMER_INITIALIZER(softdog_pretimeout, 0, 0); + return HRTIMER_NORESTART; +} static int softdog_ping(struct watchdog_device *w) { - if (!mod_timer(&softdog_ticktock, jiffies + (w->timeout * HZ))) + if (!hrtimer_active(&softdog_ticktock)) __module_get(THIS_MODULE); + hrtimer_start(&softdog_ticktock, ktime_set(w->timeout, 0), + HRTIMER_MODE_REL); if (IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT)) { if (w->pretimeout) - mod_timer(&softdog_preticktock, jiffies + - (w->timeout - w->pretimeout) * HZ); + hrtimer_start(&softdog_preticktock, + ktime_set(w->timeout - w->pretimeout, 0), + HRTIMER_MODE_REL); else - del_timer(&softdog_preticktock); + hrtimer_cancel(&softdog_preticktock); } return 0; @@ -100,11 +103,11 @@ static int softdog_ping(struct watchdog_device *w) static int softdog_stop(struct watchdog_device *w) { - if (del_timer(&softdog_ticktock)) + if (hrtimer_cancel(&softdog_ticktock)) module_put(THIS_MODULE); if (IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT)) - del_timer(&softdog_preticktock); + hrtimer_cancel(&softdog_preticktock); return 0; } @@ -136,8 +139,15 @@ static int __init softdog_init(void) watchdog_set_nowayout(&softdog_dev, nowayout); watchdog_stop_on_reboot(&softdog_dev); - if (IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT)) + hrtimer_init(&softdog_ticktock, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + softdog_ticktock.function = softdog_fire; + + if 
(IS_ENABLED(CONFIG_SOFT_WATCHDOG_PRETIMEOUT)) { softdog_info.options |= WDIOF_PRETIMEOUT; + hrtimer_init(&softdog_preticktock, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + softdog_preticktock.function = softdog_pretimeout; + } ret = watchdog_register_device(&softdog_dev); if (ret) -- GitLab From 3736d4eb6af37492aeded7fec0072dedd959c842 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Mar 2017 10:15:29 +0100 Subject: [PATCH 0821/1084] watchdog: kempld: fix gcc-4.3 build gcc-4.3 can't decide whether the constant value in kempld_prescaler[PRESCALER_21] is built-time constant or not, and gets confused by the logic in do_div(): drivers/watchdog/kempld_wdt.o: In function `kempld_wdt_set_stage_timeout': kempld_wdt.c:(.text.kempld_wdt_set_stage_timeout+0x130): undefined reference to `__aeabi_uldivmod' This adds a call to ACCESS_ONCE() to force it to not consider it to be constant, and leaves the more efficient normal case in place for modern compilers, using an #ifdef to annotate why we do this hack. Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/kempld_wdt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c index 73c46b3a09ab..2f3b049ea301 100644 --- a/drivers/watchdog/kempld_wdt.c +++ b/drivers/watchdog/kempld_wdt.c @@ -140,12 +140,19 @@ static int kempld_wdt_set_stage_timeout(struct kempld_wdt_data *wdt_data, unsigned int timeout) { struct kempld_device_data *pld = wdt_data->pld; - u32 prescaler = kempld_prescaler[PRESCALER_21]; + u32 prescaler; u64 stage_timeout64; u32 stage_timeout; u32 remainder; u8 stage_cfg; +#if GCC_VERSION < 40400 + /* work around a bug compiling do_div() */ + prescaler = READ_ONCE(kempld_prescaler[PRESCALER_21]); +#else + prescaler = kempld_prescaler[PRESCALER_21]; +#endif + if (!stage) return -EINVAL; -- GitLab From 9297b652bd0a11e29251ffaac93369b8ad45932d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Mar 2017 10:15:30 +0100 Subject: [PATCH 0822/1084] watchdog: db8500: add back prmcu dependency When the db8500 watchdog is enabled without the PRCMU, we get a lot of warnings about duplicate or missing helper functions: In file included from drivers/watchdog/ux500_wdt.c:21:0: include/linux/mfd/dbx500-prcmu.h:422:19: error: redefinition of 'prcmu_abb_read' static inline int prcmu_abb_read(u8 slave, u8 reg, u8 *value, u8 size) This restores the dependency as it was. 
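The duplicate definitions come from the common pattern of an MFD header supplying static inline stubs when the controller driver is not built; a hedged sketch with made-up names (not the dbx500-prcmu interface):

    #include <linux/errno.h>

    #ifdef CONFIG_SOME_MFD
    int some_mfd_read(unsigned char reg, unsigned char *val);   /* real driver */
    #else
    static inline int some_mfd_read(unsigned char reg, unsigned char *val)
    {
            return -ENODEV;         /* stub used when the MFD is disabled */
    }
    #endif

If a build configuration makes a stub and a second definition of the same helper visible at once, the compiler reports exactly this kind of redefinition.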
Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 47dbacf3bfb4..a199e8536ce4 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -574,7 +574,7 @@ config IMX2_WDT config UX500_WATCHDOG tristate "ST-Ericsson Ux500 watchdog" - depends on MFD_DB8500_PRCMU || (ARM && COMPILE_TEST) + depends on MFD_DB8500_PRCMU select WATCHDOG_CORE default y help -- GitLab From 9ad82f117f057ed04643cc60ce16fbf59e1b167f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Mar 2017 10:15:31 +0100 Subject: [PATCH 0823/1084] watchdog: retu: restore MFD dependency The retu watchdog calls into the respective mfd driver, but fails to link if that is diabled: drivers/watchdog/built-in.o: In function `retu_wdt_set_timeout': ziirave_wdt.c:(.text+0x8c88): undefined reference to `retu_write' ziirave_wdt.c:(.text+0x8c88): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `retu_write' drivers/watchdog/built-in.o: In function `retu_wdt_start': ziirave_wdt.c:(.text+0x8cc8): undefined reference to `retu_write' ziirave_wdt.c:(.text+0x8cc8): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `retu_write' This restores the dependency as it was before Fixes: da2a68b3eb47 ("watchdog: Enable COMPILE_TEST where possible") Signed-off-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- drivers/watchdog/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index a199e8536ce4..52a70ee6014f 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -586,7 +586,7 @@ config UX500_WATCHDOG config RETU_WATCHDOG tristate "Retu watchdog" - depends on MFD_RETU || COMPILE_TEST + depends on MFD_RETU select WATCHDOG_CORE help Retu watchdog driver for Nokia Internet Tablets (770, N800, -- GitLab From e3736c3eb3a6f7c0966923b629c9f92b558aa9c7 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Mon, 20 Feb 2017 13:06:21 +0200 Subject: [PATCH 0824/1084] kvm: convert kvm.users_count from atomic_t to refcount_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
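A minimal sketch of the get/put pattern the conversion targets, using made-up names rather than the kvm ones:

    #include <linux/refcount.h>
    #include <linux/slab.h>

    struct obj {
            refcount_t users;
    };

    static struct obj *obj_alloc(void)
    {
            struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

            if (o)
                    refcount_set(&o->users, 1);     /* one initial reference */
            return o;
    }

    static void obj_get(struct obj *o)
    {
            refcount_inc(&o->users);        /* saturates and WARNs instead of wrapping */
    }

    static void obj_put(struct obj *o)
    {
            if (refcount_dec_and_test(&o->users))
                    kfree(o);
    }

Unlike atomic_inc(), refcount_inc() refuses to wrap a saturated counter, so an unbalanced increment cannot silently turn into a premature free.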
Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Radim Krčmář --- include/linux/kvm_host.h | 3 ++- virt/kvm/kvm_main.c | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8d69d5150748..2c14ad9809da 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -401,7 +402,7 @@ struct kvm { #endif struct kvm_vm_stat stat; struct kvm_arch arch; - atomic_t users_count; + refcount_t users_count; #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; spinlock_t ring_lock; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cc4d6e0dd2a2..c6b7aff634be 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -617,7 +617,7 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); - atomic_set(&kvm->users_count, 1); + refcount_set(&kvm->users_count, 1); INIT_LIST_HEAD(&kvm->devices); r = kvm_arch_init_vm(kvm, type); @@ -747,13 +747,13 @@ static void kvm_destroy_vm(struct kvm *kvm) void kvm_get_kvm(struct kvm *kvm) { - atomic_inc(&kvm->users_count); + refcount_inc(&kvm->users_count); } EXPORT_SYMBOL_GPL(kvm_get_kvm); void kvm_put_kvm(struct kvm *kvm) { - if (atomic_dec_and_test(&kvm->users_count)) + if (refcount_dec_and_test(&kvm->users_count)) kvm_destroy_vm(kvm); } EXPORT_SYMBOL_GPL(kvm_put_kvm); @@ -3639,7 +3639,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, * To avoid the race between open and the removal of the debugfs * directory we test against the users count. */ - if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0)) + if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) return -ENOENT; if (simple_attr_open(inode, file, get, set, fmt)) { -- GitLab From b7ceaec112aa35aa287325754d8c52b8304892cd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 22 Feb 2017 07:36:16 -0800 Subject: [PATCH 0825/1084] x86/asm: Tidy up TSS limit code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In an earlier version of the patch ("x86/kvm/vmx: Defer TR reload after VM exit") that introduced TSS limit validity tracking, I confused which helper was which. On reflection, the names I chose sucked. Rename the helpers to make it more obvious what's going on and add some comments. While I'm at it, clear __tss_limit_invalid when force-reloading as well as when contitionally reloading, since any TR reload fixes the limit. 
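The underlying idea is lazy invalidation: mark the limit stale when it may be wrong and reload only when a correct limit is actually needed. A generic hedged sketch of that pattern (illustrative names, not the x86 helpers):

    #include <stdbool.h>

    static bool limit_invalid;

    static void force_reload(void)
    {
            /* ... reload the descriptor here ... */
            limit_invalid = false;  /* any reload makes the limit correct */
    }

    static void invalidate(void)
    {
            limit_invalid = true;   /* defer the costly reload */
    }

    static void refresh_if_needed(void)
    {
            if (limit_invalid)
                    force_reload();
    }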
Signed-off-by: Andy Lutomirski Signed-off-by: Radim Krčmář --- arch/x86/include/asm/desc.h | 18 +++++++++++------- arch/x86/kernel/ioport.c | 8 +++++++- arch/x86/kernel/process.c | 6 +++--- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index cb8f9149f6c8..1548ca92ad3f 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -205,6 +205,8 @@ static inline void native_load_tr_desc(void) asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); } +DECLARE_PER_CPU(bool, __tss_limit_invalid); + static inline void force_reload_TR(void) { struct desc_struct *d = get_cpu_gdt_table(smp_processor_id()); @@ -220,18 +222,20 @@ static inline void force_reload_TR(void) write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS); load_TR_desc(); + this_cpu_write(__tss_limit_invalid, false); } -DECLARE_PER_CPU(bool, need_tr_refresh); - -static inline void refresh_TR(void) +/* + * Call this if you need the TSS limit to be correct, which should be the case + * if and only if you have TIF_IO_BITMAP set or you're switching to a task + * with TIF_IO_BITMAP set. + */ +static inline void refresh_tss_limit(void) { DEBUG_LOCKS_WARN_ON(preemptible()); - if (unlikely(this_cpu_read(need_tr_refresh))) { + if (unlikely(this_cpu_read(__tss_limit_invalid))) force_reload_TR(); - this_cpu_write(need_tr_refresh, false); - } } /* @@ -250,7 +254,7 @@ static inline void invalidate_tss_limit(void) if (unlikely(test_thread_flag(TIF_IO_BITMAP))) force_reload_TR(); else - this_cpu_write(need_tr_refresh, true); + this_cpu_write(__tss_limit_invalid, true); } static inline void native_load_gdt(const struct desc_ptr *dtr) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b01bc8517450..875d3d25dd6a 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -47,8 +47,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) t->io_bitmap_ptr = bitmap; set_thread_flag(TIF_IO_BITMAP); + /* + * Now that we have an IO bitmap, we need our TSS limit to be + * correct. It's fine if we are preempted after doing this: + * with TIF_IO_BITMAP set, context switches will keep our TSS + * limit correct. + */ preempt_disable(); - refresh_TR(); + refresh_tss_limit(); preempt_enable(); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7780efa635b9..0b302591b51f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -65,8 +65,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { }; EXPORT_PER_CPU_SYMBOL(cpu_tss); -DEFINE_PER_CPU(bool, need_tr_refresh); -EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh); +DEFINE_PER_CPU(bool, __tss_limit_invalid); +EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); /* * this gets called so that we can store lazy state into memory and copy the @@ -218,7 +218,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, * Make sure that the TSS limit is correct for the CPU * to notice the IO bitmap. 
*/ - refresh_TR(); + refresh_tss_limit(); } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { /* * Clear any possible leftover bits: -- GitLab From 0eb1d0fa6a68324b51db4f7ae16d9b042c5b2de4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 22 Feb 2017 07:36:17 -0800 Subject: [PATCH 0826/1084] selftests/x86: Add a basic selftest for ioperm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This doesn't fully exercise the interaction between KVM and ioperm(), but it does test basic functionality. Signed-off-by: Andy Lutomirski Signed-off-by: Radim Krčmář --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/ioperm.c | 170 +++++++++++++++++++++++++++ 2 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/ioperm.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 83d8b1c6cb0e..bed3e6086cb1 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,7 +5,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ - check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \ + check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ protection_keys test_vdso TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c new file mode 100644 index 000000000000..b77313ba2ab1 --- /dev/null +++ b/tools/testing/selftests/x86/ioperm.c @@ -0,0 +1,170 @@ +/* + * ioperm.c - Test case for ioperm(2) + * Copyright (c) 2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int nerrs = 0; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *si, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static bool try_outb(unsigned short port) +{ + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); + if (sigsetjmp(jmpbuf, 1) != 0) { + return false; + } else { + asm volatile ("outb %%al, %w[port]" + : : [port] "Nd" (port), "a" (0)); + return true; + } + clearhandler(SIGSEGV); +} + +static void expect_ok(unsigned short port) +{ + if (!try_outb(port)) { + printf("[FAIL]\toutb to 0x%02hx failed\n", port); + exit(1); + } + + printf("[OK]\toutb to 0x%02hx worked\n", port); +} + +static void expect_gp(unsigned short port) +{ + if (try_outb(port)) { + printf("[FAIL]\toutb to 0x%02hx worked\n", port); + exit(1); + } + + printf("[OK]\toutb to 0x%02hx failed\n", port); +} + +int main(void) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 0"); + + expect_gp(0x80); + expect_gp(0xed); + + /* + * Probe 
for ioperm support. Note that clearing ioperm bits + * works even as nonroot. + */ + printf("[RUN]\tenable 0x80\n"); + if (ioperm(0x80, 1, 1) != 0) { + printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n", + errno); + return 0; + } + expect_ok(0x80); + expect_gp(0xed); + + printf("[RUN]\tdisable 0x80\n"); + if (ioperm(0x80, 1, 0) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + expect_gp(0x80); + expect_gp(0xed); + + /* Make sure that fork() preserves ioperm. */ + if (ioperm(0x80, 1, 1) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + + pid_t child = fork(); + if (child == -1) + err(1, "fork"); + + if (child == 0) { + printf("[RUN]\tchild: check that we inherited permissions\n"); + expect_ok(0x80); + expect_gp(0xed); + return 0; + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + /* Test the capability checks. */ + + printf("\tDrop privileges\n"); + if (setresuid(1, 1, 1) != 0) { + printf("[WARN]\tDropping privileges failed\n"); + return 0; + } + + printf("[RUN]\tdisable 0x80\n"); + if (ioperm(0x80, 1, 0) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + printf("[OK]\tit worked\n"); + + printf("[RUN]\tenable 0x80 again\n"); + if (ioperm(0x80, 1, 1) == 0) { + printf("[FAIL]\tit succeeded but should have failed.\n"); + return 1; + } + printf("[OK]\tit failed\n"); + return 0; +} -- GitLab From 0fce546f9f07b94ccc9de09cf48d35e18946d2fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Sat, 25 Feb 2017 17:46:53 -0500 Subject: [PATCH 0827/1084] x86/kvm/vmx: remove unused variable in segment_base() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pointer 'struct desc_struct *d' is unused since commit 8c2e41f7ae12 ("x86/kvm/vmx: Simplify segment_base()") so let's remove it. Signed-off-by: Jérémy Lefaure Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ef4ba71dbb66..764f1f897847 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2053,7 +2053,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) static unsigned long segment_base(u16 selector) { struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); - struct desc_struct *d; struct desc_struct *table; unsigned long v; -- GitLab From acc9ab60132739e0c5ab40644444cd7b22d12886 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 27 Feb 2017 04:24:39 -0800 Subject: [PATCH 0828/1084] KVM: nVMX: Fix pending events injection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit L2 fails to boot on a non-APICv box dues to 'commit 0ad3bed6c5ec ("kvm: nVMX: move nested events check to kvm_vcpu_running")' KVM internal error. 
Suberror: 3 extra data[0]: 800000ef extra data[1]: 1 RAX=0000000000000000 RBX=ffffffff81f36140 RCX=0000000000000000 RDX=0000000000000000 RSI=0000000000000000 RDI=0000000000000000 RBP=ffff88007c92fe90 RSP=ffff88007c92fe90 R8 =ffff88007fccdca0 R9 =0000000000000000 R10=00000000fffedb3d R11=0000000000000000 R12=0000000000000003 R13=0000000000000000 R14=0000000000000000 R15=ffff88007c92c000 RIP=ffffffff810645e6 RFL=00000246 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 0000000000000000 ffffffff 00c00000 CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA] SS =0000 0000000000000000 ffffffff 00c00000 DS =0000 0000000000000000 ffffffff 00c00000 FS =0000 0000000000000000 ffffffff 00c00000 GS =0000 ffff88007fcc0000 ffffffff 00c00000 LDT=0000 0000000000000000 ffffffff 00c00000 TR =0040 ffff88007fcd4200 00002087 00008b00 DPL=0 TSS64-busy GDT= ffff88007fcc9000 0000007f IDT= ffffffffff578000 00000fff CR0=80050033 CR2=00000000ffffffff CR3=0000000001e0a000 CR4=003406e0 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000fffe0ff0 DR7=0000000000000400 EFER=0000000000000d01 We should try to reinject previous events if any before trying to inject new event if pending. If vmexit is triggered by L2 guest and L0 interested in, we should reinject IDT-vectoring info to L2 through vmcs02 if any, otherwise, we can consider new IRQs/NMIs which can be injected and call nested events callback to switch from L2 to L1 if needed and inject the proper vmexit events. However, 'commit 0ad3bed6c5ec ("kvm: nVMX: move nested events check to kvm_vcpu_running")' results in the handle events order reversely on non-APICv box. This patch fixes it by bailing out for pending events and not consider new events in this scenario. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jan Kiszka Signed-off-by: Wanpeng Li Fixes: 0ad3bed6c5ec ("kvm: nVMX: move nested events check to kvm_vcpu_running") Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 764f1f897847..283aa8601833 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10641,6 +10641,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (vcpu->arch.exception.pending || + vcpu->arch.nmi_injected || + vcpu->arch.interrupt.pending) + return -EBUSY; + if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { if (vmx->nested.nested_run_pending) @@ -10650,8 +10655,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) } if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { - if (vmx->nested.nested_run_pending || - vcpu->arch.interrupt.pending) + if (vmx->nested.nested_run_pending) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | -- GitLab From 540b1c48c37ac0ad66212004db21e1ff7e2d78be Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 27 Feb 2017 15:43:06 +0000 Subject: [PATCH 0829/1084] rxrpc: Fix deadlock between call creation and sendmsg/recvmsg All the routines by which rxrpc is accessed from the outside are serialised by means of the socket lock (sendmsg, recvmsg, bind, rxrpc_kernel_begin_call(), ...) and this presents a problem: (1) If a number of calls on the same socket are in the process of connection to the same peer, a maximum of four concurrent live calls are permitted before further calls need to wait for a slot. 
(2) If a call is waiting for a slot, it is deep inside sendmsg() or rxrpc_kernel_begin_call() and the entry function is holding the socket lock. (3) sendmsg() and recvmsg() or the in-kernel equivalents are prevented from servicing the other calls as they need to take the socket lock to do so. (4) The socket is stuck until a call is aborted and makes its slot available to the waiter. Fix this by: (1) Provide each call with a mutex ('user_mutex') that arbitrates access by the users of rxrpc separately for each specific call. (2) Make rxrpc_sendmsg() and rxrpc_recvmsg() unlock the socket as soon as they've got a call and taken its mutex. Note that I'm returning EWOULDBLOCK from recvmsg() if MSG_DONTWAIT is set but someone else has the lock. Should I instead only return EWOULDBLOCK if there's nothing currently to be done on a socket, and sleep in this particular instance because there is something to be done, but we appear to be blocked by the interrupt handler doing its ping? (3) Make rxrpc_new_client_call() unlock the socket after allocating a new call, locking its user mutex and adding it to the socket's call tree. The call is returned locked so that sendmsg() can add data to it immediately. From the moment the call is in the socket tree, it is subject to access by sendmsg() and recvmsg() - even if it isn't connected yet. (4) Lock new service calls in the UDP data_ready handler (in rxrpc_new_incoming_call()) because they may already be in the socket's tree and the data_ready handler makes them live immediately if a user ID has already been preassigned. Note that the new call is locked before any notifications are sent that it is live, so doing mutex_trylock() *ought* to always succeed. Userspace is prevented from doing sendmsg() on calls that are in a too-early state in rxrpc_do_sendmsg(). (5) Make rxrpc_new_incoming_call() return the call with the user mutex held so that a ping can be scheduled immediately under it. Note that it might be worth moving the ping call into rxrpc_new_incoming_call() and then we can drop the mutex there. (6) Make rxrpc_accept_call() take the lock on the call it is accepting and release the socket after adding the call to the socket's tree. This is slightly tricky as we've dequeued the call by that point and have to requeue it. Note that requeuing emits a trace event. (7) Make rxrpc_kernel_send_data() and rxrpc_kernel_recv_data() take the new mutex immediately and don't bother with the socket mutex at all. This patch has the nice bonus that calls on the same socket are now to some extent parallelisable. Note that we might want to move rxrpc_service_prealloc() calls out from the socket lock and give it its own lock, so that we don't hang progress in other calls because we're waiting for the allocator. We probably also want to avoid calling rxrpc_notify_socket() from within the socket lock (rxrpc_accept_call()). Signed-off-by: David Howells Tested-by: Marc Dionne Signed-off-by: David S. 
Miller --- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/af_rxrpc.c | 12 ++++++-- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_accept.c | 48 ++++++++++++++++++++++++++++++ net/rxrpc/call_object.c | 18 ++++++++++-- net/rxrpc/input.c | 1 + net/rxrpc/recvmsg.c | 39 ++++++++++++++++++++---- net/rxrpc/sendmsg.c | 57 +++++++++++++++++++++++++++++------- 8 files changed, 156 insertions(+), 22 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 593f586545eb..39123c06a566 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -119,6 +119,7 @@ enum rxrpc_recvmsg_trace { rxrpc_recvmsg_full, rxrpc_recvmsg_hole, rxrpc_recvmsg_next, + rxrpc_recvmsg_requeue, rxrpc_recvmsg_return, rxrpc_recvmsg_terminal, rxrpc_recvmsg_to_be_accepted, @@ -277,6 +278,7 @@ enum rxrpc_congest_change { EM(rxrpc_recvmsg_full, "FULL") \ EM(rxrpc_recvmsg_hole, "HOLE") \ EM(rxrpc_recvmsg_next, "NEXT") \ + EM(rxrpc_recvmsg_requeue, "REQU") \ EM(rxrpc_recvmsg_return, "RETN") \ EM(rxrpc_recvmsg_terminal, "TERM") \ EM(rxrpc_recvmsg_to_be_accepted, "TBAC") \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 199b46e93e64..7fb59c3f1542 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -290,10 +290,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); + /* The socket has been unlocked. */ if (!IS_ERR(call)) call->notify_rx = notify_rx; - release_sock(&rx->sk); + mutex_unlock(&call->user_mutex); _leave(" = %p", call); return call; } @@ -310,7 +311,10 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call); void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); + + mutex_lock(&call->user_mutex); rxrpc_release_call(rxrpc_sk(sock->sk), call); + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_kernel); } EXPORT_SYMBOL(rxrpc_kernel_end_call); @@ -450,14 +454,16 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) case RXRPC_SERVER_BOUND: case RXRPC_SERVER_LISTENING: ret = rxrpc_do_sendmsg(rx, m, len); - break; + /* The socket has been unlocked */ + goto out; default: ret = -EINVAL; - break; + goto error_unlock; } error_unlock: release_sock(&rx->sk); +out: _leave(" = %d", ret); return ret; } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 12be432be9b2..26a7b1db1361 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -467,6 +467,7 @@ struct rxrpc_call { struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ struct rxrpc_sock __rcu *socket; /* socket responsible */ + struct mutex user_mutex; /* User access mutex */ ktime_t ack_at; /* When deferred ACK needs to happen */ ktime_t resend_at; /* When next resend needs to happen */ ktime_t ping_at; /* When next to send a ping */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 7c4c64ab8da2..0ed181f53f32 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -323,6 +323,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * * If we want to report an error, we mark the skb with the packet type and * abort code and return NULL. + * + * The call is returned with the user access mutex held. 
*/ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_connection *conn, @@ -371,6 +373,18 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, trace_rxrpc_receive(call, rxrpc_receive_incoming, sp->hdr.serial, sp->hdr.seq); + /* Lock the call to prevent rxrpc_kernel_send/recv_data() and + * sendmsg()/recvmsg() inconveniently stealing the mutex once the + * notification is generated. + * + * The BUG should never happen because the kernel should be well + * behaved enough not to access the call before the first notification + * event and userspace is prevented from doing so until the state is + * appropriate. + */ + if (!mutex_trylock(&call->user_mutex)) + BUG(); + /* Make the call live. */ rxrpc_incoming_call(rx, call, skb); conn = call->conn; @@ -429,10 +443,12 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, /* * handle acceptance of a call by userspace * - assign the user call ID to the call at the front of the queue + * - called with the socket locked. */ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, unsigned long user_call_ID, rxrpc_notify_rx_t notify_rx) + __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call; struct rb_node *parent, **pp; @@ -446,6 +462,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, if (list_empty(&rx->to_be_accepted)) { write_unlock(&rx->call_lock); + release_sock(&rx->sk); kleave(" = -ENODATA [empty]"); return ERR_PTR(-ENODATA); } @@ -470,10 +487,39 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, */ call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); + write_unlock(&rx->call_lock); + + /* We need to gain the mutex from the interrupt handler without + * upsetting lockdep, so we have to release it there and take it here. + * We are, however, still holding the socket lock, so other accepts + * must wait for us and no one can add the user ID behind our backs. + */ + if (mutex_lock_interruptible(&call->user_mutex) < 0) { + release_sock(&rx->sk); + kleave(" = -ERESTARTSYS"); + return ERR_PTR(-ERESTARTSYS); + } + + write_lock(&rx->call_lock); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); rxrpc_see_call(call); + /* Find the user ID insertion point. 
*/ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + call = rb_entry(parent, struct rxrpc_call, sock_node); + + if (user_call_ID < call->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > call->user_call_ID) + pp = &(*pp)->rb_right; + else + BUG(); + } + write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: @@ -499,6 +545,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxrpc_notify_socket(call); rxrpc_service_prealloc(rx, GFP_KERNEL); + release_sock(&rx->sk); _leave(" = %p{%d}", call, call->debug_id); return call; @@ -515,6 +562,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); out: rxrpc_service_prealloc(rx, GFP_KERNEL); + release_sock(&rx->sk); _leave(" = %d", ret); return ERR_PTR(ret); } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8b94db3c9b2e..d79cd36987a9 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -115,6 +115,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) if (!call->rxtx_annotations) goto nomem_2; + mutex_init(&call->user_mutex); setup_timer(&call->timer, rxrpc_call_timer_expired, (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); @@ -194,14 +195,16 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) } /* - * set up a call for the given data - * - called in process context with IRQs enabled + * Set up a call for the given parameters. + * - Called with the socket lock held, which it must release. + * - If it returns a call, the call's lock will need releasing by the caller. */ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, unsigned long user_call_ID, gfp_t gfp) + __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call, *xcall; struct rb_node *parent, **pp; @@ -212,6 +215,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, call = rxrpc_alloc_client_call(srx, gfp); if (IS_ERR(call)) { + release_sock(&rx->sk); _leave(" = %ld", PTR_ERR(call)); return call; } @@ -219,6 +223,11 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), here, (const void *)user_call_ID); + /* We need to protect a partially set up call against the user as we + * will be acting outside the socket lock. + */ + mutex_lock(&call->user_mutex); + /* Publish the call, even though it is incompletely set up as yet */ write_lock(&rx->call_lock); @@ -250,6 +259,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, list_add_tail(&call->link, &rxrpc_calls); write_unlock(&rxrpc_call_lock); + /* From this point on, the call is protected by its own lock. */ + release_sock(&rx->sk); + /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. 
*/ @@ -279,6 +291,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, */ error_dup_user_ID: write_unlock(&rx->call_lock); + release_sock(&rx->sk); ret = -EEXIST; error: @@ -287,6 +300,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), here, ERR_PTR(ret)); rxrpc_release_call(rx, call); + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ERR_PTR(ret); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 78ec33477adf..9f4cfa25af7c 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1194,6 +1194,7 @@ void rxrpc_data_ready(struct sock *udp_sk) goto reject_packet; } rxrpc_send_ping(call, skb, skew); + mutex_unlock(&call->user_mutex); } rxrpc_input_call_packet(call, skb, skew); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index f3a688e10843..22447dbcc380 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -487,6 +487,20 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); + /* We're going to drop the socket lock, so we need to lock the call + * against interference by sendmsg. + */ + if (!mutex_trylock(&call->user_mutex)) { + ret = -EWOULDBLOCK; + if (flags & MSG_DONTWAIT) + goto error_requeue_call; + ret = -ERESTARTSYS; + if (mutex_lock_interruptible(&call->user_mutex) < 0) + goto error_requeue_call; + } + + release_sock(&rx->sk); + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); @@ -502,7 +516,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, &call->user_call_ID); } if (ret < 0) - goto error; + goto error_unlock_call; } if (msg->msg_name) { @@ -533,12 +547,12 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, } if (ret < 0) - goto error; + goto error_unlock_call; if (call->state == RXRPC_CALL_COMPLETE) { ret = rxrpc_recvmsg_term(call, msg); if (ret < 0) - goto error; + goto error_unlock_call; if (!(flags & MSG_PEEK)) rxrpc_release_call(rx, call); msg->msg_flags |= MSG_EOR; @@ -551,8 +565,21 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_flags &= ~MSG_MORE; ret = copied; -error: +error_unlock_call: + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); + return ret; + +error_requeue_call: + if (!(flags & MSG_PEEK)) { + write_lock_bh(&rx->recvmsg_lock); + list_add(&call->recvmsg_link, &rx->recvmsg_q); + write_unlock_bh(&rx->recvmsg_lock); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0, 0, 0, 0); + } else { + rxrpc_put_call(call, rxrpc_call_put); + } error_no_call: release_sock(&rx->sk); trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); @@ -609,7 +636,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, iov.iov_len = size - *_offset; iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset); - lock_sock(sock->sk); + mutex_lock(&call->user_mutex); switch (call->state) { case RXRPC_CALL_CLIENT_RECV_REPLY: @@ -648,7 +675,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, read_phase_complete: ret = 1; out: - release_sock(sock->sk); + mutex_unlock(&call->user_mutex); _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); return ret; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0a6ef217aa8a..31c1538c1a8d 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -59,9 +59,12 @@ 
static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, } trace_rxrpc_transmit(call, rxrpc_transmit_wait); - release_sock(&rx->sk); + mutex_unlock(&call->user_mutex); *timeo = schedule_timeout(*timeo); - lock_sock(&rx->sk); + if (mutex_lock_interruptible(&call->user_mutex) < 0) { + ret = sock_intr_errno(*timeo); + break; + } } remove_wait_queue(&call->waitq, &myself); @@ -171,7 +174,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, /* * send data through a socket * - must be called in process context - * - caller holds the socket locked + * - The caller holds the call user access mutex, but not the socket lock. */ static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, @@ -437,10 +440,13 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, /* * Create a new client call for sendmsg(). + * - Called with the socket lock held, which it must release. + * - If it returns a call, the call's lock will need releasing by the caller. */ static struct rxrpc_call * rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, unsigned long user_call_ID, bool exclusive) + __releases(&rx->sk.sk_lock.slock) { struct rxrpc_conn_parameters cp; struct rxrpc_call *call; @@ -450,8 +456,10 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, _enter(""); - if (!msg->msg_name) + if (!msg->msg_name) { + release_sock(&rx->sk); return ERR_PTR(-EDESTADDRREQ); + } key = rx->key; if (key && !rx->key->payload.data[0]) @@ -464,6 +472,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, cp.exclusive = rx->exclusive | exclusive; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); + /* The socket is now unlocked */ _leave(" = %p\n", call); return call; @@ -475,6 +484,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, * - the socket may be either a client socket or a server socket */ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) + __releases(&rx->sk.sk_lock.slock) { enum rxrpc_command cmd; struct rxrpc_call *call; @@ -488,12 +498,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, &exclusive); if (ret < 0) - return ret; + goto error_release_sock; if (cmd == RXRPC_CMD_ACCEPT) { + ret = -EINVAL; if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) - return -EINVAL; + goto error_release_sock; call = rxrpc_accept_call(rx, user_call_ID, NULL); + /* The socket is now unlocked. */ if (IS_ERR(call)) return PTR_ERR(call); rxrpc_put_call(call, rxrpc_call_put); @@ -502,12 +514,29 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) call = rxrpc_find_call_by_user_ID(rx, user_call_ID); if (!call) { + ret = -EBADSLT; if (cmd != RXRPC_CMD_SEND_DATA) - return -EBADSLT; + goto error_release_sock; + ret = -EBUSY; + if (call->state == RXRPC_CALL_UNINITIALISED || + call->state == RXRPC_CALL_CLIENT_AWAIT_CONN || + call->state == RXRPC_CALL_SERVER_PREALLOC || + call->state == RXRPC_CALL_SERVER_SECURING || + call->state == RXRPC_CALL_SERVER_ACCEPTING) + goto error_release_sock; call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, exclusive); + /* The socket is now unlocked... */ if (IS_ERR(call)) return PTR_ERR(call); + /* ... and we have the call lock. 
*/ + } else { + ret = mutex_lock_interruptible(&call->user_mutex); + release_sock(&rx->sk); + if (ret < 0) { + ret = -ERESTARTSYS; + goto error_put; + } } _debug("CALL %d USR %lx ST %d on CONN %p", @@ -535,9 +564,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = rxrpc_send_data(rx, call, msg, len); } + mutex_unlock(&call->user_mutex); +error_put: rxrpc_put_call(call, rxrpc_call_put); _leave(" = %d", ret); return ret; + +error_release_sock: + release_sock(&rx->sk); + return ret; } /** @@ -562,7 +597,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, ASSERTCMP(msg->msg_name, ==, NULL); ASSERTCMP(msg->msg_control, ==, NULL); - lock_sock(sock->sk); + mutex_lock(&call->user_mutex); _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); @@ -577,7 +612,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); } - release_sock(sock->sk); + mutex_unlock(&call->user_mutex); _leave(" = %d", ret); return ret; } @@ -598,12 +633,12 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, { _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); - lock_sock(sock->sk); + mutex_lock(&call->user_mutex); if (rxrpc_abort_call(why, call, 0, abort_code, error)) rxrpc_send_abort_packet(call); - release_sock(sock->sk); + mutex_unlock(&call->user_mutex); _leave(""); } -- GitLab From 5179b26694c92373275e4933f5d0ff32d585c675 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 28 Feb 2017 12:41:29 +0800 Subject: [PATCH 0830/1084] sctp: call rcu_read_lock before checking for duplicate transport nodes Commit cd2b70875058 ("sctp: check duplicate node before inserting a new transport") called rhltable_lookup() to check for the duplicate transport node in transport rhashtable. But rhltable_lookup() doesn't call rcu_read_lock inside, so it could cause a use-after-free issue if it dereferences a node that another CPU has already freed. Note that the sock lock cannot prevent this, as it is per socket. Fix this by calling rcu_read_lock before checking for duplicate transport nodes. Fixes: cd2b70875058 ("sctp: check duplicate node before inserting a new transport") Reported-by: Andrey Konovalov Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/input.c b/net/sctp/input.c index fc458968fe4b..2a28ab20487f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -884,14 +884,17 @@ int sctp_hash_transport(struct sctp_transport *t) arg.paddr = &t->ipaddr; arg.lport = htons(t->asoc->base.bind_addr.port); + rcu_read_lock(); list = rhltable_lookup(&sctp_transport_hashtable, &arg, sctp_hash_params); rhl_for_each_entry_rcu(transport, tmp, list, node) if (transport->asoc->ep == t->asoc->ep) { + rcu_read_unlock(); err = -EEXIST; goto out; } + rcu_read_unlock(); err = rhltable_insert_key(&sctp_transport_hashtable, &arg, &t->node, sctp_hash_params); -- GitLab From 4f7bfb3982e02aacc5fb6e1a121e5326c1778ac3 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 28 Feb 2017 01:45:40 -0500 Subject: [PATCH 0831/1084] rds: ib: add the static type to the variables The variables rds_ib_mr_1m_pool_size and rds_ib_mr_8k_pool_size are used only in the ib.c file. As such, mark them static to limit their scope to that file.
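For illustration only (an abbreviated sketch of the hunks below, not the complete file), the resulting shape in net/rds/ib.c is:

/* File-local now; nothing outside ib.c can reference these. */
static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;

/* module_param() works fine on static variables, so the existing module
 * parameter is unaffected; only the extern declarations in ib_mr.h go away.
 */
module_param(rds_ib_mr_1m_pool_size, int, 0444);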
Cc: Joe Jin Cc: Junxiao Bi Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib.c | 4 ++-- net/rds/ib_mr.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index 91fe46f1e4cc..0f557b243311 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -45,8 +45,8 @@ #include "ib.h" #include "ib_mr.h" -unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; -unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; +static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE; +static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE; unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; module_param(rds_ib_mr_1m_pool_size, int, 0444); diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 24c086db4511..5d6e98a79a5e 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h @@ -107,8 +107,6 @@ struct rds_ib_mr_pool { }; extern struct workqueue_struct *rds_ib_mr_wq; -extern unsigned int rds_ib_mr_1m_pool_size; -extern unsigned int rds_ib_mr_8k_pool_size; extern bool prefer_frmr; struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, -- GitLab From f7df4923fa986247e93ec2cdff5ca168fff14dcf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 28 Feb 2017 08:55:40 +0100 Subject: [PATCH 0832/1084] mlxsw: spectrum_router: Avoid potential packets loss When the structure of the LPM tree changes (f.e., due to the addition of a new prefix), we unbind the old tree and then bind the new one. This may result in temporary packet loss. Instead, overwrite the old binding with the new one. Fixes: 6b75c4807db3 ("mlxsw: spectrum_router: Add virtual router management") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d7ac22d7f940..bd8de6b9be71 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -441,30 +441,40 @@ static int mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, struct mlxsw_sp_prefix_usage *req_prefix_usage) { - struct mlxsw_sp_lpm_tree *lpm_tree; + struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree; + struct mlxsw_sp_lpm_tree *new_tree; + int err; - if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, - &vr->lpm_tree->prefix_usage)) + if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage)) return 0; - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, + new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, vr->proto, false); - if (IS_ERR(lpm_tree)) { + if (IS_ERR(new_tree)) { /* We failed to get a tree according to the required * prefix usage. However, the current tree might be still good * for us if our requirement is subset of the prefixes used * in the tree. 
*/ if (mlxsw_sp_prefix_usage_subset(req_prefix_usage, - &vr->lpm_tree->prefix_usage) + &lpm_tree->prefix_usage)) return 0; - return PTR_ERR(lpm_tree); + return PTR_ERR(new_tree); } - mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); - mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); + /* Prevent packet loss by overwriting existing binding */ + vr->lpm_tree = new_tree; + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + if (err) + goto err_tree_bind; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + + return 0; + +err_tree_bind: vr->lpm_tree = lpm_tree; - return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); + return err; } static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, -- GitLab From 0bf09c397e13b70ab6064eb2af69b8fa8e68a24e Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 28 Feb 2017 10:39:48 +0200 Subject: [PATCH 0833/1084] MAINTAINERS: Orphan usb/net/hso driver The email address of Jan Dumon bounces, and there is no relevant information on the linked website. Signed-off-by: Baruch Siach Signed-off-by: David S. Miller --- MAINTAINERS | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 846f97aa3508..5a00239fd7b3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6012,9 +6012,8 @@ F: include/linux/hsi/ F: include/uapi/linux/hsi/ HSO 3G MODEM DRIVER -M: Jan Dumon -W: http://www.pharscape.org -S: Maintained +L: linux-usb@vger.kernel.org +S: Orphan F: drivers/net/usb/hso.c HSR NETWORK PROTOCOL -- GitLab From 4f3de46f7a57a8ecc16c7ef69c6917b3731a7c5f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Feb 2017 11:58:22 +0000 Subject: [PATCH 0834/1084] net: usb: asix_devices: fix missing return code check on call to asix_write_medium_mode The call to asix_write_medium_mode is not updating the return code ret and yet ret is being checked for an error. Fix this by assigning ret the return code from the call to asix_write_medium_mode. Detected by CoverityScan, CID#1357148 ("Logically Dead Code") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/usb/asix_devices.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 6e98ede997d3..0dd510604118 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -346,7 +346,7 @@ static int ax88772_reset(struct usbnet *dev) if (ret < 0) goto out; - asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, 0); + ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, 0); if (ret < 0) goto out; -- GitLab From b2d0fe35471d1a71471f99147ffb5986bd60e744 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 28 Feb 2017 15:02:15 +0300 Subject: [PATCH 0835/1084] net/mlx4: && vs & typo Bitwise & was obviously intended here. Fixes: 745d8ae4622c ("net/mlx4: Spoofcheck and zero MAC can't coexist") Signed-off-by: Dan Carpenter Reviewed-by: Tariq Toukan Signed-off-by: David S.
Miller --- include/linux/mlx4/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index e965e5090d96..a858bcb6220b 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -109,7 +109,7 @@ static inline void mlx4_u64_to_mac(u8 *addr, u64 mac) int i; for (i = ETH_ALEN; i > 0; i--) { - addr[i - 1] = mac && 0xFF; + addr[i - 1] = mac & 0xFF; mac >>= 8; } } -- GitLab From 39e6c8208d7b6fb9d2047850fb3327db567b564b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Feb 2017 10:34:50 -0800 Subject: [PATCH 0836/1084] net: solve a NAPI race While playing with mlx4 hardware timestamping of RX packets, I found that some packets were received by TCP stack with a ~200 ms delay... Since the timestamp was provided by the NIC, and my probe was added in tcp_v4_rcv() while in BH handler, I was confident it was not a sender issue, or a drop in the network. This would happen with a very low probability, but hurting RPC workloads. A NAPI driver normally arms the IRQ after the napi_complete_done(), after NAPI_STATE_SCHED is cleared, so that the hard irq handler can grab it. Problem is that if another point in the stack grabs NAPI_STATE_SCHED bit while IRQ are not disabled, we might have later an IRQ firing and finding this bit set, right before napi_complete_done() clears it. This can happen with busy polling users, or if gro_flush_timeout is used. But some other uses of napi_schedule() in drivers can cause this as well. thread 1 thread 2 (could be on same cpu, or not) // busy polling or napi_watchdog() napi_schedule(); ... napi->poll() device polling: read 2 packets from ring buffer Additional 3rd packet is available. device hard irq // does nothing because NAPI_STATE_SCHED bit is owned by thread 1 napi_schedule(); napi_complete_done(napi, 2); rearm_irq(); Note that rearm_irq() will not force the device to send an additional IRQ for the packet it already signaled (3rd packet in my example) This patch adds a new NAPI_STATE_MISSED bit, that napi_schedule_prep() can set if it could not grab NAPI_STATE_SCHED Then napi_complete_done() properly reschedules the napi to make sure we do not miss something. Since we manipulate multiple bits at once, use cmpxchg() like in sk_busy_loop() to provide proper transactions. In v2, I changed napi_watchdog() to use a relaxed variant of napi_schedule_prep() : No need to set NAPI_STATE_MISSED from this point. In v3, I added more details in the changelog and clears NAPI_STATE_MISSED in busy_poll_stop() In v4, I added the ideas given by Alexander Duyck in v3 review Signed-off-by: Eric Dumazet Cc: Alexander Duyck Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 29 +++++---------- net/core/dev.c | 76 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 81 insertions(+), 24 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f40f0ab3847a..97456b2539e4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -330,6 +330,7 @@ struct napi_struct { enum { NAPI_STATE_SCHED, /* Poll is scheduled */ + NAPI_STATE_MISSED, /* reschedule a napi */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ @@ -338,12 +339,13 @@ enum { }; enum { - NAPIF_STATE_SCHED = (1UL << NAPI_STATE_SCHED), - NAPIF_STATE_DISABLE = (1UL << NAPI_STATE_DISABLE), - NAPIF_STATE_NPSVC = (1UL << NAPI_STATE_NPSVC), - NAPIF_STATE_HASHED = (1UL << NAPI_STATE_HASHED), - NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL), - NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL), + NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), + NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), + NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), + NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), + NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), + NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), }; enum gro_result { @@ -414,20 +416,7 @@ static inline bool napi_disable_pending(struct napi_struct *n) return test_bit(NAPI_STATE_DISABLE, &n->state); } -/** - * napi_schedule_prep - check if NAPI can be scheduled - * @n: NAPI context - * - * Test if NAPI routine is already running, and if not mark - * it as running. This is used as a condition variable to - * insure only one NAPI poll instance runs. We also make - * sure there is no pending NAPI disable. - */ -static inline bool napi_schedule_prep(struct napi_struct *n) -{ - return !napi_disable_pending(n) && - !test_and_set_bit(NAPI_STATE_SCHED, &n->state); -} +bool napi_schedule_prep(struct napi_struct *n); /** * napi_schedule - schedule NAPI poll diff --git a/net/core/dev.c b/net/core/dev.c index 304f2deae5f9..e63bf61b19be 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4883,6 +4883,39 @@ void __napi_schedule(struct napi_struct *n) } EXPORT_SYMBOL(__napi_schedule); +/** + * napi_schedule_prep - check if napi can be scheduled + * @n: napi context + * + * Test if NAPI routine is already running, and if not mark + * it as running. This is used as a condition variable + * insure only one NAPI poll instance runs. We also make + * sure there is no pending NAPI disable. 
+ */ +bool napi_schedule_prep(struct napi_struct *n) +{ + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + if (unlikely(val & NAPIF_STATE_DISABLE)) + return false; + new = val | NAPIF_STATE_SCHED; + + /* Sets STATE_MISSED bit if STATE_SCHED was already set + * This was suggested by Alexander Duyck, as compiler + * emits better code than : + * if (val & NAPIF_STATE_SCHED) + * new |= NAPIF_STATE_MISSED; + */ + new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED * + NAPIF_STATE_MISSED; + } while (cmpxchg(&n->state, val, new) != val); + + return !(val & NAPIF_STATE_SCHED); +} +EXPORT_SYMBOL(napi_schedule_prep); + /** * __napi_schedule_irqoff - schedule for receive * @n: entry to schedule @@ -4897,7 +4930,7 @@ EXPORT_SYMBOL(__napi_schedule_irqoff); bool napi_complete_done(struct napi_struct *n, int work_done) { - unsigned long flags; + unsigned long flags, val, new; /* * 1) Don't let napi dequeue from the cpu poll list @@ -4927,7 +4960,27 @@ bool napi_complete_done(struct napi_struct *n, int work_done) list_del_init(&n->poll_list); local_irq_restore(flags); } - WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state)); + + do { + val = READ_ONCE(n->state); + + WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); + + new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED); + + /* If STATE_MISSED was set, leave STATE_SCHED set, + * because we will call napi->poll() one more time. + * This C code was suggested by Alexander Duyck to help gcc. + */ + new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED * + NAPIF_STATE_SCHED; + } while (cmpxchg(&n->state, val, new) != val); + + if (unlikely(val & NAPIF_STATE_MISSED)) { + __napi_schedule(n); + return false; + } + return true; } EXPORT_SYMBOL(napi_complete_done); @@ -4953,6 +5006,16 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) { int rc; + /* Busy polling means there is a high chance device driver hard irq + * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was + * set in napi_schedule_prep(). + * Since we are about to call napi->poll() once more, we can safely + * clear NAPI_STATE_MISSED. + * + * Note: x86 could use a single "lock and ..." instruction + * to perform these two clear_bit() + */ + clear_bit(NAPI_STATE_MISSED, &napi->state); clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state); local_bh_disable(); @@ -5088,8 +5151,13 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) struct napi_struct *napi; napi = container_of(timer, struct napi_struct, timer); - if (napi->gro_list) - napi_schedule_irqoff(napi); + + /* Note : we use a relaxed variant of napi_schedule_prep() not setting + * NAPI_STATE_MISSED, since we do not react to a device IRQ. + */ + if (napi->gro_list && !napi_disable_pending(napi) && + !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) + __napi_schedule_irqoff(napi); return HRTIMER_NORESTART; } -- GitLab From 56de859e9967c070464a9a9f4f18d73f9447298e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 24 Feb 2017 11:43:36 -0800 Subject: [PATCH 0837/1084] vxlan: lock RCU on TX path There is no guarantees that callers of the TX path will hold the RCU lock. Grab it explicitly. Fixes: c6fcc4fc5f8b ("vxlan: avoid using stale vxlan socket.") Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index b7911994112a..e375560cc74e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2105,6 +2105,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, vxlan->cfg.port_max, true); + rcu_read_lock(); if (dst->sa.sa_family == AF_INET) { struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); struct rtable *rt; @@ -2127,7 +2128,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port, vni, &rt->dst, rt->rt_flags); if (err) - return; + goto out_unlock; } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) { df = htons(IP_DF); } @@ -2166,7 +2167,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port, vni, ndst, rt6i_flags); if (err) - return; + goto out_unlock; } tos = ip_tunnel_ecn_encap(tos, old_iph, skb); @@ -2183,6 +2184,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, label, src_port, dst_port, !udp_sum); #endif } +out_unlock: + rcu_read_unlock(); return; drop: @@ -2191,6 +2194,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; tx_error: + rcu_read_unlock(); if (err == -ELOOP) dev->stats.collisions++; else if (err == -ENETUNREACH) -- GitLab From a717e3f740803cc88bd5c9a70c93504f6a368663 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 24 Feb 2017 11:43:37 -0800 Subject: [PATCH 0838/1084] geneve: lock RCU on TX path There is no guarantees that callers of the TX path will hold the RCU lock. Grab it explicitly. Fixes: fceb9c3e3825 ("geneve: avoid using stale geneve socket.") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 45301cb98bc1..7074b40ebd7f 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -881,12 +881,14 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) info = &geneve->info; } + rcu_read_lock(); #if IS_ENABLED(CONFIG_IPV6) if (info->mode & IP_TUNNEL_INFO_IPV6) err = geneve6_xmit_skb(skb, dev, geneve, info); else #endif err = geneve_xmit_skb(skb, dev, geneve, info); + rcu_read_unlock(); if (likely(!err)) return NETDEV_TX_OK; -- GitLab From 522214d9be9c9f00f34ed89cb95e901b7ac31c59 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 1 Mar 2017 09:57:00 +0100 Subject: [PATCH 0839/1084] Input: rmi4 - f30: detect INPUT_PROP_BUTTONPAD from the button count INPUT_PROP_BUTTONPAD is currently only set through the platform data. The RMI4 header doc says that this property is there to force the buttonpad property, so we also need to detect it by looking at the exported buttons count. Signed-off-by: Benjamin Tissoires Reported-and-tested-by: Linus Torvalds Signed-off-by: Linus Torvalds --- drivers/input/rmi4/rmi_f30.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c index 3422464af229..198678613382 100644 --- a/drivers/input/rmi4/rmi_f30.c +++ b/drivers/input/rmi4/rmi_f30.c @@ -258,9 +258,10 @@ static int rmi_f30_map_gpios(struct rmi_function *fn, /* * Buttonpad could be also inferred from f30->has_mech_mouse_btns, - * but I am not sure, so use only the pdata info. 
+ * but I am not sure, so use only the pdata info and the number of + * mapped buttons. */ - if (pdata->f30_data.buttonpad) + if (pdata->f30_data.buttonpad || (button - BTN_LEFT == 1)) __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); return 0; -- GitLab From 8c171d6ca56c6891372a97af26b58b2cfad7fd9a Mon Sep 17 00:00:00 2001 From: Felix Jia Date: Mon, 27 Feb 2017 12:41:23 +1300 Subject: [PATCH 0840/1084] net/ipv6: avoid possible dead locking on addr_gen_mode sysctl The addr_gen_mode variable can be accessed by both sysctl and netlink. Replace rtnl_lock() with rtnl_trylock() to protect the sysctl operation and avoid the possible deadlock. Signed-off-by: Felix Jia Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3a2025f5bf2c..cfc485a8e1c0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5692,13 +5692,18 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; struct net *net = (struct net *)ctl->extra2; + if (!rtnl_trylock()) + return restart_syscall(); + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (write) { new_val = *((int *)ctl->data); - if (check_addr_gen_mode(new_val) < 0) - return -EINVAL; + if (check_addr_gen_mode(new_val) < 0) { + ret = -EINVAL; + goto out; + } /* request for default */ if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) { @@ -5707,20 +5712,23 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, /* request for individual net device */ } else { if (!idev) - return ret; + goto out; - if (check_stable_privacy(idev, net, new_val) < 0) - return -EINVAL; + if (check_stable_privacy(idev, net, new_val) < 0) { + ret = -EINVAL; + goto out; + } if (idev->cnf.addr_gen_mode != new_val) { idev->cnf.addr_gen_mode = new_val; - rtnl_lock(); addrconf_dev_config(idev->dev); - rtnl_unlock(); } } } +out: + rtnl_unlock(); + return ret; } -- GitLab From 3b45a4106f146c336cbcaccb9d8d0fa0e5c3dc1d Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 27 Feb 2017 20:59:39 +0800 Subject: [PATCH 0841/1084] net: route: add missing nla_policy entry for RTA_MARK attribute This adds stricter validation for the RTA_MARK attribute. Signed-off-by: Liping Zhang Signed-off-by: David S.
Miller --- net/ipv4/fib_frontend.c | 1 + net/ipv6/route.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b39a791f6756..42bfd08109dd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -622,6 +622,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_UID] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f54f4265b37f..d94f1dfa54c8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2891,6 +2891,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_EXPIRES] = { .type = NLA_U32 }, [RTA_UID] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, -- GitLab From e390f9a9689a42f477a6073e2e7df530a4c1b740 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Mar 2017 12:04:44 -0600 Subject: [PATCH 0842/1084] objtool, modules: Discard objtool annotation sections for modules The '__unreachable' and '__func_stack_frame_non_standard' sections are only used at compile time. They're discarded for vmlinux but they should also be discarded for modules. Since this is a recurring pattern, prefix the section names with ".discard.". It's a nice convention and vmlinux.lds.h already discards such sections. Also remove the 'a' (allocatable) flag from the __unreachable section since it doesn't make sense for a discarded section. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Jessica Yu Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/20170301180444.lhd53c5tibc4ns77@treble Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 2 -- include/linux/compiler-gcc.h | 2 +- include/linux/frame.h | 2 +- scripts/mod/modpost.c | 1 + scripts/module-common.lds | 5 ++++- tools/objtool/builtin-check.c | 6 +++--- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index ad0118fbce90..c74ae9ce8dc4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -345,8 +345,6 @@ SECTIONS DISCARDS /DISCARD/ : { *(.eh_frame) - *(__func_stack_frame_non_standard) - *(__unreachable) } } diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index b6bb9019d87f..0efef9cf014f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -200,7 +200,7 @@ #ifdef CONFIG_STACK_VALIDATION #define annotate_unreachable() ({ \ asm("%c0:\t\n" \ - ".pushsection __unreachable, \"a\"\t\n" \ + ".pushsection .discard.unreachable\t\n" \ ".long %c0b - .\t\n" \ ".popsection\t\n" : : "i" (__LINE__)); \ }) diff --git a/include/linux/frame.h b/include/linux/frame.h index e6baaba3f1ae..d772c61c31da 100644 --- a/include/linux/frame.h +++ b/include/linux/frame.h @@ -11,7 +11,7 @@ * For more information, see tools/objtool/Documentation/stack-validation.txt. 
*/ #define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(__func_stack_frame_non_standard) \ + static void __used __section(.discard.func_stack_frame_non_standard) \ *__func_stack_frame_non_standard_##func = func #else /* !CONFIG_STACK_VALIDATION */ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 4dedd0d3d3a7..30d752a4a6a6 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -854,6 +854,7 @@ static const char *const section_white_list[] = ".cmem*", /* EZchip */ ".fmt_slot*", /* EZchip */ ".gnu.lto*", + ".discard.*", NULL }; diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 73a2c7da0e55..cf7e52e4781b 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -4,7 +4,10 @@ * combine them automatically. */ SECTIONS { - /DISCARD/ : { *(.discard) } + /DISCARD/ : { + *(.discard) + *(.discard.*) + } __ksymtab 0 : { *(SORT(___ksymtab+*)) } __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) } diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 5fc52ee3264c..4cfdbb5b6967 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -339,13 +339,13 @@ static int add_dead_ends(struct objtool_file *file) struct instruction *insn; bool found; - sec = find_section_by_name(file->elf, ".rela__unreachable"); + sec = find_section_by_name(file->elf, ".rela.discard.unreachable"); if (!sec) return 0; list_for_each_entry(rela, &sec->rela_list, list) { if (rela->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in .rela__unreachable"); + WARN("unexpected relocation symbol type in %s", sec->name); return -1; } insn = find_insn(file, rela->sym->sec, rela->addend); @@ -1272,7 +1272,7 @@ int cmd_check(int argc, const char **argv) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); - file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard"); + file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); file.rodata = find_section_by_name(file.elf, ".rodata"); file.ignore_unreachables = false; file.c_file = find_section_by_name(file.elf, ".comment"); -- GitLab From 4ab18701c66552944188dbcd0ce0012729baab84 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 3 Jan 2017 09:37:34 -0800 Subject: [PATCH 0843/1084] xtensa: move parse_tag_fdt out of #ifdef CONFIG_BLK_DEV_INITRD FDT tag parsing is not related to whether BLK_DEV_INITRD is configured or not, move it out of the corresponding #ifdef/#endif block. This fixes passing external FDT to the kernel configured w/o BLK_DEV_INITRD support. 
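For readers without the file in front of them, the resulting #ifdef layout in arch/xtensa/kernel/setup.c looks roughly like this (abbreviated sketch of the hunk below; the tag handler bodies are elided):

#ifdef CONFIG_BLK_DEV_INITRD
static int __init parse_tag_initrd(const bp_tag_t *tag) { /* ... */ return 0; }
__tagtable(BP_TAG_INITRD, parse_tag_initrd);
#endif /* CONFIG_BLK_DEV_INITRD */

#ifdef CONFIG_OF
static int __init parse_tag_fdt(const bp_tag_t *tag) { /* ... */ return 0; }
__tagtable(BP_TAG_FDT, parse_tag_fdt);
#endif /* CONFIG_OF */

so parse_tag_fdt() is built whenever CONFIG_OF is enabled, independent of BLK_DEV_INITRD.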
Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 88a044af7504..23f2e034ba46 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -133,6 +133,8 @@ static int __init parse_tag_initrd(const bp_tag_t* tag) __tagtable(BP_TAG_INITRD, parse_tag_initrd); +#endif /* CONFIG_BLK_DEV_INITRD */ + #ifdef CONFIG_OF static int __init parse_tag_fdt(const bp_tag_t *tag) @@ -145,8 +147,6 @@ __tagtable(BP_TAG_FDT, parse_tag_fdt); #endif /* CONFIG_OF */ -#endif /* CONFIG_BLK_DEV_INITRD */ - static int __init parse_tag_cmdline(const bp_tag_t* tag) { strlcpy(command_line, (char *)(tag->data), COMMAND_LINE_SIZE); -- GitLab From 9a736fcb096b43b68af8329eb12abc8256dceaba Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 25 Dec 2016 04:58:57 -0800 Subject: [PATCH 0844/1084] xtensa: clean up bootable image build targets Currently xtensa uses 'zImage' as a synonym of 'all', but in fact xtensa supports three targets: 'Image' (ELF image with reset vector), 'zImage' (compressed redboot image) and 'uImage' (U-Boot image). Provide separate 'Image', 'zImage' and 'uImage' make targets that only build corresponding image type. Make 'all' build all images appropriate for a platform. Signed-off-by: Max Filippov --- arch/xtensa/Makefile | 8 +++----- arch/xtensa/boot/Makefile | 23 ++++++++++++++++------- arch/xtensa/boot/boot-elf/Makefile | 2 +- arch/xtensa/boot/boot-redboot/Makefile | 2 +- arch/xtensa/boot/boot-uboot/Makefile | 14 -------------- 5 files changed, 21 insertions(+), 28 deletions(-) delete mode 100644 arch/xtensa/boot/boot-uboot/Makefile diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index e54189427b31..7ee02fe4a63d 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -93,11 +93,7 @@ endif boot := arch/xtensa/boot -all: zImage - -bzImage : zImage - -zImage: vmlinux +all Image zImage uImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ %.dtb: @@ -107,6 +103,8 @@ dtbs: scripts $(Q)$(MAKE) $(build)=$(boot)/dts define archhelp + @echo '* Image - Kernel ELF image with reset vector' @echo '* zImage - Compressed kernel image (arch/xtensa/boot/images/zImage.*)' + @echo '* uImage - U-Boot wrapped image' @echo ' dtbs - Build device tree blobs for enabled boards' endef diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile index ca20a892021b..53e4178711e6 100644 --- a/arch/xtensa/boot/Makefile +++ b/arch/xtensa/boot/Makefile @@ -21,14 +21,17 @@ subdir-y := lib # Subdirs for the boot loader(s) -bootdir-$(CONFIG_XTENSA_PLATFORM_ISS) += boot-elf -bootdir-$(CONFIG_XTENSA_PLATFORM_XT2000) += boot-redboot boot-elf boot-uboot -bootdir-$(CONFIG_XTENSA_PLATFORM_XTFPGA) += boot-redboot boot-elf boot-uboot +boot-$(CONFIG_XTENSA_PLATFORM_ISS) += Image +boot-$(CONFIG_XTENSA_PLATFORM_XT2000) += Image zImage uImage +boot-$(CONFIG_XTENSA_PLATFORM_XTFPGA) += Image zImage uImage -zImage Image: $(bootdir-y) +all: $(boot-y) +Image: boot-elf +zImage: boot-redboot +uImage: $(obj)/uImage -$(bootdir-y): $(addprefix $(obj)/,$(subdir-y)) \ - $(addprefix $(obj)/,$(host-progs)) +boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y)) \ + $(addprefix $(obj)/,$(host-progs)) $(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS) OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary @@ -41,4 +44,10 @@ vmlinux.bin.gz: vmlinux.bin FORCE boot-elf: vmlinux.bin boot-redboot: vmlinux.bin.gz -boot-uboot: vmlinux.bin.gz + 
+UIMAGE_LOADADDR = $(CONFIG_KERNEL_LOAD_ADDRESS) +UIMAGE_COMPRESSION = gzip + +$(obj)/uImage: vmlinux.bin.gz FORCE + $(call if_changed,uimage) + $(Q)$(kecho) ' Kernel: $@ is ready' diff --git a/arch/xtensa/boot/boot-elf/Makefile b/arch/xtensa/boot/boot-elf/Makefile index 89db089f5a12..521471981356 100644 --- a/arch/xtensa/boot/boot-elf/Makefile +++ b/arch/xtensa/boot/boot-elf/Makefile @@ -31,4 +31,4 @@ $(obj)/../Image.elf: $(obj)/Image.o $(obj)/boot.lds -o $@ $(obj)/Image.o $(Q)$(kecho) ' Kernel: $@ is ready' -zImage: $(obj)/../Image.elf +all Image: $(obj)/../Image.elf diff --git a/arch/xtensa/boot/boot-redboot/Makefile b/arch/xtensa/boot/boot-redboot/Makefile index 8be8b9436981..8632473ad319 100644 --- a/arch/xtensa/boot/boot-redboot/Makefile +++ b/arch/xtensa/boot/boot-redboot/Makefile @@ -32,4 +32,4 @@ $(obj)/../zImage.redboot: $(obj)/zImage.elf $(Q)$(OBJCOPY) -S -O binary $< $@ $(Q)$(kecho) ' Kernel: $@ is ready' -zImage: $(obj)/../zImage.redboot +all zImage: $(obj)/../zImage.redboot diff --git a/arch/xtensa/boot/boot-uboot/Makefile b/arch/xtensa/boot/boot-uboot/Makefile deleted file mode 100644 index 0f4c417b4196..000000000000 --- a/arch/xtensa/boot/boot-uboot/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# - -UIMAGE_LOADADDR = $(CONFIG_KERNEL_LOAD_ADDRESS) -UIMAGE_COMPRESSION = gzip - -$(obj)/../uImage: vmlinux.bin.gz FORCE - $(call if_changed,uimage) - $(Q)$(kecho) ' Kernel: $@ is ready' - -zImage: $(obj)/../uImage -- GitLab From b46dcfa378b0cdea1ee832802c9e36750e0fffa9 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 4 Jan 2017 10:40:49 -0800 Subject: [PATCH 0845/1084] xtensa: allow merging vectors into .text section Currently code for exception/IRQ vectors is stored in kernel image as initialization data and is copied to its working addresses during startup. It doesn't always make sense. In many cases vectors location can be automatically decided at kernel link time and code can be placed right there. This is especially useful for XIP kernel. 
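The core of the address selection (quoted in full in the vectors.h hunk below) is simply:

#ifdef CONFIG_VECTORS_OFFSET
#define VECBASE_VADDR (KERNELOFFSET - CONFIG_VECTORS_OFFSET)
#else
#define VECBASE_VADDR _vecbase
#endif

When CONFIG_VECTORS_OFFSET is not set, the linker script places the vector sections straight into .text and no startup-time copy or mem_reserve() of the vector code is needed.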
Signed-off-by: Max Filippov --- arch/xtensa/include/asm/vectors.h | 4 +++ arch/xtensa/kernel/setup.c | 3 +++ arch/xtensa/kernel/vmlinux.lds.S | 41 +++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h index 77d41cc7a688..65d3da9db19b 100644 --- a/arch/xtensa/include/asm/vectors.h +++ b/arch/xtensa/include/asm/vectors.h @@ -67,7 +67,11 @@ static inline unsigned long xtensa_get_kio_paddr(void) #endif /* CONFIG_MMU */ #define RESET_VECTOR1_VADDR (XCHAL_RESET_VECTOR1_VADDR) +#ifdef CONFIG_VECTORS_OFFSET #define VECBASE_VADDR (KERNELOFFSET - CONFIG_VECTORS_OFFSET) +#else +#define VECBASE_VADDR _vecbase +#endif #if defined(XCHAL_HAVE_VECBASE) && XCHAL_HAVE_VECBASE diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 23f2e034ba46..7fc7bb9f3b84 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -455,6 +455,7 @@ void __init setup_arch(char **cmdline_p) mem_reserve(__pa(&_stext), __pa(&_end)); +#ifdef CONFIG_VECTORS_OFFSET mem_reserve(__pa(&_WindowVectors_text_start), __pa(&_WindowVectors_text_end)); @@ -491,6 +492,8 @@ void __init setup_arch(char **cmdline_p) __pa(&_Level6InterruptVector_text_end)); #endif +#endif /* CONFIG_VECTORS_OFFSET */ + #ifdef CONFIG_SMP mem_reserve(__pa(&_SecondaryResetVector_text_start), __pa(&_SecondaryResetVector_text_end)); diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 31411fc82662..30d9fc21e076 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -59,6 +59,7 @@ jiffies = jiffies_64; * garbage.) */ +#ifdef CONFIG_VECTORS_OFFSET #define SECTION_VECTOR(sym, section, addr, max_prevsec_size, prevsec) \ section addr : AT((MIN(LOADADDR(prevsec) + max_prevsec_size, \ LOADADDR(prevsec) + SIZEOF(prevsec)) + 3) & ~ 3) \ @@ -68,6 +69,11 @@ jiffies = jiffies_64; *(section) \ sym ## _end = ABSOLUTE(.); \ } +#else +#define SECTION_VECTOR(section, addr) \ + . = addr; \ + *(section) +#endif /* * Mapping of input sections to output sections when linking. @@ -85,6 +91,37 @@ SECTIONS { /* The HEAD_TEXT section must be the first section! */ HEAD_TEXT + +#ifndef CONFIG_VECTORS_OFFSET + . 
= ALIGN(PAGE_SIZE); + _vecbase = .; + + SECTION_VECTOR (.WindowVectors.text, WINDOW_VECTORS_VADDR) +#if XCHAL_EXCM_LEVEL >= 2 + SECTION_VECTOR (.Level2InterruptVector.text, INTLEVEL2_VECTOR_VADDR) +#endif +#if XCHAL_EXCM_LEVEL >= 3 + SECTION_VECTOR (.Level3InterruptVector.text, INTLEVEL3_VECTOR_VADDR) +#endif +#if XCHAL_EXCM_LEVEL >= 4 + SECTION_VECTOR (.Level4InterruptVector.text, INTLEVEL4_VECTOR_VADDR) +#endif +#if XCHAL_EXCM_LEVEL >= 5 + SECTION_VECTOR (.Level5InterruptVector.text, INTLEVEL5_VECTOR_VADDR) +#endif +#if XCHAL_EXCM_LEVEL >= 6 + SECTION_VECTOR (.Level6InterruptVector.text, INTLEVEL6_VECTOR_VADDR) +#endif + SECTION_VECTOR (.DebugInterruptVector.literal, DEBUG_VECTOR_VADDR - 4) + SECTION_VECTOR (.DebugInterruptVector.text, DEBUG_VECTOR_VADDR) + SECTION_VECTOR (.KernelExceptionVector.literal, KERNEL_VECTOR_VADDR - 4) + SECTION_VECTOR (.KernelExceptionVector.text, KERNEL_VECTOR_VADDR) + SECTION_VECTOR (.UserExceptionVector.literal, USER_VECTOR_VADDR - 4) + SECTION_VECTOR (.UserExceptionVector.text, USER_VECTOR_VADDR) + SECTION_VECTOR (.DoubleExceptionVector.literal, DOUBLEEXC_VECTOR_VADDR - 48) + SECTION_VECTOR (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR) +#endif + TEXT_TEXT VMLINUX_SYMBOL(__sched_text_start) = .; *(.sched.literal .sched.text) @@ -132,6 +169,7 @@ SECTIONS . = ALIGN(16); __boot_reloc_table_start = ABSOLUTE(.); +#ifdef CONFIG_VECTORS_OFFSET RELOCATE_ENTRY(_WindowVectors_text, .WindowVectors.text); #if XCHAL_EXCM_LEVEL >= 2 @@ -164,6 +202,7 @@ SECTIONS .DoubleExceptionVector.text); RELOCATE_ENTRY(_DebugInterruptVector_text, .DebugInterruptVector.text); +#endif #if defined(CONFIG_SMP) RELOCATE_ENTRY(_SecondaryResetVector_text, .SecondaryResetVector.text); @@ -186,6 +225,7 @@ SECTIONS . = ALIGN(4); .dummy : { LONG(0) } +#ifdef CONFIG_VECTORS_OFFSET /* The vectors are relocated to the real position at startup time */ SECTION_VECTOR (_WindowVectors_text, @@ -277,6 +317,7 @@ SECTIONS . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; +#endif #if defined(CONFIG_SMP) SECTION_VECTOR (_SecondaryResetVector_text, -- GitLab From 28db0c7b1c7fbca3637994c1ce3c4ffe142e2755 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 1 Mar 2017 16:39:55 +0530 Subject: [PATCH 0846/1084] PM / OPP: Documentation: Fix opp-microvolt in examples The triplet present in "opp-microvolt" property should be in the order , while all the examples have it in the order . Fix it. Luckily all of the users of "opp-microvolt" property have applied brain instead of copying the examples from documentation and none of the actual dts files have it wrong. Reported-by: Rob Herring Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/devicetree/bindings/opp/opp.txt | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 9f5ca4457b5f..7951961ef356 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -188,14 +188,14 @@ Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together. 
opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000 975000 985000>; + opp-microvolt = <975000 970000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; - opp-microvolt = <980000 1000000 1010000>; + opp-microvolt = <1000000 980000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; @@ -267,14 +267,14 @@ independently. opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000 975000 985000>; + opp-microvolt = <975000 970000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; - opp-microvolt = <980000 1000000 1010000>; + opp-microvolt = <1000000 980000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; @@ -343,14 +343,14 @@ DVFS state together. opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000 975000 985000>; + opp-microvolt = <975000 970000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; - opp-microvolt = <980000 1000000 1010000>; + opp-microvolt = <1000000 980000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; @@ -369,7 +369,7 @@ DVFS state together. opp@1300000000 { opp-hz = /bits/ 64 <1300000000>; - opp-microvolt = <1045000 1050000 1055000>; + opp-microvolt = <1050000 1045000 1055000>; opp-microamp = <95000>; clock-latency-ns = <400000>; opp-suspend; @@ -382,7 +382,7 @@ DVFS state together. }; opp@1500000000 { opp-hz = /bits/ 64 <1500000000>; - opp-microvolt = <1010000 1100000 1110000>; + opp-microvolt = <1100000 1010000 1110000>; opp-microamp = <95000>; clock-latency-ns = <400000>; turbo-mode; @@ -424,9 +424,9 @@ Example 4: Handling multiple regulators opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000 975000 985000>, /* Supply 0 */ - <960000 965000 975000>, /* Supply 1 */ - <960000 965000 975000>; /* Supply 2 */ + opp-microvolt = <975000 970000 985000>, /* Supply 0 */ + <965000 960000 975000>, /* Supply 1 */ + <965000 960000 975000>; /* Supply 2 */ opp-microamp = <70000>, /* Supply 0 */ <70000>, /* Supply 1 */ <70000>; /* Supply 2 */ @@ -437,9 +437,9 @@ Example 4: Handling multiple regulators opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt = <970000 975000 985000>, /* Supply 0 */ - <960000 965000 975000>, /* Supply 1 */ - <960000 965000 975000>; /* Supply 2 */ + opp-microvolt = <975000 970000 985000>, /* Supply 0 */ + <965000 960000 975000>, /* Supply 1 */ + <965000 960000 975000>; /* Supply 2 */ opp-microamp = <70000>, /* Supply 0 */ <0>, /* Supply 1 doesn't need this */ <70000>; /* Supply 2 */ @@ -474,7 +474,7 @@ Example 5: opp-supported-hw */ opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF> opp-hz = /bits/ 64 <600000000>; - opp-microvolt = <900000 915000 925000>; + opp-microvolt = <915000 900000 925000>; ... }; @@ -487,7 +487,7 @@ Example 5: opp-supported-hw */ opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0> opp-hz = /bits/ 64 <800000000>; - opp-microvolt = <900000 915000 925000>; + opp-microvolt = <915000 900000 925000>; ... 
}; }; @@ -512,18 +512,18 @@ Example 6: opp-microvolt-, opp-microamp-: opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - opp-microvolt-slow = <900000 915000 925000>; - opp-microvolt-fast = <970000 975000 985000>; + opp-microvolt-slow = <915000 900000 925000>; + opp-microvolt-fast = <975000 970000 985000>; opp-microamp-slow = <70000>; opp-microamp-fast = <71000>; }; opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; - opp-microvolt-slow = <900000 915000 925000>, /* Supply vcc0 */ - <910000 925000 935000>; /* Supply vcc1 */ - opp-microvolt-fast = <970000 975000 985000>, /* Supply vcc0 */ - <960000 965000 975000>; /* Supply vcc1 */ + opp-microvolt-slow = <915000 900000 925000>, /* Supply vcc0 */ + <925000 910000 935000>; /* Supply vcc1 */ + opp-microvolt-fast = <975000 970000 985000>, /* Supply vcc0 */ + <965000 960000 975000>; /* Supply vcc1 */ opp-microamp = <70000>; /* Will be used for both slow/fast */ }; }; -- GitLab From 61cfac6f267dabcf2740a7ec8a0295833b28b5f5 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Tue, 28 Feb 2017 16:05:19 -0800 Subject: [PATCH 0847/1084] CIFS: Fix possible use after free in demultiplex thread The recent changes that added SMB3 encryption support introduced a possible use after free in the demultiplex thread. When we process an encrypted packed we obtain a pointer to SMB session but do not obtain a reference. This can possibly lead to a situation when this session was freed before we copy a decryption key from there. Fix this by obtaining a copy of the key rather than a pointer to the session under a spinlock. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index a44b4dbe4aae..d2cdd9c19d17 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1609,6 +1609,26 @@ static void cifs_crypt_complete(struct crypto_async_request *req, int err) complete(&res->completion); } +static int +smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) +{ + struct cifs_ses *ses; + u8 *ses_enc_key; + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (ses->Suid != ses_id) + continue; + ses_enc_key = enc ? ses->smb3encryptionkey : + ses->smb3decryptionkey; + memcpy(key, ses_enc_key, SMB3_SIGN_KEY_SIZE); + spin_unlock(&cifs_tcp_ses_lock); + return 0; + } + spin_unlock(&cifs_tcp_ses_lock); + + return 1; +} /* * Encrypt or decrypt @rqst message. @rqst has the following format: * iov[0] - transform header (associate data), @@ -1622,10 +1642,10 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base; unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24; - struct cifs_ses *ses; int rc = 0; struct scatterlist *sg; u8 sign[SMB2_SIGNATURE_SIZE] = {}; + u8 key[SMB3_SIGN_KEY_SIZE]; struct aead_request *req; char *iv; unsigned int iv_len; @@ -1635,9 +1655,10 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) init_completion(&result.completion); - ses = smb2_find_smb_ses(server, tr_hdr->SessionId); - if (!ses) { - cifs_dbg(VFS, "%s: Could not find session\n", __func__); + rc = smb2_get_enc_key(server, tr_hdr->SessionId, enc, key); + if (rc) { + cifs_dbg(VFS, "%s: Could not get %scryption key\n", __func__, + enc ? 
"en" : "de"); return 0; } @@ -1649,8 +1670,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc) tfm = enc ? server->secmech.ccmaesencrypt : server->secmech.ccmaesdecrypt; - rc = crypto_aead_setkey(tfm, enc ? ses->smb3encryptionkey : - ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE); + rc = crypto_aead_setkey(tfm, key, SMB3_SIGN_KEY_SIZE); if (rc) { cifs_dbg(VFS, "%s: Failed to set aead key %d\n", __func__, rc); return rc; -- GitLab From df2c43343b47a7138431f431118eb5819e205365 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 1 Mar 2017 16:50:45 +0200 Subject: [PATCH 0848/1084] bridge: Fix error path in nbp_vlan_init Fix error path order in nbp_vlan_init, so if switchdev_port_attr_set call failes, the vlan_hash wouldn't be destroyed before inited. Fixes: efa5356b0d97 ("bridge: per vlan dst_metadata netlink support") CC: Roopa Prabhu Signed-off-by: Yotam Gigi Acked-by: Roopa Prabhu Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 62e68c0dc687..b838213c408e 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -997,10 +997,10 @@ int nbp_vlan_init(struct net_bridge_port *p) RCU_INIT_POINTER(p->vlgrp, NULL); synchronize_rcu(); vlan_tunnel_deinit(vg); -err_vlan_enabled: err_tunnel_init: rhashtable_destroy(&vg->vlan_hash); err_rhtbl: +err_vlan_enabled: kfree(vg); goto out; -- GitLab From eba38a968258b5ad9d70722ab8c584e1753f4b16 Mon Sep 17 00:00:00 2001 From: Gary Lin Date: Wed, 1 Mar 2017 16:25:51 +0800 Subject: [PATCH 0849/1084] bpf: update the comment about the length of analysis Commit 07016151a446 ("bpf, verifier: further improve search pruning") increased the limit of processed instructions from 32k to 64k, but the comment still mentioned the 32k limit. This commit updates the comment to reflect the change. Cc: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: Gary Lin Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3fc6e39b223e..796b68d00119 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -33,7 +33,7 @@ * - out of bounds or malformed jumps * The second pass is all possible path descent from the 1st insn. * Since it's analyzing all pathes through the program, the length of the - * analysis is limited to 32k insn, which may be hit even if total number of + * analysis is limited to 64k insn, which may be hit even if total number of * insn is less then 4K, but there are too many branches that change stack/regs. * Number of 'branches to be analyzed' is limited to 1k * -- GitLab From 449809a66c1d0b1563dee84493e14bf3104d2d7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 08:39:49 -0800 Subject: [PATCH 0850/1084] tcp/dccp: block BH for SYN processing SYN processing really was meant to be handled from BH. When I got rid of BH blocking while processing socket backlog in commit 5413d1babe8f ("net: do not block BH while processing socket backlog"), I forgot that a malicious user could transition to TCP_LISTEN from a state that allowed (SYN) packets to be parked in the socket backlog while socket is owned by the thread doing the listen() call. 
Sure enough syzkaller found this and reported the bug ;) ================================= [ INFO: inconsistent lock state ] 4.10.0+ #60 Not tainted --------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. syz-executor0/5090 [HC0[0]:SC0[0]:HE1:SE1] takes: (&(&hashinfo->ehash_locks[i])->rlock){+.?...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (&(&hashinfo->ehash_locks[i])->rlock){+.?...}, at: [] inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 {IN-SOFTIRQ-W} state was registered at: mark_irqflags kernel/locking/lockdep.c:2923 [inline] __lock_acquire+0xbcf/0x3270 kernel/locking/lockdep.c:3295 lock_acquire+0x241/0x580 kernel/locking/lockdep.c:3753 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:753 [inline] inet_csk_reqsk_queue_hash_add+0x1b7/0x2a0 net/ipv4/inet_connection_sock.c:764 tcp_conn_request+0x25cc/0x3310 net/ipv4/tcp_input.c:6399 tcp_v4_conn_request+0x157/0x220 net/ipv4/tcp_ipv4.c:1262 tcp_rcv_state_process+0x802/0x4130 net/ipv4/tcp_input.c:5889 tcp_v4_do_rcv+0x56b/0x940 net/ipv4/tcp_ipv4.c:1433 tcp_v4_rcv+0x2e12/0x3210 net/ipv4/tcp_ipv4.c:1711 ip_local_deliver_finish+0x4ce/0xc40 net/ipv4/ip_input.c:216 NF_HOOK include/linux/netfilter.h:257 [inline] ip_local_deliver+0x1ce/0x710 net/ipv4/ip_input.c:257 dst_input include/net/dst.h:492 [inline] ip_rcv_finish+0xb1d/0x2110 net/ipv4/ip_input.c:396 NF_HOOK include/linux/netfilter.h:257 [inline] ip_rcv+0xd90/0x19c0 net/ipv4/ip_input.c:487 __netif_receive_skb_core+0x1ad1/0x3400 net/core/dev.c:4179 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4217 netif_receive_skb_internal+0x1d6/0x430 net/core/dev.c:4245 napi_skb_finish net/core/dev.c:4602 [inline] napi_gro_receive+0x4e6/0x680 net/core/dev.c:4636 e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4033 [inline] e1000_clean_rx_irq+0x5e0/0x1490 drivers/net/ethernet/intel/e1000/e1000_main.c:4489 e1000_clean+0xb9a/0x2910 drivers/net/ethernet/intel/e1000/e1000_main.c:3834 napi_poll net/core/dev.c:5171 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5236 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x19e/0x1d0 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:658 [inline] do_IRQ+0x81/0x1a0 arch/x86/kernel/irq.c:250 ret_from_intr+0x0/0x20 native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:53 arch_safe_halt arch/x86/include/asm/paravirt.h:98 [inline] default_idle+0x8f/0x410 arch/x86/kernel/process.c:271 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:262 default_idle_call+0x36/0x60 kernel/sched/idle.c:96 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x348/0x440 kernel/sched/idle.c:243 cpu_startup_entry+0x18/0x20 kernel/sched/idle.c:345 start_secondary+0x344/0x440 arch/x86/kernel/smpboot.c:272 verify_cpu+0x0/0xfc irq event stamp: 1741 hardirqs last enabled at (1741): [] __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:160 [inline] hardirqs last enabled at (1741): [] _raw_spin_unlock_irqrestore+0xf7/0x1a0 kernel/locking/spinlock.c:191 hardirqs last disabled at (1740): [] __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:108 [inline] hardirqs last disabled at (1740): [] _raw_spin_lock_irqsave+0xa2/0x110 kernel/locking/spinlock.c:159 softirqs last enabled at (1738): [] __do_softirq+0x7cf/0xb7d 
kernel/softirq.c:310 softirqs last disabled at (1571): [] do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&hashinfo->ehash_locks[i])->rlock); lock(&(&hashinfo->ehash_locks[i])->rlock); *** DEADLOCK *** 1 lock held by syz-executor0/5090: #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock include/net/sock.h:1460 [inline] #0: (sk_lock-AF_INET6){+.+.+.}, at: [] sock_setsockopt+0x233/0x1e40 net/core/sock.c:683 stack backtrace: CPU: 1 PID: 5090 Comm: syz-executor0 Not tainted 4.10.0+ #60 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x292/0x398 lib/dump_stack.c:51 print_usage_bug+0x3ef/0x450 kernel/locking/lockdep.c:2387 valid_state kernel/locking/lockdep.c:2400 [inline] mark_lock_irq kernel/locking/lockdep.c:2602 [inline] mark_lock+0xf30/0x1410 kernel/locking/lockdep.c:3065 mark_irqflags kernel/locking/lockdep.c:2941 [inline] __lock_acquire+0x6dc/0x3270 kernel/locking/lockdep.c:3295 lock_acquire+0x241/0x580 kernel/locking/lockdep.c:3753 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] inet_ehash_insert+0x240/0xad0 net/ipv4/inet_hashtables.c:407 reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:753 [inline] inet_csk_reqsk_queue_hash_add+0x1b7/0x2a0 net/ipv4/inet_connection_sock.c:764 dccp_v6_conn_request+0xada/0x11b0 net/dccp/ipv6.c:380 dccp_rcv_state_process+0x51e/0x1660 net/dccp/input.c:606 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 sk_backlog_rcv include/net/sock.h:896 [inline] __release_sock+0x127/0x3a0 net/core/sock.c:2052 release_sock+0xa5/0x2b0 net/core/sock.c:2539 sock_setsockopt+0x60f/0x1e40 net/core/sock.c:1016 SYSC_setsockopt net/socket.c:1782 [inline] SyS_setsockopt+0x2fb/0x3a0 net/socket.c:1765 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458b9 RSP: 002b:00007fe8b26c2b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00000000004458b9 RDX: 000000000000001a RSI: 0000000000000001 RDI: 0000000000000006 RBP: 00000000006e2110 R08: 0000000000000010 R09: 0000000000000000 R10: 00000000208c3000 R11: 0000000000000292 R12: 0000000000708000 R13: 0000000020000000 R14: 0000000000001000 R15: 0000000000000000 Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog") Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/dccp/input.c | 10 ++++++++-- net/ipv4/tcp_input.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/net/dccp/input.c b/net/dccp/input.c index 8fedc2d49770..4a05d7876850 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -577,6 +577,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; + bool acceptable; int queued = 0; /* @@ -603,8 +604,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { - if (inet_csk(sk)->icsk_af_ops->conn_request(sk, - skb) < 0) + /* It is possible that we process SYN packets from backlog, + * so we need to make sure to disable BH right there. 
+ */ + local_bh_disable(); + acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); + if (!acceptable) return 1; consume_skb(skb); return 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2c0ff327b6df..39c393cc0fd3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5886,9 +5886,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (th->syn) { if (th->fin) goto discard; - if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) - return 1; + /* It is possible that we process SYN packets from backlog, + * so we need to make sure to disable BH right there. + */ + local_bh_disable(); + acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; + local_bh_enable(); + if (!acceptable) + return 1; consume_skb(skb); return 0; } -- GitLab From 0837e49ab3fa8d903a499984575d71efee8097ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 1 Mar 2017 15:11:23 +0000 Subject: [PATCH 0851/1084] KEYS: Differentiate uses of rcu_dereference_key() and user_key_payload() rcu_dereference_key() and user_key_payload() are currently being used in two different, incompatible ways: (1) As a wrapper to rcu_dereference() - when only the RCU read lock used to protect the key. (2) As a wrapper to rcu_dereference_protected() - when the key semaphor is used to protect the key and the may be being modified. Fix this by splitting both of the key wrappers to produce: (1) RCU accessors for keys when caller has the key semaphore locked: dereference_key_locked() user_key_payload_locked() (2) RCU accessors for keys when caller holds the RCU read lock: dereference_key_rcu() user_key_payload_rcu() This should fix following warning in the NFS idmapper =============================== [ INFO: suspicious RCU usage. ] 4.10.0 #1 Tainted: G W ------------------------------- ./include/keys/user-type.h:53 suspicious rcu_dereference_protected() usage! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 1 lock held by mount.nfs/5987: #0: (rcu_read_lock){......}, at: [] nfs_idmap_get_key+0x15c/0x420 [nfsv4] stack backtrace: CPU: 1 PID: 5987 Comm: mount.nfs Tainted: G W 4.10.0 #1 Call Trace: dump_stack+0xe8/0x154 (unreliable) lockdep_rcu_suspicious+0x140/0x190 nfs_idmap_get_key+0x380/0x420 [nfsv4] nfs_map_name_to_uid+0x2a0/0x3b0 [nfsv4] decode_getfattr_attrs+0xfac/0x16b0 [nfsv4] decode_getfattr_generic.constprop.106+0xbc/0x150 [nfsv4] nfs4_xdr_dec_lookup_root+0xac/0xb0 [nfsv4] rpcauth_unwrap_resp+0xe8/0x140 [sunrpc] call_decode+0x29c/0x910 [sunrpc] __rpc_execute+0x140/0x8f0 [sunrpc] rpc_run_task+0x170/0x200 [sunrpc] nfs4_call_sync_sequence+0x68/0xa0 [nfsv4] _nfs4_lookup_root.isra.44+0xd0/0xf0 [nfsv4] nfs4_lookup_root+0xe0/0x350 [nfsv4] nfs4_lookup_root_sec+0x70/0xa0 [nfsv4] nfs4_find_root_sec+0xc4/0x100 [nfsv4] nfs4_proc_get_rootfh+0x5c/0xf0 [nfsv4] nfs4_get_rootfh+0x6c/0x190 [nfsv4] nfs4_server_common_setup+0xc4/0x260 [nfsv4] nfs4_create_server+0x278/0x3c0 [nfsv4] nfs4_remote_mount+0x50/0xb0 [nfsv4] mount_fs+0x74/0x210 vfs_kern_mount+0x78/0x220 nfs_do_root_mount+0xb0/0x140 [nfsv4] nfs4_try_mount+0x60/0x100 [nfsv4] nfs_fs_mount+0x5ec/0xda0 [nfs] mount_fs+0x74/0x210 vfs_kern_mount+0x78/0x220 do_mount+0x254/0xf70 SyS_mount+0x94/0x100 system_call+0x38/0xe0 Reported-by: Jan Stancek Signed-off-by: David Howells Tested-by: Jan Stancek Signed-off-by: James Morris --- Documentation/security/keys.txt | 17 +++++++++++++++-- drivers/md/dm-crypt.c | 2 +- fs/cifs/connect.c | 2 +- fs/crypto/keyinfo.c | 2 +- fs/ecryptfs/ecryptfs_kernel.h | 2 +- fs/fscache/object-list.c | 2 +- fs/nfs/nfs4idmap.c | 2 +- include/keys/user-type.h | 9 +++++++-- include/linux/key.h | 5 ++++- lib/digsig.c | 2 +- net/dns_resolver/dns_query.c | 4 ++-- security/keys/dh.c | 2 +- security/keys/encrypted-keys/encrypted.c | 4 ++-- security/keys/trusted.c | 4 ++-- security/keys/user_defined.c | 6 +++--- 15 files changed, 43 insertions(+), 22 deletions(-) diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 3849814bfe6d..0e03baf271bd 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -1151,8 +1151,21 @@ access the data: usage. This is called key->payload.rcu_data0. The following accessors wrap the RCU calls to this element: - rcu_assign_keypointer(struct key *key, void *data); - void *rcu_dereference_key(struct key *key); + (a) Set or change the first payload pointer: + + rcu_assign_keypointer(struct key *key, void *data); + + (b) Read the first payload pointer with the key semaphore held: + + [const] void *dereference_key_locked([const] struct key *key); + + Note that the return value will inherit its constness from the key + parameter. Static analysis will give an error if it things the lock + isn't held. 
+ + (c) Read the first payload pointer with the RCU read lock held: + + const void *dereference_key_rcu(const struct key *key); =================== diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 1cb2ca9dfae3..389a3637ffcc 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1536,7 +1536,7 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string down_read(&key->sem); - ukp = user_key_payload(key); + ukp = user_key_payload_locked(key); if (!ukp) { up_read(&key->sem); key_put(key); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 777ad9f4fc3c..8a3ecef30d3c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2455,7 +2455,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) } down_read(&key->sem); - upayload = user_key_payload(key); + upayload = user_key_payload_locked(key); if (IS_ERR_OR_NULL(upayload)) { rc = upayload ? PTR_ERR(upayload) : -EINVAL; goto out_key_put; diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 02eb6b9e4438..d5d896fa5a71 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -103,7 +103,7 @@ static int validate_user_key(struct fscrypt_info *crypt_info, goto out; } down_read(&keyring_key->sem); - ukp = user_key_payload(keyring_key); + ukp = user_key_payload_locked(keyring_key); if (ukp->datalen != sizeof(struct fscrypt_key)) { res = -EINVAL; up_read(&keyring_key->sem); diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 599a29237cfe..95c1c8d34539 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -117,7 +117,7 @@ ecryptfs_get_key_payload_data(struct key *key) auth_tok = ecryptfs_get_encrypted_key_payload_data(key); if (!auth_tok) - return (struct ecryptfs_auth_tok *)user_key_payload(key)->data; + return (struct ecryptfs_auth_tok *)user_key_payload_locked(key)->data; else return auth_tok; } diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 5d5ddaa84b21..67f940892ef8 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -329,7 +329,7 @@ static void fscache_objlist_config(struct fscache_objlist_data *data) config = 0; rcu_read_lock(); - confkey = user_key_payload(key); + confkey = user_key_payload_rcu(key); buf = confkey->data; for (len = confkey->datalen - 1; len >= 0; len--) { diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index c444285bb1b1..835c163f61af 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -316,7 +316,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, if (ret < 0) goto out_up; - payload = user_key_payload(rkey); + payload = user_key_payload_rcu(rkey); if (IS_ERR_OR_NULL(payload)) { ret = PTR_ERR(payload); goto out_up; diff --git a/include/keys/user-type.h b/include/keys/user-type.h index c56fef40f53e..e098cbe27db5 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -48,9 +48,14 @@ extern void user_describe(const struct key *user, struct seq_file *m); extern long user_read(const struct key *key, char __user *buffer, size_t buflen); -static inline const struct user_key_payload *user_key_payload(const struct key *key) +static inline const struct user_key_payload *user_key_payload_rcu(const struct key *key) { - return (struct user_key_payload *)rcu_dereference_key(key); + return (struct user_key_payload *)dereference_key_rcu(key); +} + +static inline struct user_key_payload *user_key_payload_locked(const struct key *key) +{ + return (struct user_key_payload *)dereference_key_locked((struct key *)key); } 
#endif /* CONFIG_KEYS */ diff --git a/include/linux/key.h b/include/linux/key.h index 722914798f37..e45212f2777e 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -354,7 +354,10 @@ static inline bool key_is_instantiated(const struct key *key) !test_bit(KEY_FLAG_NEGATIVE, &key->flags); } -#define rcu_dereference_key(KEY) \ +#define dereference_key_rcu(KEY) \ + (rcu_dereference((KEY)->payload.rcu_data0)) + +#define dereference_key_locked(KEY) \ (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) diff --git a/lib/digsig.c b/lib/digsig.c index 55b8b2f41a9e..03d7c63837ae 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -85,7 +85,7 @@ static int digsig_verify_rsa(struct key *key, struct pubkey_hdr *pkh; down_read(&key->sem); - ukp = user_key_payload(key); + ukp = user_key_payload_locked(key); if (ukp->datalen < sizeof(*pkh)) goto err1; diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index ecc28cff08ab..d502c94b1a82 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -70,7 +70,7 @@ int dns_query(const char *type, const char *name, size_t namelen, const char *options, char **_result, time64_t *_expiry) { struct key *rkey; - const struct user_key_payload *upayload; + struct user_key_payload *upayload; const struct cred *saved_cred; size_t typelen, desclen; char *desc, *cp; @@ -141,7 +141,7 @@ int dns_query(const char *type, const char *name, size_t namelen, if (ret) goto put; - upayload = user_key_payload(rkey); + upayload = user_key_payload_locked(rkey); len = upayload->datalen; ret = -ENOMEM; diff --git a/security/keys/dh.c b/security/keys/dh.c index 531ed2ec132f..893af4c45038 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -55,7 +55,7 @@ static ssize_t mpi_from_key(key_serial_t keyid, size_t maxlen, MPI *mpi) if (status == 0) { const struct user_key_payload *payload; - payload = user_key_payload(key); + payload = user_key_payload_locked(key); if (maxlen == 0) { *mpi = NULL; diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 4fb315cddf5b..0010955d7876 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -314,7 +314,7 @@ static struct key *request_user_key(const char *master_desc, const u8 **master_k goto error; down_read(&ukey->sem); - upayload = user_key_payload(ukey); + upayload = user_key_payload_locked(ukey); *master_key = upayload->data; *master_keylen = upayload->datalen; error: @@ -926,7 +926,7 @@ static long encrypted_read(const struct key *key, char __user *buffer, size_t asciiblob_len; int ret; - epayload = rcu_dereference_key(key); + epayload = dereference_key_locked(key); /* returns the hex encoded iv, encrypted-data, and hmac as ascii */ asciiblob_len = epayload->datablob_len + ivsize + 1 diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 90d61751ff12..2ae31c5a87de 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -1140,12 +1140,12 @@ static int trusted_update(struct key *key, struct key_preparsed_payload *prep) static long trusted_read(const struct key *key, char __user *buffer, size_t buflen) { - struct trusted_key_payload *p; + const struct trusted_key_payload *p; char *ascii_buf; char *bufp; int i; - p = rcu_dereference_key(key); + p = dereference_key_locked(key); if (!p) return -EINVAL; if (!buffer || buflen <= 0) diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index 
e187c8909d9d..26605134f17a 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -107,7 +107,7 @@ int user_update(struct key *key, struct key_preparsed_payload *prep) /* attach the new data, displacing the old */ key->expiry = prep->expiry; if (!test_bit(KEY_FLAG_NEGATIVE, &key->flags)) - zap = rcu_dereference_key(key); + zap = dereference_key_locked(key); rcu_assign_keypointer(key, prep->payload.data[0]); prep->payload.data[0] = NULL; @@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(user_update); */ void user_revoke(struct key *key) { - struct user_key_payload *upayload = key->payload.data[0]; + struct user_key_payload *upayload = user_key_payload_locked(key); /* clear the quota */ key_payload_reserve(key, 0); @@ -169,7 +169,7 @@ long user_read(const struct key *key, char __user *buffer, size_t buflen) const struct user_key_payload *upayload; long ret; - upayload = user_key_payload(key); + upayload = user_key_payload_locked(key); ret = upayload->datalen; /* we can return the data as is */ -- GitLab From 2651225b5ebcdde60f684c4db8ec7e9e3800a74f Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Tue, 28 Feb 2017 10:35:56 -0500 Subject: [PATCH 0852/1084] selinux: wrap cgroup seclabel support with its own policy capability commit 1ea0ce40690dff38935538e8dab7b12683ded0d3 ("selinux: allow changing labels for cgroupfs") broke the Android init program, which looks up security contexts whenever creating directories and attempts to assign them via setfscreatecon(). When creating subdirectories in cgroup mounts, this would previously be ignored since cgroup did not support userspace setting of security contexts. However, after the commit, SELinux would attempt to honor the requested context on cgroup directories and fail due to permission denial. Avoid breaking existing userspace/policy by wrapping this change with a conditional on a new cgroup_seclabel policy capability. This preserves existing behavior until/unless a new policy explicitly enables this capability. Reported-by: John Stultz Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore Signed-off-by: James Morris --- security/selinux/hooks.c | 7 ++++--- security/selinux/include/security.h | 2 ++ security/selinux/selinuxfs.c | 3 ++- security/selinux/ss/services.c | 4 ++++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9a8f12f8d5b7..0a4b4b040e0a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -480,12 +480,13 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) sbsec->behavior == SECURITY_FS_USE_NATIVE || /* Special handling. 
Genfs but also in-core setxattr handler */ !strcmp(sb->s_type->name, "sysfs") || - !strcmp(sb->s_type->name, "cgroup") || - !strcmp(sb->s_type->name, "cgroup2") || !strcmp(sb->s_type->name, "pstore") || !strcmp(sb->s_type->name, "debugfs") || !strcmp(sb->s_type->name, "tracefs") || - !strcmp(sb->s_type->name, "rootfs"); + !strcmp(sb->s_type->name, "rootfs") || + (selinux_policycap_cgroupseclabel && + (!strcmp(sb->s_type->name, "cgroup") || + !strcmp(sb->s_type->name, "cgroup2"))); } static int sb_finish_set_opts(struct super_block *sb) diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index beaa14b8b6cf..f979c35e037e 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -71,6 +71,7 @@ enum { POLICYDB_CAPABILITY_OPENPERM, POLICYDB_CAPABILITY_EXTSOCKCLASS, POLICYDB_CAPABILITY_ALWAYSNETWORK, + POLICYDB_CAPABILITY_CGROUPSECLABEL, __POLICYDB_CAPABILITY_MAX }; #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1) @@ -79,6 +80,7 @@ extern int selinux_policycap_netpeer; extern int selinux_policycap_openperm; extern int selinux_policycap_extsockclass; extern int selinux_policycap_alwaysnetwork; +extern int selinux_policycap_cgroupseclabel; /* * type_datum properties diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index c9e8a9898ce4..cb3fd98fb05a 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -46,7 +46,8 @@ static char *policycap_names[] = { "network_peer_controls", "open_perms", "extended_socket_class", - "always_check_network" + "always_check_network", + "cgroup_seclabel" }; unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index a70fcee9824b..b4aa491a0a23 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -74,6 +74,7 @@ int selinux_policycap_netpeer; int selinux_policycap_openperm; int selinux_policycap_extsockclass; int selinux_policycap_alwaysnetwork; +int selinux_policycap_cgroupseclabel; static DEFINE_RWLOCK(policy_rwlock); @@ -1993,6 +1994,9 @@ static void security_load_policycaps(void) POLICYDB_CAPABILITY_EXTSOCKCLASS); selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps, POLICYDB_CAPABILITY_ALWAYSNETWORK); + selinux_policycap_cgroupseclabel = + ebitmap_get_bit(&policydb.policycaps, + POLICYDB_CAPABILITY_CGROUPSECLABEL); } static int security_preserve_bools(struct policydb *p); -- GitLab From f889491380582b4ba2981cf0b0d7d6a40fb30ab7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 28 Feb 2017 17:56:02 +0800 Subject: [PATCH 0853/1084] vhost: introduce O(1) vq metadata cache When device IOTLB is enabled, all address translations were stored in interval tree. O(lgN) searching time could be slow for virtqueue metadata (avail, used and descriptors) since they were accessed much often than other addresses. So this patch introduces an O(1) array which points to the interval tree nodes that store the translations of vq metadata. Those array were update during vq IOTLB prefetching and were reset during each invalidation and tlb update. Each time we want to access vq metadata, this small array were queried before interval tree. This would be sufficient for static mappings but not dynamic mappings, we could do optimizations on top. 
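A rough sketch of the fast path this introduces (simplified from the vhost_vq_meta_fetch() hunk below; the size argument of the real helper is omitted here):

/* Illustrative only: one cached interval-tree node per metadata type
 * (desc/avail/used), cleared on every IOTLB update or invalidation.
 * A miss falls back to the O(log N) interval-tree walk.
 */
static void __user *meta_cache_fetch(const struct vhost_umem_node *meta[],
				     u64 addr, int type)
{
	const struct vhost_umem_node *node = meta[type];

	if (!node)
		return NULL;	/* caller does the interval-tree lookup */

	return (void __user *)(uintptr_t)(node->userspace_addr +
					  addr - node->start);
}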
Test were done with l2fwd in guest (2M hugepage): noiommu | before | after tx 1.32Mpps | 1.06Mpps(82%) | 1.30Mpps(98%) rx 2.33Mpps | 1.46Mpps(63%) | 2.29Mpps(98%) We can almost reach the same performance as noiommu mode. Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 136 ++++++++++++++++++++++++++++++++++-------- drivers/vhost/vhost.h | 8 +++ 2 files changed, 118 insertions(+), 26 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1f7e4e4e6f8e..998bed505530 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -282,6 +282,22 @@ void vhost_poll_queue(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_queue); +static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq) +{ + int j; + + for (j = 0; j < VHOST_NUM_ADDRS; j++) + vq->meta_iotlb[j] = NULL; +} + +static void vhost_vq_meta_reset(struct vhost_dev *d) +{ + int i; + + for (i = 0; i < d->nvqs; ++i) + __vhost_vq_meta_reset(d->vqs[i]); +} + static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -312,6 +328,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; + __vhost_vq_meta_reset(vq); } static int vhost_worker(void *data) @@ -691,6 +708,18 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem, return 1; } +static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, + u64 addr, unsigned int size, + int type) +{ + const struct vhost_umem_node *node = vq->meta_iotlb[type]; + + if (!node) + return NULL; + + return (void *)(uintptr_t)(node->userspace_addr + addr - node->start); +} + /* Can we switch to this memory table? */ /* Caller should have device mutex but not vq mutex */ static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem, @@ -733,8 +762,14 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to, * could be access through iotlb. So -EAGAIN should * not happen in this case. */ - /* TODO: more fast path */ struct iov_iter t; + void __user *uaddr = vhost_vq_meta_fetch(vq, + (u64)(uintptr_t)to, size, + VHOST_ADDR_DESC); + + if (uaddr) + return __copy_to_user(uaddr, from, size); + ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_WO); @@ -762,8 +797,14 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, * could be access through iotlb. So -EAGAIN should * not happen in this case. */ - /* TODO: more fast path */ + void __user *uaddr = vhost_vq_meta_fetch(vq, + (u64)(uintptr_t)from, size, + VHOST_ADDR_DESC); struct iov_iter f; + + if (uaddr) + return __copy_from_user(to, uaddr, size); + ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); @@ -783,17 +824,12 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, return ret; } -static void __user *__vhost_get_user(struct vhost_virtqueue *vq, - void __user *addr, unsigned size) +static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq, + void __user *addr, unsigned int size, + int type) { int ret; - /* This function should be called after iotlb - * prefetch, which means we're sure that vq - * could be access through iotlb. So -EAGAIN should - * not happen in this case. 
- */ - /* TODO: more fast path */ ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, ARRAY_SIZE(vq->iotlb_iov), VHOST_ACCESS_RO); @@ -814,14 +850,32 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq, return vq->iotlb_iov[0].iov_base; } -#define vhost_put_user(vq, x, ptr) \ +/* This function should be called after iotlb + * prefetch, which means we're sure that vq + * could be access through iotlb. So -EAGAIN should + * not happen in this case. + */ +static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, + void *addr, unsigned int size, + int type) +{ + void __user *uaddr = vhost_vq_meta_fetch(vq, + (u64)(uintptr_t)addr, size, type); + if (uaddr) + return uaddr; + + return __vhost_get_user_slow(vq, addr, size, type); +} + +#define vhost_put_user(vq, x, ptr) \ ({ \ int ret = -EFAULT; \ if (!vq->iotlb) { \ ret = __put_user(x, ptr); \ } else { \ __typeof__(ptr) to = \ - (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ + sizeof(*ptr), VHOST_ADDR_USED); \ if (to != NULL) \ ret = __put_user(x, to); \ else \ @@ -830,14 +884,16 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq, ret; \ }) -#define vhost_get_user(vq, x, ptr) \ +#define vhost_get_user(vq, x, ptr, type) \ ({ \ int ret; \ if (!vq->iotlb) { \ ret = __get_user(x, ptr); \ } else { \ __typeof__(ptr) from = \ - (__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \ + (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ + sizeof(*ptr), \ + type); \ if (from != NULL) \ ret = __get_user(x, from); \ else \ @@ -846,6 +902,12 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq, ret; \ }) +#define vhost_get_avail(vq, x, ptr) \ + vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL) + +#define vhost_get_used(vq, x, ptr) \ + vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) + static void vhost_dev_lock_vqs(struct vhost_dev *d) { int i = 0; @@ -951,6 +1013,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, ret = -EFAULT; break; } + vhost_vq_meta_reset(dev); if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size, msg->iova + msg->size - 1, msg->uaddr, msg->perm)) { @@ -960,6 +1023,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev, vhost_iotlb_notify_vq(dev, msg); break; case VHOST_IOTLB_INVALIDATE: + vhost_vq_meta_reset(dev); vhost_del_umem_range(dev->iotlb, msg->iova, msg->iova + msg->size - 1); break; @@ -1103,12 +1167,26 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, sizeof *used + num * sizeof *used->ring + s); } +static void vhost_vq_meta_update(struct vhost_virtqueue *vq, + const struct vhost_umem_node *node, + int type) +{ + int access = (type == VHOST_ADDR_USED) ? 
+ VHOST_ACCESS_WO : VHOST_ACCESS_RO; + + if (likely(node->perm & access)) + vq->meta_iotlb[type] = node; +} + static int iotlb_access_ok(struct vhost_virtqueue *vq, - int access, u64 addr, u64 len) + int access, u64 addr, u64 len, int type) { const struct vhost_umem_node *node; struct vhost_umem *umem = vq->iotlb; - u64 s = 0, size; + u64 s = 0, size, orig_addr = addr; + + if (vhost_vq_meta_fetch(vq, addr, len, type)) + return true; while (len > s) { node = vhost_umem_interval_tree_iter_first(&umem->umem_tree, @@ -1125,6 +1203,10 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq, } size = node->size - addr + node->start; + + if (orig_addr == addr && size >= len) + vhost_vq_meta_update(vq, node, type); + s += size; addr += size; } @@ -1141,13 +1223,15 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq) return 1; return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, - num * sizeof *vq->desc) && + num * sizeof(*vq->desc), VHOST_ADDR_DESC) && iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail, sizeof *vq->avail + - num * sizeof *vq->avail->ring + s) && + num * sizeof(*vq->avail->ring) + s, + VHOST_ADDR_AVAIL) && iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used, sizeof *vq->used + - num * sizeof *vq->used->ring + s); + num * sizeof(*vq->used->ring) + s, + VHOST_ADDR_USED); } EXPORT_SYMBOL_GPL(vq_iotlb_prefetch); @@ -1728,7 +1812,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq) r = -EFAULT; goto err; } - r = vhost_get_user(vq, last_used_idx, &vq->used->idx); + r = vhost_get_used(vq, last_used_idx, &vq->used->idx); if (r) { vq_err(vq, "Can't access used idx at %p\n", &vq->used->idx); @@ -1932,7 +2016,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, last_avail_idx = vq->last_avail_idx; if (vq->avail_idx == vq->last_avail_idx) { - if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) { + if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) { vq_err(vq, "Failed to access avail idx at %p\n", &vq->avail->idx); return -EFAULT; @@ -1959,7 +2043,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - if (unlikely(vhost_get_user(vq, ring_head, + if (unlikely(vhost_get_avail(vq, ring_head, &vq->avail->ring[last_avail_idx & (vq->num - 1)]))) { vq_err(vq, "Failed to read head: idx %d address %p\n", last_avail_idx, @@ -2175,7 +2259,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) * with the barrier that the Guest executes when enabling * interrupts. */ smp_mb(); - if (vhost_get_user(vq, flags, &vq->avail->flags)) { + if (vhost_get_avail(vq, flags, &vq->avail->flags)) { vq_err(vq, "Failed to get flags"); return true; } @@ -2202,7 +2286,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) * interrupts. */ smp_mb(); - if (vhost_get_user(vq, event, vhost_used_event(vq))) { + if (vhost_get_avail(vq, event, vhost_used_event(vq))) { vq_err(vq, "Failed to get used event idx"); return true; } @@ -2246,7 +2330,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) __virtio16 avail_idx; int r; - r = vhost_get_user(vq, avail_idx, &vq->avail->idx); + r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); if (r) return false; @@ -2281,7 +2365,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) /* They could have slipped one in as we were doing that: make * sure it's written, then check again. 
*/ smp_mb(); - r = vhost_get_user(vq, avail_idx, &vq->avail->idx); + r = vhost_get_avail(vq, avail_idx, &vq->avail->idx); if (r) { vq_err(vq, "Failed to check avail idx at %p: %d\n", &vq->avail->idx, r); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index a9cbbb148f46..f55671d53f28 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -76,6 +76,13 @@ struct vhost_umem { int numem; }; +enum vhost_uaddr_type { + VHOST_ADDR_DESC = 0, + VHOST_ADDR_AVAIL = 1, + VHOST_ADDR_USED = 2, + VHOST_NUM_ADDRS = 3, +}; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -86,6 +93,7 @@ struct vhost_virtqueue { struct vring_desc __user *desc; struct vring_avail __user *avail; struct vring_used __user *used; + const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; struct file *kick; struct file *call; struct file *error; -- GitLab From c4baad50297d84bde1a7ad45e50c73adae4a2192 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Feb 2017 00:02:27 -0800 Subject: [PATCH 0854/1084] virtio-console: avoid DMA from stack put_chars() stuffs the buffer it gets into an sg, but that buffer may be on the stack. This breaks with CONFIG_VMAP_STACK=y (for me, it manifested as printks getting turned into NUL bytes). Signed-off-by: Omar Sandoval Signed-off-by: Michael S. Tsirkin Reviewed-by: Amit Shah --- drivers/char/virtio_console.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 6266c0568e1d..e9b7e0b3cabe 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1136,6 +1136,8 @@ static int put_chars(u32 vtermno, const char *buf, int count) { struct port *port; struct scatterlist sg[1]; + void *data; + int ret; if (unlikely(early_put_chars)) return early_put_chars(vtermno, buf, count); @@ -1144,8 +1146,14 @@ static int put_chars(u32 vtermno, const char *buf, int count) if (!port) return -EPIPE; - sg_init_one(sg, buf, count); - return __send_to_port(port, sg, 1, count, (void *)buf, false); + data = kmemdup(buf, count, GFP_ATOMIC); + if (!data) + return -ENOMEM; + + sg_init_one(sg, data, count); + ret = __send_to_port(port, sg, 1, count, data, false); + kfree(data); + return ret; } /* -- GitLab From 4ecce920e13ace16a5ba45efe8909946c28fb2ad Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Mon, 13 Feb 2017 16:03:47 +0100 Subject: [PATCH 0855/1084] CIFS: move DFS response parsing out of SMB1 code since the DFS payload is not tied to the SMB version we can: * isolate the DFS payload in its own struct, and include that struct in packet structs * move the function that parses the response to misc.c and make it work on the new DFS payload struct (add payload size and utf16 flag as a result). 
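A rough sketch of how a caller (SMB1 today, potentially SMB2 later) can use the relocated parser; the wrapper below is invented for illustration and only follows the prototype added to cifsproto.h in this patch:

/* Illustrative only: hand the shared parser any buffer that holds a
 * struct get_dfs_referral_rsp, plus its size and unicode flag.  On
 * success it allocates *refs; free_dfs_info_array() releases it.
 */
static int demo_parse_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
				const struct nls_table *nls, int remap,
				const char *search_name, bool is_unicode)
{
	struct dfs_info3_param *refs = NULL;
	unsigned int nr_refs = 0;
	int rc;

	rc = parse_dfs_referrals(rsp, rsp_size, &nr_refs, &refs,
				 nls, remap, search_name, is_unicode);
	if (rc)
		return rc;

	/* ... consume refs[0 .. nr_refs - 1] ... */
	free_dfs_info_array(refs, nr_refs);
	return 0;
}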
Signed-off-by: Aurelien Aptel Acked-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifspdu.h | 16 +++--- fs/cifs/cifsproto.h | 5 ++ fs/cifs/cifssmb.c | 119 ++------------------------------------------ fs/cifs/misc.c | 105 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 125 insertions(+), 120 deletions(-) diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index f5b87303ce46..1ce733f3582f 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -2086,17 +2086,21 @@ typedef struct dfs_referral_level_3 { /* version 4 is same, + one flag bit */ __u8 ServiceSiteGuid[16]; /* MBZ, ignored */ } __attribute__((packed)) REFERRAL3; -typedef struct smb_com_transaction_get_dfs_refer_rsp { - struct smb_hdr hdr; /* wct = 10 */ - struct trans2_resp t2; - __u16 ByteCount; - __u8 Pad; +struct get_dfs_referral_rsp { __le16 PathConsumed; __le16 NumberOfReferrals; __le32 DFSFlags; REFERRAL3 referrals[1]; /* array of level 3 dfs_referral structures */ /* followed by the strings pointed to by the referral structures */ -} __attribute__((packed)) TRANSACTION2_GET_DFS_REFER_RSP; +} __packed; + +typedef struct smb_com_transaction_get_dfs_refer_rsp { + struct smb_hdr hdr; /* wct = 10 */ + struct trans2_resp t2; + __u16 ByteCount; + __u8 Pad; + struct get_dfs_referral_rsp dfs_data; +} __packed TRANSACTION2_GET_DFS_REFER_RSP; /* DFS Flags */ #define DFSREF_REFERRAL_SERVER 0x00000001 /* all targets are DFS roots */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 406d2c10ba78..c09783045288 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -284,6 +284,11 @@ extern int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_codepage, unsigned int *num_referrals, struct dfs_info3_param **referrals, int remap); +extern int parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, + unsigned int *num_of_nodes, + struct dfs_info3_param **target_nodes, + const struct nls_table *nls_codepage, int remap, + const char *searchName, bool is_unicode); extern void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, struct smb_vol *vol); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f5099fb8a22f..5005c7995b6a 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4786,117 +4786,6 @@ CIFSGetSrvInodeNumber(const unsigned int xid, struct cifs_tcon *tcon, return rc; } -/* parses DFS refferal V3 structure - * caller is responsible for freeing target_nodes - * returns: - * on success - 0 - * on failure - errno - */ -static int -parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, - unsigned int *num_of_nodes, - struct dfs_info3_param **target_nodes, - const struct nls_table *nls_codepage, int remap, - const char *searchName) -{ - int i, rc = 0; - char *data_end; - bool is_unicode; - struct dfs_referral_level_3 *ref; - - if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) - is_unicode = true; - else - is_unicode = false; - *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); - - if (*num_of_nodes < 1) { - cifs_dbg(VFS, "num_referrals: must be at least > 0, but we get num_referrals = %d\n", - *num_of_nodes); - rc = -EINVAL; - goto parse_DFS_referrals_exit; - } - - ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); - if (ref->VersionNumber != cpu_to_le16(3)) { - cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n", - le16_to_cpu(ref->VersionNumber)); - rc = -EINVAL; - goto parse_DFS_referrals_exit; - } - - /* get the upper boundary of the resp buffer */ - data_end = (char 
*)(&(pSMBr->PathConsumed)) + - le16_to_cpu(pSMBr->t2.DataCount); - - cifs_dbg(FYI, "num_referrals: %d dfs flags: 0x%x ...\n", - *num_of_nodes, le32_to_cpu(pSMBr->DFSFlags)); - - *target_nodes = kcalloc(*num_of_nodes, sizeof(struct dfs_info3_param), - GFP_KERNEL); - if (*target_nodes == NULL) { - rc = -ENOMEM; - goto parse_DFS_referrals_exit; - } - - /* collect necessary data from referrals */ - for (i = 0; i < *num_of_nodes; i++) { - char *temp; - int max_len; - struct dfs_info3_param *node = (*target_nodes)+i; - - node->flags = le32_to_cpu(pSMBr->DFSFlags); - if (is_unicode) { - __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, - GFP_KERNEL); - if (tmp == NULL) { - rc = -ENOMEM; - goto parse_DFS_referrals_exit; - } - cifsConvertToUTF16((__le16 *) tmp, searchName, - PATH_MAX, nls_codepage, remap); - node->path_consumed = cifs_utf16_bytes(tmp, - le16_to_cpu(pSMBr->PathConsumed), - nls_codepage); - kfree(tmp); - } else - node->path_consumed = le16_to_cpu(pSMBr->PathConsumed); - - node->server_type = le16_to_cpu(ref->ServerType); - node->ref_flag = le16_to_cpu(ref->ReferralEntryFlags); - - /* copy DfsPath */ - temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); - max_len = data_end - temp; - node->path_name = cifs_strndup_from_utf16(temp, max_len, - is_unicode, nls_codepage); - if (!node->path_name) { - rc = -ENOMEM; - goto parse_DFS_referrals_exit; - } - - /* copy link target UNC */ - temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); - max_len = data_end - temp; - node->node_name = cifs_strndup_from_utf16(temp, max_len, - is_unicode, nls_codepage); - if (!node->node_name) { - rc = -ENOMEM; - goto parse_DFS_referrals_exit; - } - - ref++; - } - -parse_DFS_referrals_exit: - if (rc) { - free_dfs_info_array(*target_nodes, *num_of_nodes); - *target_nodes = NULL; - *num_of_nodes = 0; - } - return rc; -} - int CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, const char *search_name, struct dfs_info3_param **target_nodes, @@ -4993,9 +4882,11 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, get_bcc(&pSMBr->hdr), le16_to_cpu(pSMBr->t2.DataOffset)); /* parse returned result into more usable form */ - rc = parse_DFS_referrals(pSMBr, num_of_nodes, - target_nodes, nls_codepage, remap, - search_name); + rc = parse_dfs_referrals(&pSMBr->dfs_data, + le16_to_cpu(pSMBr->t2.DataCount), + num_of_nodes, target_nodes, nls_codepage, + remap, search_name, + pSMBr->hdr.Flags2 & SMBFLG2_UNICODE); GetDFSRefExit: cifs_buf_release(pSMB); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index c6729156f9a0..d3fb11529ed9 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -640,3 +640,108 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink, cifs_add_pending_open_locked(fid, tlink, open); spin_unlock(&tlink_tcon(open->tlink)->open_file_lock); } + +/* parses DFS refferal V3 structure + * caller is responsible for freeing target_nodes + * returns: + * - on success - 0 + * - on failure - errno + */ +int +parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, + unsigned int *num_of_nodes, + struct dfs_info3_param **target_nodes, + const struct nls_table *nls_codepage, int remap, + const char *searchName, bool is_unicode) +{ + int i, rc = 0; + char *data_end; + struct dfs_referral_level_3 *ref; + + *num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals); + + if (*num_of_nodes < 1) { + cifs_dbg(VFS, "num_referrals: must be at least > 0, but we get num_referrals = %d\n", + *num_of_nodes); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + + ref = (struct 
dfs_referral_level_3 *) &(rsp->referrals); + if (ref->VersionNumber != cpu_to_le16(3)) { + cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n", + le16_to_cpu(ref->VersionNumber)); + rc = -EINVAL; + goto parse_DFS_referrals_exit; + } + + /* get the upper boundary of the resp buffer */ + data_end = (char *)rsp + rsp_size; + + cifs_dbg(FYI, "num_referrals: %d dfs flags: 0x%x ...\n", + *num_of_nodes, le32_to_cpu(rsp->DFSFlags)); + + *target_nodes = kcalloc(*num_of_nodes, sizeof(struct dfs_info3_param), + GFP_KERNEL); + if (*target_nodes == NULL) { + rc = -ENOMEM; + goto parse_DFS_referrals_exit; + } + + /* collect necessary data from referrals */ + for (i = 0; i < *num_of_nodes; i++) { + char *temp; + int max_len; + struct dfs_info3_param *node = (*target_nodes)+i; + + node->flags = le32_to_cpu(rsp->DFSFlags); + if (is_unicode) { + __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, + GFP_KERNEL); + if (tmp == NULL) { + rc = -ENOMEM; + goto parse_DFS_referrals_exit; + } + cifsConvertToUTF16((__le16 *) tmp, searchName, + PATH_MAX, nls_codepage, remap); + node->path_consumed = cifs_utf16_bytes(tmp, + le16_to_cpu(rsp->PathConsumed), + nls_codepage); + kfree(tmp); + } else + node->path_consumed = le16_to_cpu(rsp->PathConsumed); + + node->server_type = le16_to_cpu(ref->ServerType); + node->ref_flag = le16_to_cpu(ref->ReferralEntryFlags); + + /* copy DfsPath */ + temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); + max_len = data_end - temp; + node->path_name = cifs_strndup_from_utf16(temp, max_len, + is_unicode, nls_codepage); + if (!node->path_name) { + rc = -ENOMEM; + goto parse_DFS_referrals_exit; + } + + /* copy link target UNC */ + temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); + max_len = data_end - temp; + node->node_name = cifs_strndup_from_utf16(temp, max_len, + is_unicode, nls_codepage); + if (!node->node_name) { + rc = -ENOMEM; + goto parse_DFS_referrals_exit; + } + + ref++; + } + +parse_DFS_referrals_exit: + if (rc) { + free_dfs_info_array(*target_nodes, *num_of_nodes); + *target_nodes = NULL; + *num_of_nodes = 0; + } + return rc; +} -- GitLab From 268a635d414df45a4a8da699d431da8f8ffcf014 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Mon, 13 Feb 2017 16:14:17 +0100 Subject: [PATCH 0856/1084] CIFS: add build_path_from_dentry_optional_prefix() this function does the same thing as add build_path_from_dentry() but takes a boolean parameter to decide whether or not to prefix the path with the tree name. we cannot rely on tcon->Flags & SMB_SHARE_IS_IN_DFS for SMB2 as smb2 code never sets tcon->Flags but it sets tcon->share_flags and it seems the SMB_SHARE_IS_IN_DFS has different semantics in SMB2: the prefix shouldn't be added everytime it was in SMB1. Signed-off-by: Aurelien Aptel Acked-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 4 +++- fs/cifs/cifsproto.h | 2 ++ fs/cifs/dir.c | 13 ++++++++++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 9156be545b0f..6b61df117fd4 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -303,7 +303,9 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) * gives us the latter, so we must adjust the result. 
*/ mnt = ERR_PTR(-ENOMEM); - full_path = build_path_from_dentry(mntpt); + + /* always use tree name prefix */ + full_path = build_path_from_dentry_optional_prefix(mntpt, true); if (full_path == NULL) goto cdda_exit; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index c09783045288..9ee46c1c3ebd 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -61,6 +61,8 @@ extern void exit_cifs_idmap(void); extern int init_cifs_spnego(void); extern void exit_cifs_spnego(void); extern char *build_path_from_dentry(struct dentry *); +extern char *build_path_from_dentry_optional_prefix(struct dentry *direntry, + bool prefix); extern char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon, diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 2c227a99f369..56366e984076 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -80,6 +80,17 @@ cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, /* Note: caller must free return buffer */ char * build_path_from_dentry(struct dentry *direntry) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + bool prefix = tcon->Flags & SMB_SHARE_IS_IN_DFS; + + return build_path_from_dentry_optional_prefix(direntry, + prefix); +} + +char * +build_path_from_dentry_optional_prefix(struct dentry *direntry, bool prefix) { struct dentry *temp; int namelen; @@ -92,7 +103,7 @@ build_path_from_dentry(struct dentry *direntry) unsigned seq; dirsep = CIFS_DIR_SEP(cifs_sb); - if (tcon->Flags & SMB_SHARE_IS_IN_DFS) + if (prefix) dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); else dfsplen = 0; -- GitLab From 51146625f34b348c15d55c378b5ae5c2fb963fc8 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Tue, 28 Feb 2017 15:08:41 +0100 Subject: [PATCH 0857/1084] CIFS: add use_ipc flag to SMB2_ioctl() when set, use the session IPC tree id instead of the tid in the provided tcon. 
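A rough sketch of the behaviour the new flag selects (the helper below is invented for illustration; the real logic lives in the SMB2_ioctl() hunk that follows):

/* Illustrative only: with use_ipc set, the request goes out on the
 * session's IPC tree id instead of the tid of the tcon the caller
 * passed in; if the IPC tcon has not been set up yet, fail early.
 */
static int pick_ioctl_tree_id(struct cifs_ses *ses, struct cifs_tcon *tcon,
			      bool use_ipc, __u32 *tid)
{
	if (!use_ipc) {
		*tid = tcon->tid;
		return 0;
	}
	if (ses->ipc_tid == 0)
		return -ENOTCONN;
	*tid = ses->ipc_tid;
	return 0;
}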
Signed-off-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2file.c | 3 ++- fs/cifs/smb2ops.c | 23 ++++++++++++++++------- fs/cifs/smb2pdu.c | 17 +++++++++++++++-- fs/cifs/smb2proto.h | 3 ++- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index b2aff0c6f22c..b4b1f0305f29 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -73,7 +73,8 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */ nr_ioctl_req.Reserved = 0; rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, - fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, true, + fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, + true /* is_fsctl */, false /* use_ipc */, (char *)&nr_ioctl_req, sizeof(nr_ioctl_req), NULL, NULL /* no return info */); if (rc == -EOPNOTSUPP) { diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index d2cdd9c19d17..eba00cd3bd16 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -282,6 +282,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, + false /* use_ipc */, NULL /* no data input */, 0 /* no data input */, (char **)&out_buf, &ret_data_len); if (rc != 0) @@ -571,6 +572,7 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, + false /* use_ipc */, NULL, 0 /* no input */, (char **)&res_key, &ret_data_len); @@ -635,7 +637,8 @@ smb2_clone_range(const unsigned int xid, /* Request server copy to target from src identified by key */ rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, - true /* is_fsctl */, (char *)pcchunk, + true /* is_fsctl */, false /* use_ipc */, + (char *)pcchunk, sizeof(struct copychunk_ioctl), (char **)&retbuf, &ret_data_len); if (rc == 0) { @@ -787,7 +790,8 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_SPARSE, - true /* is_fctl */, &setsparse, 1, NULL, NULL); + true /* is_fctl */, false /* use_ipc */, + &setsparse, 1, NULL, NULL); if (rc) { tcon->broken_sparse_sup = true; cifs_dbg(FYI, "set sparse rc = %d\n", rc); @@ -857,7 +861,8 @@ smb2_duplicate_extents(const unsigned int xid, rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_DUPLICATE_EXTENTS_TO_FILE, - true /* is_fsctl */, (char *)&dup_ext_buf, + true /* is_fsctl */, false /* use_ipc */, + (char *)&dup_ext_buf, sizeof(struct duplicate_extents_to_file), NULL, &ret_data_len); @@ -891,7 +896,8 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_INTEGRITY_INFORMATION, - true /* is_fsctl */, (char *)&integr_info, + true /* is_fsctl */, false /* use_ipc */, + (char *)&integr_info, sizeof(struct fsctl_set_integrity_information_req), NULL, &ret_data_len); @@ -910,7 +916,8 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SRV_ENUMERATE_SNAPSHOTS, - true /* is_fsctl */, NULL, 0 /* no input data */, + true /* is_fsctl */, false /* use_ipc */, + NULL, 0 /* no input data */, 
(char **)&retbuf, &ret_data_len); cifs_dbg(FYI, "enum snaphots ioctl returned %d and ret buflen is %d\n", @@ -1220,7 +1227,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, - true /* is_fctl */, (char *)&fsctl_buf, + true /* is_fctl */, false /* use_ipc */, + (char *)&fsctl_buf, sizeof(struct file_zero_data_information), NULL, NULL); free_xid(xid); return rc; @@ -1254,7 +1262,8 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, - true /* is_fctl */, (char *)&fsctl_buf, + true /* is_fctl */, false /* use_ipc */, + (char *)&fsctl_buf, sizeof(struct file_zero_data_information), NULL, NULL); free_xid(xid); return rc; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ad83b3db2840..8c4532dc749f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -620,6 +620,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, + false /* use_ipc */, (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), (char **)&pneg_rsp, &rsplen); @@ -1683,8 +1684,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, */ int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, - u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, - u32 indatalen, char **out_data, u32 *plen /* returned data len */) + u64 volatile_fid, u32 opcode, bool is_fsctl, bool use_ipc, + char *in_data, u32 indatalen, + char **out_data, u32 *plen /* returned data len */) { struct smb2_ioctl_req *req; struct smb2_ioctl_rsp *rsp; @@ -1721,6 +1723,16 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (rc) return rc; + if (use_ipc) { + if (ses->ipc_tid == 0) { + cifs_small_buf_release(req); + return -ENOTCONN; + } + + cifs_dbg(FYI, "replacing tid 0x%x with IPC tid 0x%x\n", + req->hdr.sync_hdr.TreeId, ses->ipc_tid); + req->hdr.sync_hdr.TreeId = ses->ipc_tid; + } if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -1843,6 +1855,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, FSCTL_SET_COMPRESSION, true /* is_fsctl */, + false /* use_ipc */, (char *)&fsctl_input /* data input */, 2 /* in data len */, &ret_data /* out data */, NULL); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 85fc7a789334..11d9f3013db8 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -121,7 +121,8 @@ extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, struct smb2_err_rsp **err_buf); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, - bool is_fsctl, char *in_data, u32 indatalen, + bool is_fsctl, bool use_ipc, + char *in_data, u32 indatalen, char **out_data, u32 *plen /* returned data len */); extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id); -- GitLab From 7f9f6d2ec51449a20bc35359c9e190bf861b57e1 Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Tue, 28 Feb 2017 15:26:30 +0100 Subject: [PATCH 0858/1084] CIFS: let ses->ipc_tid hold smb2 TreeIds the TreeId field went from 2 bytes in CIFS to 4 bytes in SMB2+. 
this commit updates the size of the ipc_tid field of a cifs_ses, which was still using 2 bytes. Signed-off-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 1a90bb3e2986..af224cda8697 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -822,7 +822,7 @@ struct cifs_ses { int ses_count; /* reference counter */ enum statusEnum status; unsigned overrideSecFlg; /* if non-zero override global sec flags */ - __u16 ipc_tid; /* special tid for connection to IPC share */ + __u32 ipc_tid; /* special tid for connection to IPC share */ char *serverOS; /* name of operating system underlying server */ char *serverNOS; /* name of network operating system of server */ char *serverDomain; /* security realm of server */ -- GitLab From b9043cc5b99e9c93596a28647fd9f526f5bfa22c Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Mon, 13 Feb 2017 16:19:04 +0100 Subject: [PATCH 0859/1084] CIFS: set signing flag in SMB2+ TreeConnect if needed cifs_enable_signing() already sets server->sign according to what the server requires/offers and what mount options allows/forbids, so use that. this is required for IPC tcon that connects to signing-required servers. Signed-off-by: Aurelien Aptel Acked-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8c4532dc749f..2fd93eeed15a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1168,8 +1168,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, /* since no tcon, smb2_init can not do this, so do here */ req->hdr.sync_hdr.SessionId = ses->Suid; - /* if (ses->server->sec_mode & SECMODE_SIGN_REQUIRED) - req->hdr.Flags |= SMB2_FLAGS_SIGNED; */ + if (ses->server->sign) + req->hdr.sync_hdr.Flags |= SMB2_FLAGS_SIGNED; } else if (encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; -- GitLab From 8953de2f02ad7b15e4964c82f9afd60f128e4e98 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Wed, 1 Mar 2017 09:55:28 +0000 Subject: [PATCH 0860/1084] net: bridge: allow IPv6 when multicast flood is disabled Even with multicast flooding turned off, IPv6 ND should still work so that IPv6 connectivity is provided. Allow this by continuing to flood multicast traffic originated by us. Fixes: b6cb5ac8331b ("net: bridge: add per-port multicast flood flag") Cc: Nikolay Aleksandrov Signed-off-by: Mike Manning Acked-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- net/bridge/br_forward.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 6bfac29318f2..902af6ba481c 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -186,8 +186,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood unicast traffic to ports that turn it off */ if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) continue; + /* Do not flood if mc off, except for traffic we originate */ if (pkt_type == BR_PKT_MULTICAST && - !(p->flags & BR_MCAST_FLOOD)) + !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev) continue; /* Do not flood to ports that enable proxy ARP */ -- GitLab From 540e2894f7905538740aaf122bd8e0548e1c34a4 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 1 Mar 2017 12:57:20 +0100 Subject: [PATCH 0861/1084] net: don't call strlen() on the user buffer in packet_bind_spkt() KMSAN (KernelMemorySanitizer, a new error detection tool) reports use of uninitialized memory in packet_bind_spkt(): Acked-by: Eric Dumazet ================================================================== BUG: KMSAN: use of unitialized memory CPU: 0 PID: 1074 Comm: packet Not tainted 4.8.0-rc6+ #1891 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 0000000000000000 ffff88006b6dfc08 ffffffff82559ae8 ffff88006b6dfb48 ffffffff818a7c91 ffffffff85b9c870 0000000000000092 ffffffff85b9c550 0000000000000000 0000000000000092 00000000ec400911 0000000000000002 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x238/0x290 lib/dump_stack.c:51 [] kmsan_report+0x276/0x2e0 mm/kmsan/kmsan.c:1003 [] __msan_warning+0x5b/0xb0 mm/kmsan/kmsan_instr.c:424 [< inline >] strlen lib/string.c:484 [] strlcpy+0x9d/0x200 lib/string.c:144 [] packet_bind_spkt+0x144/0x230 net/packet/af_packet.c:3132 [] SYSC_bind+0x40d/0x5f0 net/socket.c:1370 [] SyS_bind+0x82/0xa0 net/socket.c:1356 [] entry_SYSCALL_64_fastpath+0x13/0x8f arch/x86/entry/entry_64.o:? chained origin: 00000000eba00911 [] save_stack_trace+0x27/0x50 arch/x86/kernel/stacktrace.c:67 [< inline >] kmsan_save_stack_with_flags mm/kmsan/kmsan.c:322 [< inline >] kmsan_save_stack mm/kmsan/kmsan.c:334 [] kmsan_internal_chain_origin+0x118/0x1e0 mm/kmsan/kmsan.c:527 [] __msan_set_alloca_origin4+0xc3/0x130 mm/kmsan/kmsan_instr.c:380 [] SYSC_bind+0x129/0x5f0 net/socket.c:1356 [] SyS_bind+0x82/0xa0 net/socket.c:1356 [] entry_SYSCALL_64_fastpath+0x13/0x8f arch/x86/entry/entry_64.o:? origin description: ----address@SYSC_bind (origin=00000000eb400911) ================================================================== (the line numbers are relative to 4.8-rc6, but the bug persists upstream) , when I run the following program as root: ===================================== #include #include #include #include int main() { struct sockaddr addr; memset(&addr, 0xff, sizeof(addr)); addr.sa_family = AF_PACKET; int fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL)); bind(fd, &addr, sizeof(addr)); return 0; } ===================================== This happens because addr.sa_data copied from the userspace is not zero-terminated, and copying it with strlcpy() in packet_bind_spkt() results in calling strlen() on the kernel copy of that non-terminated buffer. Signed-off-by: Alexander Potapenko Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2bd0d1949312..a0dbe7ca8f72 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3103,7 +3103,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[15]; + char name[sizeof(uaddr->sa_data) + 1]; /* * Check legality @@ -3111,7 +3111,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, if (addr_len != sizeof(struct sockaddr)) return -EINVAL; - strlcpy(name, uaddr->sa_data, sizeof(name)); + /* uaddr->sa_data comes from the userspace, it's not guaranteed to be + * zero-terminated. + */ + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); + name[sizeof(uaddr->sa_data)] = 0; return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } -- GitLab From 13baa00ad01bb3a9f893e3a08cbc2d072fc0c15d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 14:28:39 -0800 Subject: [PATCH 0862/1084] net: net_enable_timestamp() can be called from irq contexts It is now very clear that silly TCP listeners might play with enabling/disabling timestamping while new children are added to their accept queue. Meaning net_enable_timestamp() can be called from BH context while current state of the static key is not enabled. Lets play safe and allow all contexts. The work queue is scheduled only under the problematic cases, which are the static key enable/disable transition, to not slow down critical paths. This extends and improves what we did in commit 5fa8bbda38c6 ("net: use a work queue to defer net_disable_timestamp() work") Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. 
Miller --- net/core/dev.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index e63bf61b19be..8637b2b71f3d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1698,27 +1698,54 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL static atomic_t netstamp_needed_deferred; +static atomic_t netstamp_wanted; static void netstamp_clear(struct work_struct *work) { int deferred = atomic_xchg(&netstamp_needed_deferred, 0); + int wanted; - while (deferred--) - static_key_slow_dec(&netstamp_needed); + wanted = atomic_add_return(deferred, &netstamp_wanted); + if (wanted > 0) + static_key_enable(&netstamp_needed); + else + static_key_disable(&netstamp_needed); } static DECLARE_WORK(netstamp_work, netstamp_clear); #endif void net_enable_timestamp(void) { +#ifdef HAVE_JUMP_LABEL + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 0) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted) + return; + } + atomic_inc(&netstamp_needed_deferred); + schedule_work(&netstamp_work); +#else static_key_slow_inc(&netstamp_needed); +#endif } EXPORT_SYMBOL(net_enable_timestamp); void net_disable_timestamp(void) { #ifdef HAVE_JUMP_LABEL - /* net_disable_timestamp() can be called from non process context */ - atomic_inc(&netstamp_needed_deferred); + int wanted; + + while (1) { + wanted = atomic_read(&netstamp_wanted); + if (wanted <= 1) + break; + if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted) + return; + } + atomic_dec(&netstamp_needed_deferred); schedule_work(&netstamp_work); #else static_key_slow_dec(&netstamp_needed); -- GitLab From 48cac18ecf1de82f76259a54402c3adb7839ad01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 14:45:06 -0800 Subject: [PATCH 0863/1084] ipv6: orphan skbs in reassembly unit Andrey reported a use-after-free in IPv6 stack. Issue here is that we free the socket while it still has skb in TX path and in some queues. It happens here because IPv6 reassembly unit messes skb->truesize, breaking skb_set_owner_w() badly. 
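Orphaning here means detaching the skb from its owning socket up front: the destructor that would normally return skb->truesize to the socket (sock_wfree() for write-charged skbs, as seen in the trace below) is run immediately and the socket pointer is cleared, so whatever the reassembly unit later does with the skb can no longer touch a socket that may already have been freed. A minimal sketch of the idea, simplified for illustration only and not the kernel's exact skb_orphan() implementation:

#include <linux/skbuff.h>

/*
 * Illustrative sketch of orphaning an skb -- not the real skb_orphan().
 */
static inline void skb_orphan_sketch(struct sk_buff *skb)
{
	if (skb->destructor) {
		skb->destructor(skb);	/* e.g. sock_wfree(): uncharge skb->truesize from the socket */
		skb->destructor = NULL;
	}
	skb->sk = NULL;			/* a later kfree_skb() has no socket left to dereference */
}
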
We fixed a similar issue for IPV4 in commit 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") Acked-by: Joe Stringer ================================================================== BUG: KASAN: use-after-free in sock_wfree+0x118/0x120 Read of size 8 at addr ffff880062da0060 by task a.out/4140 page:ffffea00018b6800 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 0000000000000000 0000000000000000 0000000180130013 raw: dead000000000100 dead000000000200 ffff88006741f140 0000000000000000 page dumped because: kasan: bad access detected CPU: 0 PID: 4140 Comm: a.out Not tainted 4.10.0-rc3+ #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 dump_stack+0x292/0x398 lib/dump_stack.c:51 describe_address mm/kasan/report.c:262 kasan_report_error+0x121/0x560 mm/kasan/report.c:370 kasan_report mm/kasan/report.c:392 __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:413 sock_flag ./arch/x86/include/asm/bitops.h:324 sock_wfree+0x118/0x120 net/core/sock.c:1631 skb_release_head_state+0xfc/0x250 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4e0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put ./include/net/inet_frag.h:133 nf_ct_frag6_gather+0x1125/0x38b0 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x21b/0x350 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn ./include/linux/netfilter.h:102 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook ./include/linux/netfilter.h:212 __ip6_local_out+0x52c/0xaf0 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 rawv6_sendmsg+0x2cff/0x4130 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x620 net/socket.c:848 new_sync_write fs/read_write.c:499 __vfs_write+0x483/0x760 fs/read_write.c:512 vfs_write+0x187/0x530 fs/read_write.c:560 SYSC_write fs/read_write.c:607 SyS_write+0xfb/0x230 fs/read_write.c:599 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 RIP: 0033:0x7ff26e6f5b79 RSP: 002b:00007ff268e0ed98 EFLAGS: 00000206 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007ff268e0f9c0 RCX: 00007ff26e6f5b79 RDX: 0000000000000010 RSI: 0000000020f50fe1 RDI: 0000000000000003 RBP: 00007ff26ebc1220 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 R13: 00007ff268e0f9c0 R14: 00007ff26efec040 R15: 0000000000000003 The buggy address belongs to the object at ffff880062da0000 which belongs to the cache RAWv6 of size 1504 The buggy address ffff880062da0060 is located 96 bytes inside of 1504-byte region [ffff880062da0000, ffff880062da05e0) Freed by task 4113: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1352 slab_free_freelist_hook mm/slub.c:1374 slab_free mm/slub.c:2951 kmem_cache_free+0xb2/0x2c0 mm/slub.c:2973 sk_prot_free net/core/sock.c:1377 __sk_destruct+0x49c/0x6e0 net/core/sock.c:1452 sk_destruct+0x47/0x80 
net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sk_free+0x23/0x30 net/core/sock.c:1479 sock_put ./include/net/sock.h:1638 sk_common_release+0x31e/0x4e0 net/core/sock.c:2782 rawv6_close+0x54/0x80 net/ipv6/raw.c:1214 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:431 sock_release+0x8d/0x1e0 net/socket.c:599 sock_close+0x16/0x20 net/socket.c:1063 __fput+0x332/0x7f0 fs/file_table.c:208 ____fput+0x15/0x20 fs/file_table.c:244 task_work_run+0x19b/0x270 kernel/task_work.c:116 exit_task_work ./include/linux/task_work.h:21 do_exit+0x186b/0x2800 kernel/exit.c:839 do_group_exit+0x149/0x420 kernel/exit.c:943 SYSC_exit_group kernel/exit.c:954 SyS_exit_group+0x1d/0x20 kernel/exit.c:952 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Allocated by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 slab_post_alloc_hook mm/slab.h:432 slab_alloc_node mm/slub.c:2708 slab_alloc mm/slub.c:2716 kmem_cache_alloc+0x1af/0x250 mm/slub.c:2721 sk_prot_alloc+0x65/0x2a0 net/core/sock.c:1334 sk_alloc+0x105/0x1010 net/core/sock.c:1396 inet6_create+0x44d/0x1150 net/ipv6/af_inet6.c:183 __sock_create+0x4f6/0x880 net/socket.c:1199 sock_create net/socket.c:1239 SYSC_socket net/socket.c:1269 SyS_socket+0xf9/0x230 net/socket.c:1249 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Memory state around the buggy address: ffff880062d9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880062d9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff880062da0000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880062da0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880062da0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: Andrey Konovalov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + net/openvswitch/conntrack.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 9948b5ce52da..986d4ca38832 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -589,6 +589,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); + skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, skb->dev ? 
skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 85cd59526670..e0a87776a010 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -485,7 +485,6 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - skb_orphan(skb); memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); err = nf_ct_frag6_gather(net, skb, user); if (err) { -- GitLab From eb1e011a14748a1d9df9a7d7df9a5711721a1bdb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Feb 2017 09:49:26 +0100 Subject: [PATCH 0864/1084] average: change to declare precision, not factor Declaring the factor is counter-intuitive, and people are prone to using small(-ish) values even when that makes no sense. Change the DECLARE_EWMA() macro to take the fractional precision, in bits, rather than a factor, and update all users. While at it, add some more documentation. Acked-by: David S. Miller Signed-off-by: Johannes Berg --- drivers/net/virtio_net.c | 2 +- drivers/net/wireless/ath/ath5k/ath5k.h | 2 +- drivers/net/wireless/ralink/rt2x00/rt2x00.h | 2 +- include/linux/average.h | 61 ++++++++++++++------- net/batman-adv/types.h | 2 +- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/sta_info.h | 2 +- 7 files changed, 47 insertions(+), 26 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index bf95016f442a..e9d7e2b70085 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -51,7 +51,7 @@ module_param(gso, bool, 0444); * at once, the weight is chosen so that the EWMA will be insensitive to short- * term, transient changes in packet size. */ -DECLARE_EWMA(pkt_len, 1, 64) +DECLARE_EWMA(pkt_len, 0, 64) /* With mergeable buffers we align buffer address and use the low bits to * encode its true size. Buffer size is up to 1 page so we need to align to diff --git a/drivers/net/wireless/ath/ath5k/ath5k.h b/drivers/net/wireless/ath/ath5k/ath5k.h index 67fedb61fcc0..979800c6f57f 100644 --- a/drivers/net/wireless/ath/ath5k/ath5k.h +++ b/drivers/net/wireless/ath/ath5k/ath5k.h @@ -1252,7 +1252,7 @@ struct ath5k_statistics { #define ATH5K_TXQ_LEN_MAX (ATH_TXBUF / 4) /* bufs per queue */ #define ATH5K_TXQ_LEN_LOW (ATH5K_TXQ_LEN_MAX / 2) /* low mark */ -DECLARE_EWMA(beacon_rssi, 1024, 8) +DECLARE_EWMA(beacon_rssi, 10, 8) /* Driver state associated with an instance of a device */ struct ath5k_hw { diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index 26869b3bef45..340787894c69 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -257,7 +257,7 @@ struct link_qual { int tx_failed; }; -DECLARE_EWMA(rssi, 1024, 8) +DECLARE_EWMA(rssi, 10, 8) /* * Antenna settings about the currently active link. diff --git a/include/linux/average.h b/include/linux/average.h index d04aa58280de..7ddaf340d2ac 100644 --- a/include/linux/average.h +++ b/include/linux/average.h @@ -1,45 +1,66 @@ #ifndef _LINUX_AVERAGE_H #define _LINUX_AVERAGE_H -/* Exponentially weighted moving average (EWMA) */ +/* + * Exponentially weighted moving average (EWMA) + * + * This implements a fixed-precision EWMA algorithm, with both the + * precision and fall-off coefficient determined at compile-time + * and built into the generated helper funtions. 
+ * + * The first argument to the macro is the name that will be used + * for the struct and helper functions. + * + * The second argument, the precision, expresses how many bits are + * used for the fractional part of the fixed-precision values. + * + * The third argument, the weight reciprocal, determines how the + * new values will be weighed vs. the old state, new values will + * get weight 1/weight_rcp and old values 1-1/weight_rcp. Note + * that this parameter must be a power of two for efficiency. + */ -#define DECLARE_EWMA(name, _factor, _weight) \ +#define DECLARE_EWMA(name, _precision, _weight_rcp) \ struct ewma_##name { \ unsigned long internal; \ }; \ static inline void ewma_##name##_init(struct ewma_##name *e) \ { \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + /* \ + * Even if you want to feed it just 0/1 you should have \ + * some bits for the non-fractional part... \ + */ \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ e->internal = 0; \ } \ static inline unsigned long \ ewma_##name##_read(struct ewma_##name *e) \ { \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ - return e->internal >> ilog2(_factor); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ + return e->internal >> (_precision); \ } \ static inline void ewma_##name##_add(struct ewma_##name *e, \ unsigned long val) \ { \ unsigned long internal = ACCESS_ONCE(e->internal); \ - unsigned long weight = ilog2(_weight); \ - unsigned long factor = ilog2(_factor); \ + unsigned long weight_rcp = ilog2(_weight_rcp); \ + unsigned long precision = _precision; \ \ - BUILD_BUG_ON(!__builtin_constant_p(_factor)); \ - BUILD_BUG_ON(!__builtin_constant_p(_weight)); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_factor); \ - BUILD_BUG_ON_NOT_POWER_OF_2(_weight); \ + BUILD_BUG_ON(!__builtin_constant_p(_precision)); \ + BUILD_BUG_ON(!__builtin_constant_p(_weight_rcp)); \ + BUILD_BUG_ON((_precision) > 30); \ + BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ \ ACCESS_ONCE(e->internal) = internal ? \ - (((internal << weight) - internal) + \ - (val << factor)) >> weight : \ - (val << factor); \ + (((internal << weight_rcp) - internal) + \ + (val << precision)) >> weight_rcp : \ + (val << precision); \ } #endif /* _LINUX_AVERAGE_H */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 8f64a5c01345..66b25e410a41 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -402,7 +402,7 @@ struct batadv_gw_node { struct rcu_head rcu; }; -DECLARE_EWMA(throughput, 1024, 8) +DECLARE_EWMA(throughput, 10, 8) /** * struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. 
V private neighbor diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 159a1a733725..0e718437d080 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -428,7 +428,7 @@ struct ieee80211_sta_tx_tspec { bool downgraded; }; -DECLARE_EWMA(beacon_signal, 16, 4) +DECLARE_EWMA(beacon_signal, 4, 4) struct ieee80211_if_managed { struct timer_list timer; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 15599c70a38f..e65cda34d2bc 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -372,7 +372,7 @@ struct mesh_sta { unsigned int fail_avg; }; -DECLARE_EWMA(signal, 1024, 8) +DECLARE_EWMA(signal, 10, 8) struct ieee80211_sta_rx_stats { unsigned long packets; -- GitLab From 9ccd27cc2e8bfbec502d7c64a353dc4489536b81 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 21:11:09 +0100 Subject: [PATCH 0865/1084] sched/headers: Make all include/linux/sched/*.h headers build standalone Make each header self-sufficient, so that it can be built successfully both in an allnoconfig and allyesconfig kernel. Also standardize the naming of their header guards. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/deadline.h | 8 +++++--- include/linux/sched/prio.h | 6 +++--- include/linux/sched/rt.h | 10 ++++++---- include/linux/sched/sysctl.h | 10 +++++++--- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 9089a2ae913d..975be862e083 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,5 +1,7 @@ -#ifndef _SCHED_DEADLINE_H -#define _SCHED_DEADLINE_H +#ifndef _LINUX_SCHED_DEADLINE_H +#define _LINUX_SCHED_DEADLINE_H + +#include /* * SCHED_DEADLINE tasks has negative priorities, reflecting @@ -26,4 +28,4 @@ static inline bool dl_time_before(u64 a, u64 b) return (s64)(a - b) < 0; } -#endif /* _SCHED_DEADLINE_H */ +#endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index d9cf5a5762d9..2cc450f6ec54 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -1,5 +1,5 @@ -#ifndef _SCHED_PRIO_H -#define _SCHED_PRIO_H +#ifndef _LINUX_SCHED_PRIO_H +#define _LINUX_SCHED_PRIO_H #define MAX_NICE 19 #define MIN_NICE -20 @@ -57,4 +57,4 @@ static inline long rlimit_to_nice(long prio) return (MAX_NICE - prio + 1); } -#endif /* _SCHED_PRIO_H */ +#endif /* _LINUX_SCHED_PRIO_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index a30b172df6e1..3bd668414f61 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -1,7 +1,9 @@ -#ifndef _SCHED_RT_H -#define _SCHED_RT_H +#ifndef _LINUX_SCHED_RT_H +#define _LINUX_SCHED_RT_H -#include +#include + +struct task_struct; static inline int rt_prio(int prio) { @@ -57,4 +59,4 @@ extern void normalize_rt_tasks(void); */ #define RR_TIMESLICE (100 * HZ / 1000) -#endif /* _SCHED_RT_H */ +#endif /* _LINUX_SCHED_RT_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 49308e142aae..0f5ecd4d298e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -1,5 +1,9 @@ -#ifndef _SCHED_SYSCTL_H -#define _SCHED_SYSCTL_H +#ifndef _LINUX_SCHED_SYSCTL_H +#define _LINUX_SCHED_SYSCTL_H + +#include + +struct ctl_table; #ifdef CONFIG_DETECT_HUNG_TASK extern int sysctl_hung_task_check_count; @@ -78,4 +82,4 @@ extern int sysctl_schedstats(struct 
ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -#endif /* _SCHED_SYSCTL_H */ +#endif /* _LINUX_SCHED_SYSCTL_H */ -- GitLab From c930b2c0de32f45ce8f67affe936ce7a05b07b00 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 12:22:54 +0100 Subject: [PATCH 0866/1084] sched/core: Convert ___assert_task_state() link time assert to BUILD_BUG_ON() The length of TASK_STATE_TO_CHAR_STR was still checked using the old link-time manual error method - convert it to BUILD_BUG_ON(). This has a couple of advantages: - it's more obvious what's going on - it reduces the size and complexity of - BUILD_BUG_ON() will fail during compilation, with a clearer error message than the link time assert. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- kernel/sched/core.c | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a28deb5f210..c204613396cd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -223,9 +223,6 @@ extern void proc_sched_set_task(struct task_struct *p); #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" -extern char ___assert_task_state[1 - 2*!!( - sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; - /* Convenience macros for the sake of set_current_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bbfb917a9b49..7d76ccb79e91 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5233,6 +5233,9 @@ void sched_show_task(struct task_struct *p) int ppid; unsigned long state = p->state; + /* Make sure the string lines up properly with the number of task states: */ + BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1); + if (!try_get_task_stack(p)) return; if (state) -- GitLab From 59ddbcb2f45b958cf1f11f122b666cbcf50cd57b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:37:48 +0100 Subject: [PATCH 0867/1084] sched/core: Move the get_preempt_disable_ip() inline to sched/core.c It's defined in , but nothing outside the scheduler uses it - so move it to the sched/core.c usage site. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 --------- kernel/sched/core.c | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index c204613396cd..df42cac04243 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3419,15 +3419,6 @@ static inline void cond_resched_rcu(void) #endif } -static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -{ -#ifdef CONFIG_DEBUG_PREEMPT - return p->preempt_disable_ip; -#else - return 0; -#endif -} - /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPT, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7d76ccb79e91..2acdf19c5f7c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3211,6 +3211,15 @@ static inline void preempt_latency_start(int val) { } static inline void preempt_latency_stop(int val) { } #endif +static inline unsigned long get_preempt_disable_ip(struct task_struct *p) +{ +#ifdef CONFIG_DEBUG_PREEMPT + return p->preempt_disable_ip; +#else + return 0; +#endif +} + /* * Print scheduling while atomic bug: */ -- GitLab From 0c98d344fe5c27f6e4bce42ac503e9e9a51c7d1d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:38:10 +0100 Subject: [PATCH 0868/1084] sched/core: Remove the tsk_cpus_allowed() wrapper So the original intention of tsk_cpus_allowed() was to 'future-proof' the field - but it's pretty ineffectual at that, because half of the code uses ->cpus_allowed directly ... Also, the wrapper makes the code longer than the original expression! So just get rid of it. This also shrinks a bit. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/platforms/cell/spufs/sched.c | 2 +- arch/sparc/kernel/sysfs.c | 2 +- drivers/cpufreq/sparc-us2e-cpufreq.c | 4 ++-- drivers/cpufreq/sparc-us3-cpufreq.c | 4 ++-- drivers/infiniband/hw/hfi1/affinity.c | 2 +- drivers/infiniband/hw/hfi1/sdma.c | 2 +- include/linux/sched.h | 3 --- kernel/sched/core.c | 20 ++++++++--------- kernel/sched/cpudeadline.c | 4 ++-- kernel/sched/cpupri.c | 4 ++-- kernel/sched/deadline.c | 7 +++--- kernel/sched/fair.c | 25 +++++++++++----------- kernel/sched/rt.c | 5 ++--- lib/smp_processor_id.c | 2 +- samples/trace_events/trace-events-sample.c | 2 +- 16 files changed, 42 insertions(+), 48 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 573fb3a461b5..21fdf02583fe 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -795,7 +795,7 @@ void __init smp_cpus_done(unsigned int max_cpus) * se we pin us down to CPU 0 for a short while */ alloc_cpumask_var(&old_mask, GFP_NOWAIT); - cpumask_copy(old_mask, tsk_cpus_allowed(current)); + cpumask_copy(old_mask, ¤t->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 460f5f31d5cb..9b543df210fb 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -140,7 +140,7 @@ void __spu_update_sched_info(struct spu_context *ctx) * runqueue. The context will be rescheduled on the proper node * if it is timesliced or preempted. 
*/ - cpumask_copy(&ctx->cpus_allowed, tsk_cpus_allowed(current)); + cpumask_copy(&ctx->cpus_allowed, ¤t->cpus_allowed); /* Save the current cpu id for spu interrupt routing. */ ctx->last_ran = raw_smp_processor_id(); diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index 4808b6d23455..d63fc613e7a9 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -106,7 +106,7 @@ static unsigned long run_on_cpu(unsigned long cpu, cpumask_t old_affinity; unsigned long ret; - cpumask_copy(&old_affinity, tsk_cpus_allowed(current)); + cpumask_copy(&old_affinity, ¤t->cpus_allowed); /* should return -EINVAL to userspace */ if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) return 0; diff --git a/drivers/cpufreq/sparc-us2e-cpufreq.c b/drivers/cpufreq/sparc-us2e-cpufreq.c index b73feeb666f9..35ddb6da93aa 100644 --- a/drivers/cpufreq/sparc-us2e-cpufreq.c +++ b/drivers/cpufreq/sparc-us2e-cpufreq.c @@ -234,7 +234,7 @@ static unsigned int us2e_freq_get(unsigned int cpu) cpumask_t cpus_allowed; unsigned long clock_tick, estar; - cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); + cpumask_copy(&cpus_allowed, ¤t->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(cpu)); clock_tick = sparc64_get_clock_tick(cpu) / 1000; @@ -252,7 +252,7 @@ static int us2e_freq_target(struct cpufreq_policy *policy, unsigned int index) unsigned long clock_tick, divisor, old_divisor, estar; cpumask_t cpus_allowed; - cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); + cpumask_copy(&cpus_allowed, ¤t->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000; diff --git a/drivers/cpufreq/sparc-us3-cpufreq.c b/drivers/cpufreq/sparc-us3-cpufreq.c index 9bb42ba50efa..a8d86a449ca1 100644 --- a/drivers/cpufreq/sparc-us3-cpufreq.c +++ b/drivers/cpufreq/sparc-us3-cpufreq.c @@ -82,7 +82,7 @@ static unsigned int us3_freq_get(unsigned int cpu) unsigned long reg; unsigned int ret; - cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); + cpumask_copy(&cpus_allowed, ¤t->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(cpu)); reg = read_safari_cfg(); @@ -99,7 +99,7 @@ static int us3_freq_target(struct cpufreq_policy *policy, unsigned int index) unsigned long new_bits, new_freq, reg; cpumask_t cpus_allowed; - cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current)); + cpumask_copy(&cpus_allowed, ¤t->cpus_allowed); set_cpus_allowed_ptr(current, cpumask_of(cpu)); new_freq = sparc64_get_clock_tick(cpu) / 1000; diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 7a3d906b3671..e2cd2cd3b28a 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -576,7 +576,7 @@ int hfi1_get_proc_affinity(int node) struct hfi1_affinity_node *entry; cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; const struct cpumask *node_mask, - *proc_mask = tsk_cpus_allowed(current); + *proc_mask = ¤t->cpus_allowed; struct hfi1_affinity_node_list *affinity = &node_affinity; struct cpu_mask_set *set = &affinity->proc; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 1d81cac1fa6c..5cde1ecda0fe 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -856,7 +856,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, { struct sdma_rht_node *rht_node; struct sdma_engine *sde = NULL; - const struct cpumask *current_mask = tsk_cpus_allowed(current); + const struct cpumask 
*current_mask = ¤t->cpus_allowed; unsigned long cpu_id; /* diff --git a/include/linux/sched.h b/include/linux/sched.h index df42cac04243..6d1cc20cc477 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1995,9 +1995,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif -/* Future-safe accessor for struct task_struct's cpus_allowed. */ -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) - static inline int tsk_nr_cpus_allowed(struct task_struct *p) { return p->nr_cpus_allowed; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2acdf19c5f7c..ef5bbf760a08 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -981,7 +981,7 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_ return rq; /* Affinity changed (again). */ - if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) return rq; rq = move_queued_task(rq, p, dest_cpu); @@ -1259,10 +1259,10 @@ static int migrate_swap_stop(void *data) if (task_cpu(arg->src_task) != arg->src_cpu) goto unlock; - if (!cpumask_test_cpu(arg->dst_cpu, tsk_cpus_allowed(arg->src_task))) + if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) goto unlock; - if (!cpumask_test_cpu(arg->src_cpu, tsk_cpus_allowed(arg->dst_task))) + if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) goto unlock; __migrate_swap_task(arg->src_task, arg->dst_cpu); @@ -1303,10 +1303,10 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) goto out; - if (!cpumask_test_cpu(arg.dst_cpu, tsk_cpus_allowed(arg.src_task))) + if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) goto out; - if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task))) + if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) goto out; trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); @@ -1490,14 +1490,14 @@ static int select_fallback_rq(int cpu, struct task_struct *p) for_each_cpu(dest_cpu, nodemask) { if (!cpu_active(dest_cpu)) continue; - if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) return dest_cpu; } } for (;;) { /* Any allowed, online CPU? 
*/ - for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { + for_each_cpu(dest_cpu, &p->cpus_allowed) { if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) continue; if (!cpu_online(dest_cpu)) @@ -1552,7 +1552,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) if (tsk_nr_cpus_allowed(p) > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else - cpu = cpumask_any(tsk_cpus_allowed(p)); + cpu = cpumask_any(&p->cpus_allowed); /* * In order not to call set_task_cpu() on a blocking task we need @@ -1564,7 +1564,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) * [ this allows ->select_task() to simply return task_cpu(p) and * not worry about this generic constraint ] */ - if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) || + if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || !cpu_online(cpu))) cpu = select_fallback_rq(task_cpu(p), p); @@ -5473,7 +5473,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (curr_cpu == target_cpu) return 0; - if (!cpumask_test_cpu(target_cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) return -EINVAL; /* TODO: This is not properly updating schedstats */ diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index e73119013c53..fba235c7d026 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -128,10 +128,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, const struct sched_dl_entity *dl_se = &p->dl; if (later_mask && - cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) { + cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { best_cpu = cpumask_any(later_mask); goto out; - } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) && + } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && dl_time_before(dl_se->deadline, cp->elements[0].dl)) { best_cpu = cpudl_maximum(cp); if (later_mask) diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 11e9705bf937..981fcd7dc394 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, if (skip) continue; - if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids) + if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) continue; if (lowest_mask) { - cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask); + cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); /* * We have to ensure that we have at least one bit diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 27737f34757d..8e4d6e4e3ccc 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -252,7 +252,7 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p * If we cannot preempt any rq, fall back to pick any * online cpu. */ - cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p)); + cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); if (cpu >= nr_cpu_ids) { /* * Fail to find any suitable cpu. @@ -1235,7 +1235,7 @@ static void set_curr_task_dl(struct rq *rq) static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + cpumask_test_cpu(cpu, &p->cpus_allowed)) return 1; return 0; } @@ -1384,8 +1384,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) /* Retry if something changed. 
*/ if (double_lock_balance(rq, later_rq)) { if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(later_rq->cpu, - tsk_cpus_allowed(task)) || + !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !dl_task(task) || !task_on_rq_queued(task))) { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 274c747a01ce..3b60d73ab290 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1551,7 +1551,7 @@ static void task_numa_compare(struct task_numa_env *env, */ if (cur) { /* Skip this swap candidate if cannot move to the source cpu */ - if (!cpumask_test_cpu(env->src_cpu, tsk_cpus_allowed(cur))) + if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) goto unlock; /* @@ -1661,7 +1661,7 @@ static void task_numa_find_cpu(struct task_numa_env *env, for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { /* Skip this CPU if the source task cannot migrate */ - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(env->p))) + if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed)) continue; env->dst_cpu = cpu; @@ -5458,7 +5458,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, /* Skip over this group if it has no CPUs allowed */ if (!cpumask_intersects(sched_group_cpus(group), - tsk_cpus_allowed(p))) + &p->cpus_allowed)) continue; local_group = cpumask_test_cpu(this_cpu, @@ -5578,7 +5578,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) return cpumask_first(sched_group_cpus(group)); /* Traverse only the allowed CPUs */ - for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) { + for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { if (idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); @@ -5717,7 +5717,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int if (!test_idle_cores(target, false)) return -1; - cpumask_and(cpus, sched_domain_span(sd), tsk_cpus_allowed(p)); + cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); for_each_cpu_wrap(core, cpus, target, wrap) { bool idle = true; @@ -5751,7 +5751,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t return -1; for_each_cpu(cpu, cpu_smt_mask(target)) { - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; if (idle_cpu(cpu)) return cpu; @@ -5803,7 +5803,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t time = local_clock(); for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) { - if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; if (idle_cpu(cpu)) break; @@ -5958,7 +5958,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) - && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); + && cpumask_test_cpu(cpu, &p->cpus_allowed); } rcu_read_lock(); @@ -6698,7 +6698,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) return 0; - if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) { + if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) { int cpu; schedstat_inc(p->se.statistics.nr_failed_migrations_affine); @@ -6718,7 +6718,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* Prevent to re-select dst_cpu via env's cpus */ for_each_cpu_and(cpu, 
env->dst_grpmask, env->cpus) { - if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) { + if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { env->flags |= LBF_DST_PINNED; env->new_dst_cpu = cpu; break; @@ -7252,7 +7252,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd) /* * Group imbalance indicates (and tries to solve) the problem where balancing - * groups is inadequate due to tsk_cpus_allowed() constraints. + * groups is inadequate due to ->cpus_allowed constraints. * * Imagine a situation of two groups of 4 cpus each and 4 tasks each with a * cpumask covering 1 cpu of the first group and 3 cpus of the second group. @@ -8211,8 +8211,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, * if the curr task on busiest cpu can't be * moved to this_cpu */ - if (!cpumask_test_cpu(this_cpu, - tsk_cpus_allowed(busiest->curr))) { + if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { raw_spin_unlock_irqrestore(&busiest->lock, flags); env.flags |= LBF_ALL_PINNED; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e8836cfc4cdb..cbd356f63883 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1591,7 +1591,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) + cpumask_test_cpu(cpu, &p->cpus_allowed)) return 1; return 0; } @@ -1726,8 +1726,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. */ if (unlikely(task_rq(task) != rq || - !cpumask_test_cpu(lowest_rq->cpu, - tsk_cpus_allowed(task)) || + !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || task_running(rq, task) || !rt_task(task) || !task_on_rq_queued(task))) { diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 1afec32de6f2..690d75b132fa 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -22,7 +22,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1, * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - if (cpumask_equal(tsk_cpus_allowed(current), cpumask_of(this_cpu))) + if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu))) goto out; /* diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 30e282d33d4d..bc7fcf010a5b 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -33,7 +33,7 @@ static void simple_thread_func(int cnt) /* Silly tracepoints */ trace_foo_bar("hello", cnt, array, random_strings[len], - tsk_cpus_allowed(current)); + ¤t->cpus_allowed); trace_foo_with_template_simple("HELLO", cnt); -- GitLab From 4b53a3412d6663214ce9c754eff9373a9cff9dee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:41:03 +0100 Subject: [PATCH 0869/1084] sched/core: Remove the tsk_nr_cpus_allowed() wrapper tsk_nr_cpus_allowed() too is a pretty pointless wrapper that is not used consistently and which makes the code both harder to read and longer as well. So remove it - this also shrinks a bit. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 ----- kernel/sched/core.c | 2 +- kernel/sched/deadline.c | 28 ++++++++++++++-------------- kernel/sched/rt.c | 24 ++++++++++++------------ 4 files changed, 27 insertions(+), 32 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d1cc20cc477..e732881517f2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1995,11 +1995,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif -static inline int tsk_nr_cpus_allowed(struct task_struct *p) -{ - return p->nr_cpus_allowed; -} - #define TNF_MIGRATED 0x01 #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ef5bbf760a08..2d51ec65dc73 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1549,7 +1549,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) { lockdep_assert_held(&p->pi_lock); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); else cpu = cpumask_any(&p->cpus_allowed); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 8e4d6e4e3ccc..99b2c33a9fbc 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -134,7 +134,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { struct task_struct *p = dl_task_of(dl_se); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) dl_rq->dl_nr_migratory++; update_dl_migration(dl_rq); @@ -144,7 +144,7 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { struct task_struct *p = dl_task_of(dl_se); - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) dl_rq->dl_nr_migratory--; update_dl_migration(dl_rq); @@ -958,7 +958,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) enqueue_dl_entity(&p->dl, pi_se, flags); - if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); } @@ -1032,9 +1032,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) * try to make it stay here, it might be important. */ if (unlikely(dl_task(curr)) && - (tsk_nr_cpus_allowed(curr) < 2 || + (curr->nr_cpus_allowed < 2 || !dl_entity_preempt(&p->dl, &curr->dl)) && - (tsk_nr_cpus_allowed(p) > 1)) { + (p->nr_cpus_allowed > 1)) { int target = find_later_rq(p); if (target != -1 && @@ -1055,7 +1055,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) * Current can't be migrated, useless to reschedule, * let's hope p can move out. */ - if (tsk_nr_cpus_allowed(rq->curr) == 1 || + if (rq->curr->nr_cpus_allowed == 1 || cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1) return; @@ -1063,7 +1063,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) * p is migratable, so let's not schedule it and * see if it is pushed or pulled somewhere else. 
*/ - if (tsk_nr_cpus_allowed(p) != 1 && + if (p->nr_cpus_allowed != 1 && cpudl_find(&rq->rd->cpudl, p, NULL) != -1) return; @@ -1178,7 +1178,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p) { update_curr_dl(rq); - if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1) + if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) enqueue_pushable_dl_task(rq, p); } @@ -1279,7 +1279,7 @@ static int find_later_rq(struct task_struct *task) if (unlikely(!later_mask)) return -1; - if (tsk_nr_cpus_allowed(task) == 1) + if (task->nr_cpus_allowed == 1) return -1; /* @@ -1424,7 +1424,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(tsk_nr_cpus_allowed(p) <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!task_on_rq_queued(p)); BUG_ON(!dl_task(p)); @@ -1463,7 +1463,7 @@ static int push_dl_task(struct rq *rq) */ if (dl_task(rq->curr) && dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && - tsk_nr_cpus_allowed(rq->curr) > 1) { + rq->curr->nr_cpus_allowed > 1) { resched_curr(rq); return 0; } @@ -1610,9 +1610,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && - tsk_nr_cpus_allowed(p) > 1 && + p->nr_cpus_allowed > 1 && dl_task(rq->curr) && - (tsk_nr_cpus_allowed(rq->curr) < 2 || + (rq->curr->nr_cpus_allowed < 2 || !dl_entity_preempt(&p->dl, &rq->curr->dl))) { push_dl_tasks(rq); } @@ -1726,7 +1726,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) if (rq->curr != p) { #ifdef CONFIG_SMP - if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded) + if (p->nr_cpus_allowed > 1 && rq->dl.overloaded) queue_push_tasks(rq); #endif if (dl_task(rq->curr)) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index cbd356f63883..9f3e40226dec 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -335,7 +335,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total++; - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory++; update_rt_migration(rt_rq); @@ -352,7 +352,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total--; - if (tsk_nr_cpus_allowed(p) > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory--; update_rt_migration(rt_rq); @@ -1324,7 +1324,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_rt_entity(rt_se, flags); - if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1413,7 +1413,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) * will have to sort it out. */ if (curr && unlikely(rt_task(curr)) && - (tsk_nr_cpus_allowed(curr) < 2 || + (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio)) { int target = find_lowest_rq(p); @@ -1437,7 +1437,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) * Current can't be migrated, useless to reschedule, * let's hope p can move out. */ - if (tsk_nr_cpus_allowed(rq->curr) == 1 || + if (rq->curr->nr_cpus_allowed == 1 || !cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) return; @@ -1445,7 +1445,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) * p is migratable, so let's not schedule it and * see if it is pushed or pulled somewhere else. 
*/ - if (tsk_nr_cpus_allowed(p) != 1 + if (p->nr_cpus_allowed != 1 && cpupri_find(&rq->rd->cpupri, p, NULL)) return; @@ -1579,7 +1579,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1) + if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1629,7 +1629,7 @@ static int find_lowest_rq(struct task_struct *task) if (unlikely(!lowest_mask)) return -1; - if (tsk_nr_cpus_allowed(task) == 1) + if (task->nr_cpus_allowed == 1) return -1; /* No other targets possible */ if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) @@ -1761,7 +1761,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(tsk_nr_cpus_allowed(p) <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!task_on_rq_queued(p)); BUG_ON(!rt_task(p)); @@ -2121,9 +2121,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) { if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && - tsk_nr_cpus_allowed(p) > 1 && + p->nr_cpus_allowed > 1 && (dl_task(rq->curr) || rt_task(rq->curr)) && - (tsk_nr_cpus_allowed(rq->curr) < 2 || + (rq->curr->nr_cpus_allowed < 2 || rq->curr->prio <= p->prio)) push_rt_tasks(rq); } @@ -2196,7 +2196,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) */ if (task_on_rq_queued(p) && rq->curr != p) { #ifdef CONFIG_SMP - if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded) + if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) queue_push_tasks(rq); #endif /* CONFIG_SMP */ if (p->prio < rq->curr->prio) -- GitLab From f9411ebe3d85cbbea06298241e6053d031d281fc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:50:49 +0100 Subject: [PATCH 0870/1084] rcu: Separate the RCU synchronization types and APIs into So rcupdate.h is a pretty complex header, in particular it includes which includes - creating a dependency that includes in , which prevents the isolation of from the derived header. Solve part of the problem by decoupling rcupdate.h from completions: this can be done by separating out the rcu_synchronize types and APIs, and updating their usage sites. Since this is a mostly RCU-internal types this will not just simplify 's dependencies, but will make all the hundreds of .c files that include rcupdate.h but not completions or wait.h build faster. ( For rcutiny this means that two dependent APIs have to be uninlined, but that shouldn't be much of a problem as they are rare variants. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/autofs4/autofs_i.h | 1 + include/linux/dcache.h | 1 + include/linux/rcupdate.h | 40 ---------------------------- include/linux/rcupdate_wait.h | 50 +++++++++++++++++++++++++++++++++++ include/linux/rcutiny.h | 11 ++------ kernel/rcu/srcu.c | 2 +- kernel/rcu/tiny.c | 14 +++++++++- kernel/rcu/tree.c | 2 +- kernel/rcu/update.c | 1 + kernel/sched/core.c | 1 + 10 files changed, 71 insertions(+), 52 deletions(-) create mode 100644 include/linux/rcupdate_wait.h diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index c885daae68c8..beef981aa54f 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -14,6 +14,7 @@ #include #include #include +#include /* This is the range of ioctl() numbers we claim as ours */ #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 591b6c16f9c1..d2e38dc6172c 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -11,6 +11,7 @@ #include #include #include +#include struct path; struct vfsmount; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6ade6a52d9d4..de88b33c0974 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -226,45 +225,6 @@ void call_rcu_sched(struct rcu_head *head, void synchronize_sched(void); -/* - * Structure allowing asynchronous waiting on RCU. - */ -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; -void wakeme_after_rcu(struct rcu_head *head); - -void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, - struct rcu_synchronize *rs_array); - -#define _wait_rcu_gp(checktiny, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ - __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ - __crcu_array, __rs_array); \ -} while (0) - -#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) - -/** - * synchronize_rcu_mult - Wait concurrently for multiple grace periods - * @...: List of call_rcu() functions for the flavors to wait on. - * - * This macro waits concurrently for multiple flavors of RCU grace periods. - * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait - * on concurrent RCU and RCU-bh grace periods. Waiting on a give SRCU - * domain requires you to write a wrapper function for that SRCU domain's - * call_srcu() function, supplying the corresponding srcu_struct. - * - * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU - * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called - * is automatically a grace period. - */ -#define synchronize_rcu_mult(...) \ - _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) - /** * call_rcu_tasks() - Queue an RCU for invocation task-based grace period * @head: structure to be used for queueing the RCU updates. diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h new file mode 100644 index 000000000000..e774b4f5f220 --- /dev/null +++ b/include/linux/rcupdate_wait.h @@ -0,0 +1,50 @@ +#ifndef _LINUX_SCHED_RCUPDATE_WAIT_H +#define _LINUX_SCHED_RCUPDATE_WAIT_H + +/* + * RCU synchronization types and methods: + */ + +#include +#include + +/* + * Structure allowing asynchronous waiting on RCU. 
+ */ +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; +void wakeme_after_rcu(struct rcu_head *head); + +void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, + struct rcu_synchronize *rs_array); + +#define _wait_rcu_gp(checktiny, ...) \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ + __crcu_array, __rs_array); \ +} while (0) + +#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) + +/** + * synchronize_rcu_mult - Wait concurrently for multiple grace periods + * @...: List of call_rcu() functions for the flavors to wait on. + * + * This macro waits concurrently for multiple flavors of RCU grace periods. + * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait + * on concurrent RCU and RCU-bh grace periods. Waiting on a give SRCU + * domain requires you to write a wrapper function for that SRCU domain's + * call_srcu() function, supplying the corresponding srcu_struct. + * + * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU + * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called + * is automatically a grace period. + */ +#define synchronize_rcu_mult(...) \ + _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) + +#endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4f9b2fa2173d..b452953e21c8 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -53,15 +53,8 @@ static inline void cond_synchronize_sched(unsigned long oldstate) might_sleep(); } -static inline void rcu_barrier_bh(void) -{ - wait_rcu_gp(call_rcu_bh); -} - -static inline void rcu_barrier_sched(void) -{ - wait_rcu_gp(call_rcu_sched); -} +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); static inline void synchronize_rcu_expedited(void) { diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index e773129c8b08..ef3bcfb15b39 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index fa6a48d3917b..6ad330dbbae2 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -47,6 +47,18 @@ static void __call_rcu(struct rcu_head *head, #include "tiny_plugin.h" +void rcu_barrier_bh(void) +{ + wait_rcu_gp(call_rcu_bh); +} +EXPORT_SYMBOL(rcu_barrier_bh); + +void rcu_barrier_sched(void) +{ + wait_rcu_gp(call_rcu_sched); +} +EXPORT_SYMBOL(rcu_barrier_sched); + #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d80e0d2f68c6..cb62ce23ffc7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 9e03db9ea9c0..a0e90e0afc75 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -49,6 +49,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d51ec65dc73..1bd317db9810 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include -- GitLab From 780de9dd2720debc14c501dab4dc80d1f75ad50e Mon Sep 17 00:00:00 2001 From: 
Ingo Molnar Date: Thu, 2 Feb 2017 11:50:56 +0100 Subject: [PATCH 0871/1084] sched/headers, cgroups: Remove the threadgroup_change_*() wrappery threadgroup_change_begin()/end() is a pointless wrapper around cgroup_threadgroup_change_begin()/end(), minus a might_sleep() in the !CONFIG_CGROUPS=y case. Remove the wrappery, move the might_sleep() (the down_read() already has a might_sleep() check). This debloats a bit and simplifies this API. Update all call sites. No change in functionality. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/exec.c | 6 +++--- include/linux/cgroup-defs.h | 13 ++++++++----- include/linux/sched.h | 28 ---------------------------- kernel/cgroup/pids.c | 2 +- kernel/fork.c | 6 +++--- kernel/signal.c | 6 +++--- 6 files changed, 18 insertions(+), 43 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 698a86094f76..e595e1529581 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1088,7 +1088,7 @@ static int de_thread(struct task_struct *tsk) struct task_struct *leader = tsk->group_leader; for (;;) { - threadgroup_change_begin(tsk); + cgroup_threadgroup_change_begin(tsk); write_lock_irq(&tasklist_lock); /* * Do this under tasklist_lock to ensure that @@ -1099,7 +1099,7 @@ static int de_thread(struct task_struct *tsk) break; __set_current_state(TASK_KILLABLE); write_unlock_irq(&tasklist_lock); - threadgroup_change_end(tsk); + cgroup_threadgroup_change_end(tsk); schedule(); if (unlikely(__fatal_signal_pending(tsk))) goto killed; @@ -1157,7 +1157,7 @@ static int de_thread(struct task_struct *tsk) if (unlikely(leader->ptrace)) __wake_up_parent(leader, leader->parent); write_unlock_irq(&tasklist_lock); - threadgroup_change_end(tsk); + cgroup_threadgroup_change_end(tsk); release_task(leader); } diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 3c02404cfce9..6a3f850cabab 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -531,8 +531,8 @@ extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups * @tsk: target task * - * Called from threadgroup_change_begin() and allows cgroup operations to - * synchronize against threadgroup changes using a percpu_rw_semaphore. + * Allows cgroup operations to synchronize against threadgroup changes + * using a percpu_rw_semaphore. */ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) { @@ -543,8 +543,7 @@ static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups * @tsk: target task * - * Called from threadgroup_change_end(). Counterpart of - * cgroup_threadcgroup_change_begin(). + * Counterpart of cgroup_threadcgroup_change_begin(). 
*/ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) { @@ -555,7 +554,11 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #define CGROUP_SUBSYS_COUNT 0 -static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + might_sleep(); +} + static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ diff --git a/include/linux/sched.h b/include/linux/sched.h index e732881517f2..3f61baac928b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3162,34 +3162,6 @@ static inline void unlock_task_sighand(struct task_struct *tsk, spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); } -/** - * threadgroup_change_begin - mark the beginning of changes to a threadgroup - * @tsk: task causing the changes - * - * All operations which modify a threadgroup - a new thread joining the - * group, death of a member thread (the assertion of PF_EXITING) and - * exec(2) dethreading the process and replacing the leader - are wrapped - * by threadgroup_change_{begin|end}(). This is to provide a place which - * subsystems needing threadgroup stability can hook into for - * synchronization. - */ -static inline void threadgroup_change_begin(struct task_struct *tsk) -{ - might_sleep(); - cgroup_threadgroup_change_begin(tsk); -} - -/** - * threadgroup_change_end - mark the end of changes to a threadgroup - * @tsk: task causing the changes - * - * See threadgroup_change_begin(). - */ -static inline void threadgroup_change_end(struct task_struct *tsk) -{ - cgroup_threadgroup_change_end(tsk); -} - #ifdef CONFIG_THREAD_INFO_IN_TASK static inline struct thread_info *task_thread_info(struct task_struct *task) diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 2bd673783f1a..e756dae49300 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -214,7 +214,7 @@ static void pids_cancel_attach(struct cgroup_taskset *tset) /* * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies - * on threadgroup_change_begin() held by the copy_process(). + * on cgroup_threadgroup_change_begin() held by the copy_process(). */ static int pids_can_fork(struct task_struct *task) { diff --git a/kernel/fork.c b/kernel/fork.c index 246bf9aaf9df..d043fedc03c8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1746,7 +1746,7 @@ static __latent_entropy struct task_struct *copy_process( INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; - threadgroup_change_begin(current); + cgroup_threadgroup_change_begin(current); /* * Ensure that the cgroup subsystem policies allow the new process to be * forked. 
It should be noted the the new process's css_set can be changed @@ -1843,7 +1843,7 @@ static __latent_entropy struct task_struct *copy_process( proc_fork_connector(p); cgroup_post_fork(p); - threadgroup_change_end(current); + cgroup_threadgroup_change_end(current); perf_event_fork(p); trace_task_newtask(p, clone_flags); @@ -1854,7 +1854,7 @@ static __latent_entropy struct task_struct *copy_process( bad_fork_cancel_cgroup: cgroup_cancel_fork(p); bad_fork_free_pid: - threadgroup_change_end(current); + cgroup_threadgroup_change_end(current); if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_thread: diff --git a/kernel/signal.c b/kernel/signal.c index 214a8feeb771..bae358532d0a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2395,11 +2395,11 @@ void exit_signals(struct task_struct *tsk) * @tsk is about to have PF_EXITING set - lock out users which * expect stable threadgroup. */ - threadgroup_change_begin(tsk); + cgroup_threadgroup_change_begin(tsk); if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { tsk->flags |= PF_EXITING; - threadgroup_change_end(tsk); + cgroup_threadgroup_change_end(tsk); return; } @@ -2410,7 +2410,7 @@ void exit_signals(struct task_struct *tsk) */ tsk->flags |= PF_EXITING; - threadgroup_change_end(tsk); + cgroup_threadgroup_change_end(tsk); if (!signal_pending(tsk)) goto out; -- GitLab From 314ff7851fc8ea66cbf48eaa93d8ebfb5ca084a9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:03:31 +0100 Subject: [PATCH 0872/1084] mm/vmacache, sched/headers: Introduce 'struct vmacache' and move it from to The header includes various vmacache related defines, which are arguably misplaced. Move them to mm_types.h and minimize the sched.h impact by putting all task vmacache state into a new 'struct vmacache' structure. No change in functionality. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 12 ++++++++++++ include/linux/sched.h | 11 ++++------- include/linux/vmacache.h | 2 +- kernel/debug/debug_core.c | 4 ++-- mm/nommu.c | 2 +- mm/vmacache.c | 10 +++++----- 6 files changed, 25 insertions(+), 16 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4f6d440ad785..137797cd7b50 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -360,6 +360,18 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; }; +/* + * The per task VMA cache array: + */ +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + +struct vmacache { + u32 seqnum; + struct vm_area_struct *vmas[VMACACHE_SIZE]; +}; + struct core_thread { struct task_struct *task; struct core_thread *next; diff --git a/include/linux/sched.h b/include/linux/sched.h index 3f61baac928b..e87c97e1a947 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -134,10 +134,6 @@ struct blk_plug; struct filename; struct nameidata; -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - /* * These are the constant used to fake the fixed-point load-average * counting. 
Some notes: @@ -1550,9 +1546,10 @@ struct task_struct { #endif struct mm_struct *mm, *active_mm; - /* per-thread vma caching */ - u32 vmacache_seqnum; - struct vm_area_struct *vmacache[VMACACHE_SIZE]; + + /* Per-thread vma caching: */ + struct vmacache vmacache; + #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h index c3fa0fd43949..1081db987391 100644 --- a/include/linux/vmacache.h +++ b/include/linux/vmacache.h @@ -12,7 +12,7 @@ static inline void vmacache_flush(struct task_struct *tsk) { - memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); + memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas)); } extern void vmacache_flush_all(struct mm_struct *mm); diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 79517e5549f1..a603ef28f70c 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -232,9 +232,9 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) int i; for (i = 0; i < VMACACHE_SIZE; i++) { - if (!current->vmacache[i]) + if (!current->vmacache.vmas[i]) continue; - flush_cache_range(current->vmacache[i], + flush_cache_range(current->vmacache.vmas[i], addr, addr + BREAK_INSTR_SIZE); } } diff --git a/mm/nommu.c b/mm/nommu.c index fe9f4fa4a7a7..aae06e854552 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -757,7 +757,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) mm->map_count--; for (i = 0; i < VMACACHE_SIZE; i++) { /* if the vma is cached, invalidate the entire cache */ - if (curr->vmacache[i] == vma) { + if (curr->vmacache.vmas[i] == vma) { vmacache_invalidate(mm); break; } diff --git a/mm/vmacache.c b/mm/vmacache.c index 035fdeb35b43..7c233f8e20ee 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -60,7 +60,7 @@ static inline bool vmacache_valid_mm(struct mm_struct *mm) void vmacache_update(unsigned long addr, struct vm_area_struct *newvma) { if (vmacache_valid_mm(newvma->vm_mm)) - current->vmacache[VMACACHE_HASH(addr)] = newvma; + current->vmacache.vmas[VMACACHE_HASH(addr)] = newvma; } static bool vmacache_valid(struct mm_struct *mm) @@ -71,12 +71,12 @@ static bool vmacache_valid(struct mm_struct *mm) return false; curr = current; - if (mm->vmacache_seqnum != curr->vmacache_seqnum) { + if (mm->vmacache_seqnum != curr->vmacache.seqnum) { /* * First attempt will always be invalid, initialize * the new cache for this task here. */ - curr->vmacache_seqnum = mm->vmacache_seqnum; + curr->vmacache.seqnum = mm->vmacache_seqnum; vmacache_flush(curr); return false; } @@ -93,7 +93,7 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr) return NULL; for (i = 0; i < VMACACHE_SIZE; i++) { - struct vm_area_struct *vma = current->vmacache[i]; + struct vm_area_struct *vma = current->vmacache.vmas[i]; if (!vma) continue; @@ -121,7 +121,7 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, return NULL; for (i = 0; i < VMACACHE_SIZE; i++) { - struct vm_area_struct *vma = current->vmacache[i]; + struct vm_area_struct *vma = current->vmacache.vmas[i]; if (vma && vma->vm_start == start && vma->vm_end == end) { count_vm_vmacache_event(VMACACHE_FIND_HITS); -- GitLab From af8601ad420f6afa6445c927ad9f36d9700d96d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 09:57:00 +0100 Subject: [PATCH 0873/1084] kasan, sched/headers: Uninline kasan_enable/disable_current() is a low level header that is included early in affected kernel headers. But it includes which complicates the cleanup of sched.h dependencies. 
But kasan.h has almost no need for sched.h: its only use of scheduler functionality is in two inline functions which are not used very frequently - so uninline kasan_enable_current() and kasan_disable_current(). Also add a dependency to a .c file that depended on kasan.h including it. This paves the way to remove the include from kasan.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kasan.h | 10 ++-------- lib/sbitmap.c | 1 + mm/kasan/kasan.c | 10 ++++++++++ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index c908b25bf5a5..7793036eac80 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -30,16 +30,10 @@ static inline void *kasan_mem_to_shadow(const void *addr) } /* Enable reporting bugs after kasan_disable_current() */ -static inline void kasan_enable_current(void) -{ - current->kasan_depth++; -} +extern void kasan_enable_current(void); /* Disable reporting bugs for current task */ -static inline void kasan_disable_current(void) -{ - current->kasan_depth--; -} +extern void kasan_disable_current(void); void kasan_unpoison_shadow(const void *address, size_t size); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 55e11c4b2f3b..60e800e0b5a0 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -15,6 +15,7 @@ * along with this program. If not, see . */ +#include #include #include #include diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 25f0e6521f36..d99312ed7c22 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -39,6 +39,16 @@ #include "kasan.h" #include "../slab.h" +void kasan_enable_current(void) +{ + current->kasan_depth++; +} + +void kasan_disable_current(void) +{ + current->kasan_depth--; +} + /* * Poisons the shadow memory for 'size' bytes starting from 'addr'. * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE. -- GitLab From 105ab3d8ce7269887d24d224054677125e18037c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0874/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
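As a sketch, the placeholder created by this patch amounts to the following; the <linux/sched.h> include target is taken from the "just maps to" wording and from the guard names visible in the diff below:

#ifndef _LINUX_SCHED_TOPOLOGY_H
#define _LINUX_SCHED_TOPOLOGY_H

/*
 * Placeholder only: forward everything to the monolithic <linux/sched.h>
 * so every consumer added in this patch keeps building, and a later patch
 * can move the topology definitions here without touching call sites again.
 */
#include <linux/sched.h>

#endif /* _LINUX_SCHED_TOPOLOGY_H */

The same forwarding pattern repeats for each placeholder header introduced later in this series.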
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/topology.c | 1 + arch/arm64/kernel/topology.c | 1 + arch/powerpc/kernel/smp.c | 1 + arch/s390/kernel/topology.c | 1 + arch/x86/kernel/smpboot.c | 1 + block/blk-mq.c | 1 + block/blk-softirq.c | 1 + include/linux/cpuset.h | 1 + include/linux/sched/topology.h | 6 ++++++ kernel/sched/fair.c | 2 ++ kernel/sched/sched.h | 1 + 11 files changed, 17 insertions(+) create mode 100644 include/linux/sched/topology.h diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index ebf47d91b804..f8a3ab82e77f 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 565dd69888cc..08243533e5ee 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 21fdf02583fe..fce17789c675 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 2cd5f4f1013c..17660e800e74 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a0d38685f7df..fe7a66e6b5a0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-mq.c b/block/blk-mq.c index 9e6b064e5339..746c14e0d157 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 06cf9807f49a..87b7df4851bf 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "blk.h" diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index bfc204e70338..c608c39cb161 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h new file mode 100644 index 000000000000..4f1159f76f92 --- /dev/null +++ b/include/linux/sched/topology.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TOPOLOGY_H +#define _LINUX_SCHED_TOPOLOGY_H + +#include + +#endif /* _LINUX_SCHED_TOPOLOGY_H */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3b60d73ab290..11e0ab57748a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -21,6 +21,8 @@ */ #include +#include + #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 71b10a9b73cf..9b9cb260d9cf 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,6 +1,7 @@ #include #include +#include #include #include #include -- GitLab From 4c822698cba8bdd93724117eded12bf34eb80252 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0875/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up 
from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mwait.h | 1 + arch/x86/kernel/process.c | 1 + drivers/cpuidle/driver.c | 1 + include/linux/sched/idle.h | 6 ++++++ include/linux/sched/topology.h | 2 ++ kernel/sched/idle.c | 1 + kernel/smp.c | 1 + 7 files changed, 13 insertions(+) create mode 100644 include/linux/sched/idle.h diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index f37f2d8a2989..bda3c27f0da0 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -2,6 +2,7 @@ #define _ASM_X86_MWAIT_H #include +#include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7780efa635b9..6395e0cd7dd6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index ab264d393233..e53fb861beb0 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h new file mode 100644 index 000000000000..a3cf79b86986 --- /dev/null +++ b/include/linux/sched/idle.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_IDLE_H +#define _LINUX_SCHED_IDLE_H + +#include + +#endif /* _LINUX_SCHED_IDLE_H */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 4f1159f76f92..184b56b8aa64 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -3,4 +3,6 @@ #include +#include + #endif /* _LINUX_SCHED_TOPOLOGY_H */ diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 6a4bae0a649d..ac6d5176463d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -2,6 +2,7 @@ * Generic entry point for the idle threads */ #include +#include #include #include #include diff --git a/kernel/smp.c b/kernel/smp.c index 77fcdb9f2775..a817769b53c0 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "smpboot.h" -- GitLab From 84f001e15737f8214b0f5f0f7dfec0fb1027938f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0876/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
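For context on what will live in <linux/sched/wake_q.h>, a minimal sketch of the deferred-wakeup API it is meant to own; the DEFINE_WAKE_Q() initializer name is assumed from the API of this era, the rest are the existing wake_q_add()/wake_up_q() calls:

#include <linux/sched.h>
#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>

/*
 * Sketch: defer a wakeup until after the lock is dropped.  wake_q_add()
 * queues the task while the lock is still held, wake_up_q() issues the
 * wakeup afterwards, so the woken task never contends on the lock we are
 * about to release.  This is why the ipc/, futex and locking code in this
 * patch picks up the new header.
 */
static void example_hand_off(spinlock_t *lock, struct task_struct *waiter)
{
	DEFINE_WAKE_Q(wake_q);		/* assumed initializer name */

	spin_lock(lock);
	wake_q_add(&wake_q, waiter);
	spin_unlock(lock);

	wake_up_q(&wake_q);
}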
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/wake_q.h | 6 ++++++ ipc/mqueue.c | 1 + ipc/msg.c | 1 + ipc/sem.c | 1 + kernel/futex.c | 1 + kernel/locking/mutex.c | 1 + kernel/locking/rtmutex.c | 1 + kernel/locking/rtmutex_common.h | 1 + kernel/locking/rwsem-xadd.c | 3 ++- kernel/sched/sched.h | 1 + 10 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/wake_q.h diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h new file mode 100644 index 000000000000..6383b35e4eba --- /dev/null +++ b/include/linux/sched/wake_q.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_WAKE_Q_H +#define _LINUX_SCHED_WAKE_Q_H + +#include + +#endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 4fdd97031431..40e448de8a7e 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/ipc/msg.c b/ipc/msg.c index e3e52ce01123..ecc387e573f6 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/ipc/sem.c b/ipc/sem.c index e468cd1c12f0..947dc2348271 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -82,6 +82,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/kernel/futex.c b/kernel/futex.c index b687cb22301c..8ddcf9ea953c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index ad2d9e22697b..57f6311e2405 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index d340be3a488f..d4f798491361 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "rtmutex_common.h" diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 990134617b4c..856dfff5c33a 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -13,6 +13,7 @@ #define __KERNEL_RTMUTEX_COMMON_H #include +#include /* * This is the control structure for tasks blocked on a rt_mutex, diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 2ad8d8dc3bb1..4fe8d8ad4396 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -10,10 +10,11 @@ * and Davidlohr Bueso . Based on mutexes. */ #include -#include #include #include +#include #include +#include #include #include "rwsem.h" diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9b9cb260d9cf..683570739f46 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include -- GitLab From e601757102cfd3eeae068f53b3bc1234f3a2b2e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0877/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
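A minimal sketch of the kind of consumer the new <linux/sched/clock.h> serves, assuming local_clock() keeps its usual contract of cheap, per-CPU nanosecond timestamps:

#include <linux/kernel.h>
#include <linux/sched/clock.h>

/*
 * Sketch: time a short section with the scheduler clock.  local_clock()
 * returns nanoseconds from a fast per-CPU source; tracing, printk
 * timestamps and latency statistics all use it, which is why so many of
 * the files in this patch need the new header.
 */
static u64 example_measure(void (*fn)(void))
{
	u64 t0 = local_clock();

	fn();

	return local_clock() - t0;	/* elapsed nanoseconds, approximate */
}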
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/mach-bcm/platsmp.c | 1 + arch/arm/mach-omap2/pm-debug.c | 1 + arch/arm/probes/kprobes/test-core.c | 1 + arch/cris/kernel/time.c | 2 +- arch/ia64/kernel/setup.c | 1 + arch/microblaze/kernel/timer.c | 1 + arch/mn10300/kernel/time.c | 1 + arch/parisc/kernel/setup.c | 1 + arch/parisc/kernel/time.c | 1 + arch/powerpc/kernel/time.c | 1 + arch/s390/kernel/time.c | 1 + arch/sparc/kernel/ds.c | 1 + arch/sparc/kernel/viohs.c | 1 + arch/tile/kernel/time.c | 1 + arch/x86/events/amd/ibs.c | 1 + arch/x86/events/core.c | 1 + arch/x86/kernel/cpu/amd.c | 1 + arch/x86/kernel/cpu/centaur.c | 1 + arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/cyrix.c | 1 + arch/x86/kernel/cpu/intel.c | 1 + arch/x86/kernel/cpu/transmeta.c | 1 + arch/x86/kernel/kvmclock.c | 1 + arch/x86/kernel/nmi.c | 1 + arch/x86/kernel/tsc.c | 1 + block/cfq-iosched.c | 1 + drivers/acpi/apei/ghes.c | 1 + drivers/clocksource/arm_arch_timer.c | 1 + drivers/clocksource/pxa_timer.c | 1 + drivers/clocksource/timer-digicolor.c | 1 + drivers/cpuidle/cpuidle.c | 1 + drivers/firmware/tegra/bpmp.c | 1 + drivers/gpu/drm/i915/i915_gem_request.c | 2 ++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/md/bcache/bset.c | 1 + drivers/md/bcache/btree.c | 1 + drivers/md/bcache/sysfs.c | 1 + drivers/md/bcache/util.c | 1 + drivers/md/bcache/util.h | 1 + drivers/md/bcache/writeback.c | 1 + drivers/misc/cxl/native.c | 1 + drivers/net/irda/pxaficp_ir.c | 1 + drivers/perf/arm_pmu.c | 1 + drivers/vhost/net.c | 1 + include/linux/blkdev.h | 1 + include/linux/sched/clock.h | 6 ++++++ include/linux/skbuff.h | 1 + include/net/busy_poll.h | 1 + kernel/events/core.c | 1 + kernel/locking/lockdep.c | 1 + kernel/locking/qspinlock_stat.h | 1 + kernel/printk/printk.c | 1 + kernel/sched/clock.c | 1 + kernel/sched/core.c | 1 + kernel/sched/sched.h | 1 + kernel/time/sched_clock.c | 1 + kernel/time/tick-sched.c | 1 + kernel/torture.c | 1 + kernel/trace/ring_buffer.c | 1 + kernel/trace/trace_clock.c | 1 + kernel/trace/trace_hwlat.c | 1 + kernel/trace/trace_output.c | 1 + kernel/watchdog.c | 1 + lib/plist.c | 1 + net/ipv4/tcp_cdg.c | 2 ++ 65 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/clock.h diff --git a/arch/arm/mach-bcm/platsmp.c b/arch/arm/mach-bcm/platsmp.c index 582886d0d02f..9e3f275934eb 100644 --- a/arch/arm/mach-bcm/platsmp.c +++ b/arch/arm/mach-bcm/platsmp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c index 003a6cb248be..5c46ea6756d7 100644 --- a/arch/arm/mach-omap2/pm-debug.c +++ b/arch/arm/mach-omap2/pm-debug.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 9775de22e2ff..c893726aa52d 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -203,6 +203,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c index 2dda6da71521..bc562cf511a6 100644 --- a/arch/cris/kernel/time.c +++ b/arch/cris/kernel/time.c @@ -29,7 +29,7 @@ #include #include #include -#include /* just for sched_clock() - funny that */ +#include #define D(x) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index d68322966f33..32a6cc36296f 100644 --- 
a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 1d6fad50fa76..999066192715 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c index 67c6416a58f8..06b83b17c5f1 100644 --- a/arch/mn10300/kernel/time.c +++ b/arch/mn10300/kernel/time.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 068ed3607bac..dee6f9d6a153 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 1e22f981cd81..89421df70160 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc84a8d47b9e..37833c5dc274 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index de66abb479c9..c31da46bc037 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index f87a55d77094..b542cc7c8d94 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 526fcb5d8ce9..b30b30ab3ddd 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index c9357012b1c8..5bd4e88c7c60 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 496e60391fac..786fd875de92 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1635c0c8df23..2ecc0e97772b 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4e95b2e0d95f..35a5d5dca2fa 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 2c234a6d94c4..adc0ebd8bed0 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c64ca5929cb5..c7e2ca966386 100644 --- a/arch/x86/kernel/cpu/common.c +++ 
b/arch/x86/kernel/cpu/common.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 47416f959a48..0a3bc19de017 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 017ecd3bb553..fe0a615a051b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c1ea5b999839..8457b4978668 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index bae6ea6cfb94..d88967659098 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index bfe4d6c96fbd..d17b1a940add 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -20,6 +20,7 @@ #include #include #include +#include #if defined(CONFIG_EDAC) #include diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2724dc82f992..46bcda4cb1c2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 137944777859..440b95ee593c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index e53bef6cf53c..b192b42a8351 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 93aa1364376a..7a8a4117f123 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c index 9cae38eebec2..1c24de215c14 100644 --- a/drivers/clocksource/pxa_timer.c +++ b/drivers/clocksource/pxa_timer.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/timer-digicolor.c b/drivers/clocksource/timer-digicolor.c index 10318cc99c0e..e9f50d289362 100644 --- a/drivers/clocksource/timer-digicolor.c +++ b/drivers/clocksource/timer-digicolor.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 62810ff3b00f..548b90be7685 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c index 4ff02d310868..84e4c9a58a0c 100644 --- a/drivers/firmware/tegra/bpmp.c +++ b/drivers/firmware/tegra/bpmp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 
f31deeb72703..df3fef393dbe 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -24,6 +24,8 @@ #include #include +#include +#include #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b9cde116dab3..344f238b283f 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "i915_drv.h" #include diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 646fe85261c1..18526d44688d 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -11,6 +11,7 @@ #include "bset.h" #include +#include #include #include diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index a43eedd5804d..384559af310e 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -32,6 +32,7 @@ #include #include #include +#include #include /* diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index b3ff57d61dde..f90f13616980 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -13,6 +13,7 @@ #include #include +#include static const char * const cache_replacement_policies[] = { "lru", diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index dde6172f3f10..8c3a938f4bf0 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "util.h" diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index cf2cbc211d83..a126919ed102 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 69e1ae59cab8..6ac2e48b9235 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -13,6 +13,7 @@ #include #include +#include #include /* Rate limiting */ diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 09505f432eda..7ae710585267 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c index 6e8f616be48e..1dba16bc7f8d 100644 --- a/drivers/net/irda/pxaficp_ir.c +++ b/drivers/net/irda/pxaficp_ir.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 6d9335865880..9612b84bc3e0 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2fe35354f20e..5c98ad4d2f4c 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aecca0e7d9ca..796016e63c1d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2,6 +2,7 @@ #define _LINUX_BLKDEV_H #include +#include #ifdef CONFIG_BLOCK diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h new file mode 100644 index 000000000000..7cb7d79b2cf9 --- /dev/null +++ b/include/linux/sched/clock.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_CLOCK_H +#define _LINUX_SCHED_CLOCK_H + +#include + +#endif /* _LINUX_SCHED_CLOCK_H */ diff --git a/include/linux/skbuff.h 
b/include/linux/skbuff.h index 69ccd2636911..c776abd86937 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index b8d637225a07..b96832df239e 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -25,6 +25,7 @@ #define _LINUX_NET_BUSY_POLL_H #include +#include #include #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/kernel/events/core.c b/kernel/events/core.c index 1031bdf9f012..42fe16a84f97 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "internal.h" diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9812e5dd409e..cdafaff926ca 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -28,6 +28,7 @@ #define DISABLE_BRANCH_PROFILING #include #include +#include #include #include #include diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index e852be4851fc..4a30ef63c607 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -63,6 +63,7 @@ enum qlock_stats { */ #include #include +#include #include static const char * const qstat_names[qstat_num + 1] = { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 34da86e73d00..47050eedc206 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index ad64efe41722..dd7817cdbf58 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1bd317db9810..1a7fd3d21e5a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6,6 +6,7 @@ * Copyright (C) 1991-2002 Linus Torvalds */ #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 683570739f46..4d386461f13a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index a26036d37a38..ea6b610c4c57 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2c115fdab397..4fee1c3abd0b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/torture.c b/kernel/torture.c index 01a99976f072..55de96529287 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a85739efcc30..96fc3c043ad6 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 0f06532a755b..5fdc779f411d 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 
edfacd954e1b..21ea6ae77d93 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "trace.h" static struct trace_array *hwlat_trace; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 070866c32eb9..107afca97649 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "trace_output.h" diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 63177be0159e..144d7b1b0364 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/lib/plist.c b/lib/plist.c index 3a30c53db061..199408f91057 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -175,6 +175,7 @@ void plist_requeue(struct plist_node *node, struct plist_head *head) #ifdef CONFIG_DEBUG_PI_LIST #include +#include #include #include diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 35b280361cb2..50a0f3e51d5b 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #define HYSTART_ACK_TRAIN 1 -- GitLab From ae7e81c077d60507dcec139e40a6d10cf932cf4b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:07:51 +0100 Subject: [PATCH 0878/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to move scheduler ABI details to , which will be used from a number of .c files. Create empty placeholder header that maps to . Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/common/bL_switcher.c | 1 + crypto/crypto_engine.c | 1 + drivers/acpi/acpi_pad.c | 1 + drivers/block/drbd/drbd_receiver.c | 1 + drivers/firmware/psci_checker.c | 1 + drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 1 + drivers/media/pci/ivtv/ivtv-driver.c | 1 + drivers/mmc/core/sdio_irq.c | 1 + drivers/spi/spi.c | 1 + drivers/staging/android/ion/ion_heap.c | 1 + drivers/thermal/intel_powerclamp.c | 1 + drivers/tty/serial/sc16is7xx.c | 1 + include/uapi/linux/sched/types.h | 6 ++++++ kernel/irq/manage.c | 1 + kernel/kthread.c | 1 + kernel/locking/locktorture.c | 1 + kernel/rcu/rcuperf.c | 1 + kernel/rcu/rcutorture.c | 1 + kernel/rcu/tree.c | 1 + kernel/rcu/tree_plugin.h | 1 + kernel/sched/core.c | 1 + kernel/sched/cpufreq_schedutil.c | 1 + kernel/trace/ring_buffer_benchmark.c | 1 + kernel/trace/trace_selftest.c | 1 + kernel/watchdog.c | 1 + 26 files changed, 31 insertions(+) create mode 100644 include/uapi/linux/sched/types.h diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 46730017b3c5..083c9e517d22 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c index f1bf3418d968..727bd5c3569e 100644 --- a/crypto/crypto_engine.c +++ b/crypto/crypto_engine.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "internal.h" #define CRYPTO_ENGINE_MAX_QLEN 10 diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index eb76a4c10dbf..754431031282 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include 
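The "scheduler ABI details" referred to above are definitions such as struct sched_param; a minimal sketch of the sort of in-kernel user that needs the new uapi header (the kthread pointer and the priority value are illustrative only):

#include <linux/sched.h>
#include <uapi/linux/sched/types.h>

/*
 * Sketch: give a kernel thread a real-time priority.  struct sched_param
 * is scheduler ABI, which is why drivers and kthread users in this patch
 * gain the extra include once the type moves under uapi/linux/sched/.
 */
static int example_make_rt(struct task_struct *worker)
{
	struct sched_param param = { .sched_priority = 1 };	/* illustrative */

	return sched_setscheduler_nocheck(worker, SCHED_FIFO, &param);
}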
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c7728dd77230..8b40a5b2f8e6 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #define __KERNEL_SYSCALLS__ #include diff --git a/drivers/firmware/psci_checker.c b/drivers/firmware/psci_checker.c index 29d58feaf675..6523ce962865 100644 --- a/drivers/firmware/psci_checker.c +++ b/drivers/firmware/psci_checker.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 1bf83ed113b3..16f96563cd2b 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "gpu_scheduler.h" diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index fcfa423d08bd..7044e9a6abf7 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -23,6 +23,7 @@ */ #include +#include #include "i915_drv.h" diff --git a/drivers/media/pci/ivtv/ivtv-driver.c b/drivers/media/pci/ivtv/ivtv-driver.c index ab2ae53618e8..e73c153285f0 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.c +++ b/drivers/media/pci/ivtv/ivtv-driver.c @@ -59,6 +59,7 @@ #include #include #include "tuner-xc2028.h" +#include /* If you have already X v4l cards, then set this to X. This way the device numbers stay matched. Example: you have a WinTV card diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c index d29faf2addfe..6d4b72080d51 100644 --- a/drivers/mmc/core/sdio_irq.c +++ b/drivers/mmc/core/sdio_irq.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 44222ef9471e..90b5b2efafbf 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c index 4e5c0f17f579..c69d0bd53693 100644 --- a/drivers/staging/android/ion/ion_heap.c +++ b/drivers/staging/android/ion/ion_heap.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include "ion.h" diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index a47103a659fa..d718cd179ddb 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 793395451982..ca54ce074a5f 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -29,6 +29,7 @@ #include #include #include +#include #define SC16IS7XX_NAME "sc16is7xx" #define SC16IS7XX_MAX_DEVS 8 diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h new file mode 100644 index 000000000000..d162d315f4b5 --- /dev/null +++ b/include/uapi/linux/sched/types.h @@ -0,0 +1,6 @@ +#ifndef _UAPI_LINUX_SCHED_TYPES_H +#define _UAPI_LINUX_SCHED_TYPES_H + +#include + +#endif /* _UAPI_LINUX_SCHED_TYPES_H */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 944d068b6c48..09740952e4de 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -17,6 +17,7 @@ #include #include #include +#include #include 
#include "internals.h" diff --git a/kernel/kthread.c b/kernel/kthread.c index 8461a4372e8a..ef9b9eb809c7 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -5,6 +5,7 @@ * even if we're invoked from userspace (think modprobe, hotplug cpu, * etc.). */ +#include #include #include #include diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 28350dc8ecbb..5ea0a8969ee2 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c index 123ccbd22449..a4a86fb47e4a 100644 --- a/kernel/rcu/rcuperf.c +++ b/kernel/rcu/rcuperf.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index d81345be730e..6a28b79710f0 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cb62ce23ffc7..e456327a63d6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a240f3308be6..9dabb04003be 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -28,6 +28,7 @@ #include #include #include +#include #include "../time/tick-internal.h" #ifdef CONFIG_RCU_BOOST diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1a7fd3d21e5a..ed39d1d0b64a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index fd4659313640..8f8de3d4d6b7 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -13,6 +13,7 @@ #include #include +#include #include #include diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 6df9a83e20d7..c190a4d5013c 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index b0f86ea77881..cb917cebae29 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1,5 +1,6 @@ /* Include in trace.c */ +#include #include #include #include diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 144d7b1b0364..52718f4512e9 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include -- GitLab From 4f17722c7256af8e17c2c4f29f170247264bdf48 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 08:45:17 +0100 Subject: [PATCH 0879/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
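For orientation, a sketch of the fixed-point load-average helpers the new <linux/sched/loadavg.h> will own, following the transformation fs/proc/loadavg.c performs; the FIXED_1/200 rounding offset is the conventional one used there:

#include <linux/kernel.h>
#include <linux/sched/loadavg.h>

/*
 * Sketch: read the load averages and split the FSHIFT fixed-point values
 * into integer and fractional (hundredths) parts for printing.
 */
static void example_print_loadavg(void)
{
	unsigned long avnrun[3];

	get_avenrun(avnrun, FIXED_1 / 200, 0);

	pr_info("load: %lu.%02lu %lu.%02lu %lu.%02lu\n",
		LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
		LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
		LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]));
}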
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/m68k/kernel/time.c | 1 + arch/microblaze/kernel/heartbeat.c | 1 + arch/powerpc/platforms/cell/cpufreq_spudemand.c | 1 + arch/powerpc/platforms/cell/spufs/sched.c | 1 + arch/s390/appldata/appldata_os.c | 1 + arch/sh/drivers/heartbeat.c | 1 + arch/sparc/kernel/led.c | 1 + drivers/cpuidle/governors/menu.c | 1 + drivers/leds/trigger/ledtrig-heartbeat.c | 1 + drivers/platform/x86/intel_ips.c | 1 + fs/proc/loadavg.c | 1 + include/linux/sched/loadavg.h | 6 ++++++ kernel/debug/kdb/kdb_main.c | 1 + kernel/sched/core.c | 1 + kernel/sched/loadavg.c | 1 + kernel/sys.c | 1 + kernel/time/timekeeping.c | 1 + net/sched/em_meta.c | 1 + 18 files changed, 23 insertions(+) create mode 100644 include/linux/sched/loadavg.h diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 4e5aa2f4f522..87160b4415fb 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/heartbeat.c b/arch/microblaze/kernel/heartbeat.c index 4643e3ab9414..2022130139d2 100644 --- a/arch/microblaze/kernel/heartbeat.c +++ b/arch/microblaze/kernel/heartbeat.c @@ -9,6 +9,7 @@ */ #include +#include #include #include diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 88301e53f085..882944c36ef5 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 9b543df210fb..d317c84dc794 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 08b9e942a262..079446619f89 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c index 49bace446a1a..c6d96049a0bb 100644 --- a/arch/sh/drivers/heartbeat.c +++ b/arch/sh/drivers/heartbeat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c index 3ae36f36e758..44a3ed93c214 100644 --- a/arch/sparc/kernel/led.c +++ b/arch/sparc/kernel/led.c @@ -8,6 +8,7 @@ #include #include #include +#include #include diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 8d6d25c38c02..fe86060e48f7 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c index e6f2f8b9f09a..afa3b4099214 100644 --- a/drivers/leds/trigger/ledtrig-heartbeat.c +++ b/drivers/leds/trigger/ledtrig-heartbeat.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index 55663b3d7282..58dcee562d64 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c 
@@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index aec66e6c2060..add5255ce7e0 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h new file mode 100644 index 000000000000..3ae74be327e8 --- /dev/null +++ b/include/linux/sched/loadavg.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_LOADAVG_H +#define _LINUX_SCHED_LOADAVG_H + +#include + +#endif /* _LINUX_SCHED_LOADAVG_H */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index ca183919d302..308937c7a687 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ed39d1d0b64a..11a0684a29a7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index a2d6eb71f06b..7296b7308eca 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -7,6 +7,7 @@ */ #include +#include #include "sched.h" diff --git a/kernel/sys.c b/kernel/sys.c index b07adca97ea3..28b8a4c1bc7e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -49,6 +49,7 @@ #include #include +#include #include #include #include diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 95b258dd75db..fb564acee0f3 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 41c80b6c3906..ae7e4f5b348b 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include -- GitLab From 4eb5aaa3af8a54e5e9bac90e2b42bbab1f1ee5a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:29 +0100 Subject: [PATCH 0880/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
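For orientation, a heavily simplified sketch of the kind of call site the new <linux/sched/autogroup.h> serves; sched_autogroup_create_attach() is the existing interface, while the surrounding setsid() logic is reduced to a single call:

#include <linux/sched.h>
#include <linux/sched/autogroup.h>

/*
 * Sketch (simplified): setsid() attaches the new session leader to a
 * fresh autogroup so that, with CONFIG_SCHED_AUTOGROUP, each session
 * competes for CPU as one group.  Callers like kernel/sys.c and the fork
 * and exit paths are why those files pick up the new header.
 */
static void example_new_session(struct task_struct *leader)
{
	sched_autogroup_create_attach(leader);
}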
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/proc/base.c | 1 + include/linux/sched/autogroup.h | 6 ++++++ kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sched/autogroup.h | 1 + kernel/sys.c | 1 + 6 files changed, 11 insertions(+) create mode 100644 include/linux/sched/autogroup.h diff --git a/fs/proc/base.c b/fs/proc/base.c index 1e1e182d571b..27af4ea315a3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -85,6 +85,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h new file mode 100644 index 000000000000..d745f22e57dc --- /dev/null +++ b/include/linux/sched/autogroup.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_AUTOGROUP_H +#define _LINUX_SCHED_AUTOGROUP_H + +#include + +#endif /* _LINUX_SCHED_AUTOGROUP_H */ diff --git a/kernel/exit.c b/kernel/exit.c index 8a768a3672a5..9c0b92833fbb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index d043fedc03c8..234f4bfb8efd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include diff --git a/kernel/sched/autogroup.h b/kernel/sched/autogroup.h index 890c95f2587a..ce40c810cd5c 100644 --- a/kernel/sched/autogroup.h +++ b/kernel/sched/autogroup.h @@ -2,6 +2,7 @@ #include #include +#include struct autogroup { /* diff --git a/kernel/sys.c b/kernel/sys.c index 28b8a4c1bc7e..cfe82ae62423 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -49,6 +49,7 @@ #include #include +#include #include #include #include -- GitLab From 6e84f31522f931027bf695752087ece278c10d3f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:29 +0100 Subject: [PATCH 0881/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. The APIs that are going to be moved first are: mm_alloc() __mmdrop() mmdrop() mmdrop_async_fn() mmdrop_async() mmget_not_zero() mmput() mmput_async() get_task_mm() mm_access() mm_release() Include the new header in the files that are going to need it. 
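As a rough illustration, not taken from the patch: once the APIs listed above live in the new header, a caller only needs the single extra include that this patch adds throughout the diff below. A hedged sketch of such a caller; the helper name put_task_memory() is made up for this example:

#include <linux/sched.h>
#include <linux/sched/mm.h>	/* placeholder today, later the home of get_task_mm()/mmput() */

static void put_task_memory(struct task_struct *task)
{
	struct mm_struct *mm = get_task_mm(task);	/* takes a reference, may return NULL */

	if (mm)
		mmput(mm);				/* drop the reference again */
}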
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arc/include/asm/mmu_context.h | 1 + arch/arc/kernel/troubleshoot.c | 2 ++ arch/arm/mach-rpc/ecard.c | 1 + arch/frv/mm/mmu-context.c | 1 + arch/m68k/sun3/mmu_emu.c | 1 + arch/powerpc/platforms/cell/spufs/context.c | 2 ++ drivers/android/binder.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 + drivers/gpu/drm/etnaviv/etnaviv_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_userptr.c | 1 + drivers/infiniband/core/umem.c | 1 + drivers/infiniband/core/umem_odp.c | 1 + drivers/infiniband/hw/hfi1/file_ops.c | 1 + drivers/infiniband/hw/mlx4/main.c | 2 ++ drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/usnic/usnic_uiom.c | 1 + drivers/iommu/amd_iommu_v2.c | 1 + drivers/iommu/intel-svm.c | 1 + drivers/lguest/lguest_user.c | 1 + drivers/misc/cxl/fault.c | 1 + drivers/misc/mic/scif/scif_rma.c | 2 ++ drivers/oprofile/buffer_sync.c | 1 + drivers/vfio/vfio_iommu_spapr_tce.c | 2 ++ drivers/vfio/vfio_iommu_type1.c | 1 + drivers/vhost/vhost.c | 1 + drivers/xen/gntdev.c | 1 + fs/exec.c | 1 + fs/proc/array.c | 1 + fs/proc/base.c | 1 + fs/proc/task_mmu.c | 1 + fs/proc/task_nommu.c | 2 ++ fs/userfaultfd.c | 1 + include/linux/sched/mm.h | 6 ++++++ kernel/cgroup/cpuset.c | 1 + kernel/events/core.c | 1 + kernel/events/uprobes.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/futex.c | 1 + kernel/ptrace.c | 1 + kernel/sched/sched.h | 1 + kernel/sys.c | 1 + kernel/trace/trace_output.c | 1 + kernel/tsacct.c | 1 + mm/khugepaged.c | 1 + mm/ksm.c | 1 + mm/memcontrol.c | 1 + mm/memory.c | 1 + mm/mempolicy.c | 1 + mm/migrate.c | 1 + mm/mmu_context.c | 1 + mm/mmu_notifier.c | 1 + mm/nommu.c | 1 + mm/oom_kill.c | 1 + mm/process_vm_access.c | 1 + mm/rmap.c | 1 + mm/swapfile.c | 1 + mm/util.c | 1 + virt/kvm/async_pf.c | 1 + virt/kvm/kvm_main.c | 1 + 60 files changed, 71 insertions(+) create mode 100644 include/linux/sched/mm.h diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h index b0b87f2447f5..64b5ebae1ae8 100644 --- a/arch/arc/include/asm/mmu_context.h +++ b/arch/arc/include/asm/mmu_context.h @@ -20,6 +20,7 @@ #include #include +#include #include diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 82f9bc819f4a..0c48907f56a9 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -13,6 +13,8 @@ #include #include #include +#include + #include #include diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index dc67a7fb3831..6b279d037774 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/mm/mmu-context.c b/arch/frv/mm/mmu-context.c index 3473bde77f56..4031289bf2ec 100644 --- a/arch/frv/mm/mmu-context.c +++ b/arch/frv/mm/mmu-context.c @@ -10,6 +10,7 @@ */ #include +#include #include #include diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c index e9d7fbe4d5ae..7fdc61525e0b 100644 --- a/arch/m68k/sun3/mmu_emu.c +++ b/arch/m68k/sun3/mmu_emu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 3b4152faeb1f..b500b17254a0 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include 
#include #include "spufs.h" diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 2bbcdc6fdfee..e33e7fbe870b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ca5f2aa7232d..84d1ffd1eef9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index e78f1406885d..ae2882b64b82 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -16,6 +16,7 @@ #include #include +#include #include "etnaviv_drv.h" #include "etnaviv_gem.h" diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 0115989e324a..22b46398831e 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -31,6 +31,7 @@ #include #include #include +#include struct i915_mm_struct { struct mm_struct *mm; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 446b56a5260b..d525b1a2986a 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index f2fc0431512d..7279f259494f 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 3b19c16a9e45..f78c739b330a 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -48,6 +48,7 @@ #include #include #include +#include #include diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 88608906ce25..56da8f0d71bb 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -39,6 +39,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5b3355268725..4b1ec3ff152a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -41,6 +41,7 @@ #include #endif #include +#include #include #include #include diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 1ccee6ea5bc3..ba868be5af2e 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index f8ed8c95b685..063343909b0d 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 51f2b228723f..23c427602c55 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 
30c60687d277..1a6787bc9386 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index 377e650a2a1d..e33011e3e7e2 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index f806a4471eb9..d0e9c60f944e 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -17,6 +17,8 @@ */ #include #include +#include + #include "scif_main.h" #include "scif_map.h" diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 642478d35e99..eb0c35994b54 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "oprofile_stats.h" diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 59b3f62a2d64..185d50ee1b12 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index bd6f293c4ebd..7e94c7fc90ae 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4269e621e254..4a00140a7624 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "vhost.h" diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 2ef2b61b69df..c77a0751a311 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index e595e1529581..8929da96cca1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index fe12b519d09b..61bea3a4ecc5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index 27af4ea315a3..9efe12376af1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ee3efb229ef6..f08bd31c1081 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 1ef97cfcf422..23266694db11 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -7,6 +7,8 @@ #include #include #include +#include + #include "internal.h" /* diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3c421d06a18e..18d12bfff770 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h new file mode 100644 index 000000000000..a7adba1cd0a9 --- /dev/null +++ b/include/linux/sched/mm.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_MM_H 
+#define _LINUX_SCHED_MM_H + +#include + +#endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index b3088886cd37..a5c46db2855c 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/events/core.c b/kernel/events/core.c index 42fe16a84f97..6f41548f2e32 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "internal.h" diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d630f8ac4d2f..62f2dbd13efc 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -27,6 +27,7 @@ #include /* read_mapping_page */ #include #include +#include #include #include /* anon_vma_prepare */ #include /* set_pte_at_notify */ diff --git a/kernel/exit.c b/kernel/exit.c index 9c0b92833fbb..48649ccbb649 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 234f4bfb8efd..1875468e2a81 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/kernel/futex.c b/kernel/futex.c index 8ddcf9ea953c..229a744b1781 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 49ba7c1ade9d..6fe5a2897912 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4d386461f13a..e2307a6c29f1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index cfe82ae62423..dc71ec1e1967 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 107afca97649..02a4aeb22c47 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "trace_output.h" diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 5c21f0535056..571a2d3821d8 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 34bce5c308e3..212a2afe8830 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/mm/ksm.c b/mm/ksm.c index 520e4c37fec7..5632c9d02457 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 45867e439d31..c52ec893e241 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index 14fc0b40f0bb..b0495ec74d29 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 1e7873e40c9a..90892416ed75 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -73,6 +73,7 @@ #include #include #include +#include #include 
#include #include diff --git a/mm/migrate.c b/mm/migrate.c index 2c63ac06791b..9a0897a14d37 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -40,6 +40,7 @@ #include #include #include +#include #include diff --git a/mm/mmu_context.c b/mm/mmu_context.c index daf67bb02b4a..8888b124a386 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -5,6 +5,7 @@ #include #include +#include #include #include diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 32bc9f2ff7eb..a7652acd2ab9 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* global SRCU for all MMs */ diff --git a/mm/nommu.c b/mm/nommu.c index aae06e854552..79abed514a4b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 51c091849dcb..d28936618698 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 84d0c7eada2b..8973cd231ece 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/rmap.c b/mm/rmap.c index 8774791e2809..0367a0506667 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -46,6 +46,7 @@ */ #include +#include #include #include #include diff --git a/mm/swapfile.c b/mm/swapfile.c index fadc6a1c0da0..ff2bf3f61b14 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/mm/util.c b/mm/util.c index b8f538863b5a..14f4e13323e2 100644 --- a/mm/util.c +++ b/mm/util.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 2366177172f6..bb298a200cd3 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "async_pf.h" #include diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 35f71409d9ee..dd5de09bf362 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include -- GitLab From f7ccbae45c5e2c1077654b0e857e7efb1aa31c92 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:30 +0100 Subject: [PATCH 0882/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/binfmt_elf.c | 1 + fs/binfmt_elf_fdpic.c | 1 + fs/coredump.c | 1 + fs/exec.c | 1 + fs/proc/base.c | 1 + fs/proc/internal.h | 1 + include/linux/khugepaged.h | 3 ++- include/linux/ksm.h | 1 + include/linux/sched/coredump.h | 6 ++++++ kernel/cred.c | 1 + kernel/events/uprobes.c | 1 + kernel/fork.c | 1 + kernel/ptrace.c | 1 + kernel/sys.c | 1 + kernel/sysctl.c | 1 + mm/huge_memory.c | 1 + mm/khugepaged.c | 1 + mm/ksm.c | 1 + mm/memory.c | 1 + mm/oom_kill.c | 1 + 20 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/coredump.h diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 443a6f537d56..fbbe52e1250e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index ffca4bbc3d63..222873bb689c 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include diff --git a/fs/coredump.c b/fs/coredump.c index ae6b05629ca1..23a539b29225 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index 8929da96cca1..9c80d011594e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index 9efe12376af1..4c5fa704e38c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -87,6 +87,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5d6960f5f1c0..550e8b183abe 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -14,6 +14,7 @@ #include #include #include +#include struct ctl_table_header; struct mempolicy; diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 1e032a1ddb3e..5d9a400af509 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -1,7 +1,8 @@ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H -#include /* MMF_VM_HUGEPAGE */ +#include /* MMF_VM_HUGEPAGE */ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 481c8c4627ca..e1cfda4bee58 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -12,6 +12,7 @@ #include #include #include +#include struct stable_node; struct mem_cgroup; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h new file mode 100644 index 000000000000..ab81e564e076 --- /dev/null +++ b/include/linux/sched/coredump.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_COREDUMP_H +#define _LINUX_SCHED_COREDUMP_H + +#include + +#endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/kernel/cred.c b/kernel/cred.c index 5f264fb5737d..2bc66075740f 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 62f2dbd13efc..0e137f98a50c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include /* anon_vma_prepare */ #include /* set_pte_at_notify */ diff --git a/kernel/fork.c b/kernel/fork.c index 1875468e2a81..7332448b668a 100644 --- 
a/kernel/fork.c +++ b/kernel/fork.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 6fe5a2897912..bcdbdc19eb35 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index dc71ec1e1967..e2d743fb7f55 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bb260ceb3718..acf0a5a06da7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 71e3dede95b4..dee92fa26bbb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 212a2afe8830..ba40b7f673f4 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/ksm.c b/mm/ksm.c index 5632c9d02457..19b4f2dea7a5 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index b0495ec74d29..6fc918b2c459 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d28936618698..69f75b014607 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include -- GitLab From 3f07c0144132e4f59d88055ac8ff3e691a5fa2b8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:30 +0100 Subject: [PATCH 0883/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 2 +- arch/alpha/kernel/signal.c | 2 +- arch/alpha/kernel/traps.c | 2 +- arch/alpha/mm/fault.c | 2 +- arch/arc/kernel/traps.c | 2 +- arch/arc/mm/fault.c | 2 +- arch/arm/kernel/ptrace.c | 2 +- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/alignment.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm/mm/init.c | 1 + arch/arm/mm/mmap.c | 2 +- arch/arm/vfp/vfpmodule.c | 2 +- arch/arm64/kernel/fpsimd.c | 2 +- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/traps.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/arm64/mm/mmap.c | 2 +- arch/avr32/kernel/traps.c | 2 +- arch/blackfin/kernel/trace.c | 2 +- arch/blackfin/kernel/traps.c | 1 + arch/cris/mm/fault.c | 1 + arch/frv/kernel/traps.c | 2 +- arch/h8300/kernel/ptrace_s.c | 2 +- arch/hexagon/kernel/traps.c | 2 +- arch/hexagon/mm/vm_fault.c | 1 + arch/ia64/kernel/asm-offsets.c | 2 +- arch/ia64/kernel/brl_emu.c | 2 +- arch/ia64/kernel/mca.c | 2 +- arch/ia64/kernel/traps.c | 2 +- arch/ia64/kernel/unaligned.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/ia64/mm/init.c | 1 + arch/mips/kernel/branch.c | 2 +- arch/mips/kernel/signal_o32.c | 1 + arch/mips/mm/mmap.c | 2 +- arch/mips/sgi-ip22/ip22-berr.c | 2 +- arch/mips/sgi-ip22/ip22-reset.c | 2 +- arch/mn10300/kernel/fpu.c | 2 ++ arch/openrisc/mm/fault.c | 2 +- arch/parisc/kernel/sys_parisc.c | 1 + arch/parisc/kernel/unaligned.c | 2 +- arch/parisc/math-emu/driver.c | 3 ++- arch/powerpc/kvm/book3s_64_vio.c | 1 + arch/powerpc/mm/mmap.c | 2 +- arch/powerpc/mm/mmu_context_iommu.c | 2 +- arch/powerpc/platforms/cell/spufs/fault.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- arch/s390/kernel/nmi.c | 3 +++ arch/s390/mm/mmap.c | 1 + arch/score/kernel/traps.c | 2 +- arch/sh/kernel/cpu/sh2a/fpu.c | 2 +- arch/sh/kernel/hw_breakpoint.c | 1 + arch/sh/kernel/traps.c | 2 ++ arch/sh/math-emu/math.c | 2 +- arch/sh/mm/asids-debugfs.c | 2 ++ arch/sh/mm/fault.c | 1 + arch/sparc/kernel/sys_sparc_32.c | 2 +- arch/sparc/kernel/sys_sparc_64.c | 2 +- arch/sparc/kernel/unaligned_32.c | 2 +- arch/tile/mm/mmap.c | 2 +- arch/um/drivers/line.c | 3 ++- arch/um/kernel/reboot.c | 2 +- arch/um/kernel/skas/mmu.c | 3 ++- arch/um/kernel/tlb.c | 3 ++- arch/um/kernel/trap.c | 2 +- arch/unicore32/kernel/fpu-ucf64.c | 2 +- arch/unicore32/kernel/traps.c | 1 + arch/unicore32/mm/fault.c | 2 +- arch/x86/entry/vsyscall/vsyscall_64.c | 1 + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +- arch/x86/kvm/mmu.c | 1 + arch/x86/mm/mmap.c | 2 +- arch/xtensa/kernel/traps.c | 2 +- drivers/android/binder.c | 2 +- drivers/block/drbd/drbd_int.h | 2 +- drivers/char/snsc_event.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 +- drivers/gpu/drm/drm_lock.c | 2 ++ drivers/gpu/drm/ttm/ttm_lock.c | 2 +- drivers/infiniband/core/umem.c | 2 +- drivers/infiniband/hw/hfi1/user_pages.c | 2 +- drivers/infiniband/hw/qib/qib_user_pages.c | 1 + drivers/infiniband/hw/usnic/usnic_uiom.c | 2 +- drivers/isdn/i4l/isdn_tty.c | 1 + drivers/isdn/mISDN/l1oip_core.c | 2 ++ drivers/md/md.c | 1 + drivers/md/raid1.c | 3 +++ drivers/md/raid5.c | 2 ++ drivers/misc/genwqe/card_dev.c | 2 +- drivers/misc/mic/cosm/cosm_scif_server.c | 2 ++ drivers/misc/mic/cosm_client/cosm_scif_client.c | 2 ++ drivers/misc/mic/scif/scif_rma.c | 1 + drivers/net/slip/slip.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- drivers/parisc/power.c | 2 +- drivers/ps3/ps3-sys-manager.c | 1 + drivers/s390/char/fs3270.c | 1 + 
drivers/s390/char/keyboard.c | 2 +- drivers/scsi/bnx2fc/bnx2fc.h | 2 +- drivers/scsi/bnx2i/bnx2i.h | 2 +- drivers/scsi/libiscsi.c | 1 + drivers/staging/android/lowmemorykiller.c | 2 +- drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c | 2 +- drivers/staging/rtl8188eu/include/osdep_service.h | 2 +- drivers/staging/rtl8712/osdep_service.h | 2 +- drivers/staging/rtl8712/rtl8712_cmd.c | 1 + .../staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 1 + .../staging/vc04_services/interface/vchiq_arm/vchiq_util.h | 2 +- drivers/target/iscsi/iscsi_target.c | 1 + drivers/target/iscsi/iscsi_target_erl0.c | 2 ++ drivers/target/iscsi/iscsi_target_login.c | 1 + drivers/target/iscsi/iscsi_target_nego.c | 1 + drivers/tty/pty.c | 2 +- drivers/tty/sysrq.c | 2 +- drivers/tty/tty_io.c | 2 +- drivers/tty/vt/keyboard.c | 2 +- drivers/tty/vt/vt.c | 2 +- drivers/tty/vt/vt_ioctl.c | 2 +- drivers/usb/atm/usbatm.c | 2 +- drivers/usb/core/devio.c | 1 + drivers/usb/gadget/function/f_mass_storage.c | 1 + drivers/vfio/vfio_iommu_spapr_tce.c | 1 + drivers/vfio/vfio_iommu_type1.c | 2 +- drivers/w1/w1_family.c | 2 +- drivers/w1/w1_int.c | 1 + fs/attr.c | 1 + fs/autofs4/waitq.c | 1 + fs/cifs/connect.c | 1 + fs/coda/upcall.c | 2 +- fs/coredump.c | 2 +- fs/exec.c | 1 + fs/file.c | 2 +- fs/fs_struct.c | 2 +- fs/jffs2/background.c | 2 +- fs/lockd/svc.c | 2 +- fs/ncpfs/inode.c | 1 + fs/ncpfs/sock.c | 1 + fs/nfs/callback.c | 1 + fs/nfsd/nfssvc.c | 2 +- fs/proc/fd.c | 2 +- fs/select.c | 4 ++-- include/linux/oom.h | 2 +- include/linux/ptrace.h | 1 + include/linux/sched/signal.h | 6 ++++++ include/linux/signalfd.h | 2 +- include/linux/taskstats_kern.h | 2 +- ipc/mqueue.c | 1 + kernel/bpf/syscall.c | 1 + kernel/cpu.c | 2 +- kernel/debug/gdbstub.c | 1 + kernel/debug/kdb/kdb_bt.c | 2 +- kernel/hung_task.c | 2 ++ kernel/rcu/update.c | 2 +- kernel/sched/sched.h | 1 + kernel/time/itimer.c | 1 + kernel/time/posix-cpu-timers.c | 2 +- kernel/time/tick-sched.c | 2 +- kernel/tracepoint.c | 2 +- kernel/tsacct.c | 2 +- kernel/user_namespace.c | 1 + lib/is_single_threaded.c | 3 +-- mm/filemap.c | 1 + mm/kmemleak.c | 2 +- mm/memory-failure.c | 2 +- mm/vmacache.c | 2 +- net/9p/client.c | 2 +- net/atm/common.c | 2 +- net/ax25/af_ax25.c | 2 +- net/caif/caif_socket.c | 2 +- net/core/stream.c | 1 + net/decnet/af_decnet.c | 2 +- net/irda/af_irda.c | 1 + net/netrom/af_netrom.c | 2 +- net/rose/af_rose.c | 2 +- net/sctp/socket.c | 1 + net/sunrpc/svc.c | 2 +- net/unix/af_unix.c | 2 +- net/x25/af_x25.c | 2 +- security/selinux/hooks.c | 2 +- 180 files changed, 204 insertions(+), 121 deletions(-) create mode 100644 include/linux/sched/signal.h diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 9d27a7d333dc..568ca29f2ad9 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 17308f925306..b8221f112eee 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -6,7 +6,7 @@ * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson */ -#include +#include #include #include #include diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index af2994206b4b..6448ab00043d 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 47948b4dd157..c25e8827e7cd 100644 --- 
a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -4,7 +4,7 @@ * Copyright (C) 1995 Linus Torvalds */ -#include +#include #include #include #include diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index c927aa84e652..ff83e78d0cfb 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -13,7 +13,7 @@ * Rahul Trivedi: Codito Technologies 2004 */ -#include +#include #include #include #include diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index e94e5aa33985..162c97528872 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index ae738a6319f6..46f7bab81c40 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -10,7 +10,7 @@ * published by the Free Software Foundation. */ #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 9688ec0c6ef4..dc5f1a22b3c9 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 7d5f4c736a16..d7fe2de9cc9e 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c2b5b9892fd1..520c7778d330 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index bf4d3bc41a7a..2a9040dcf47e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 66353caa35b9..d448f9cd7715 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 569d5a650a4a..a71a48e71fff 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b883f1f75216..06da8ea16bbe 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index a22161ccf447..64fc32ea3422 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7d47c2cdfd93..5b8779a849a2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 81283851c9af..30dd60ab8a65 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 01c171723bb3..1e0a2650c88b 100644 --- a/arch/arm64/mm/mmap.c +++ 
b/arch/arm64/mm/mmap.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c index eb4a3fcfbaff..50b541325025 100644 --- a/arch/avr32/kernel/traps.c +++ b/arch/avr32/kernel/traps.c @@ -14,7 +14,7 @@ #include #include /* print_modules */ #include -#include +#include #include #include diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index 719dd796c12c..01e546ad2f7a 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 1ed85ddadc0d..32676d7721b1 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 94183d3639ef..1fca464f1b9e 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 31221fb4348e..43c134d6081c 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include #include diff --git a/arch/h8300/kernel/ptrace_s.c b/arch/h8300/kernel/ptrace_s.c index ef5a9c13e76d..c0af930052c0 100644 --- a/arch/h8300/kernel/ptrace_s.c +++ b/arch/h8300/kernel/ptrace_s.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 110dab152f82..4496bcf605ef 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index 489875fd2be4..3eec33c5cfd7 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 60ef83e6db71..8786c8b4f187 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -6,7 +6,7 @@ #define ASM_OFFSETS_C 1 -#include +#include #include #include #include diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c index 8682df6263d6..987b11be0021 100644 --- a/arch/ia64/kernel/brl_emu.c +++ b/arch/ia64/kernel/brl_emu.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 9509cc73b9c6..5ac51069e453 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -72,7 +72,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 8981ce98afb3..48ba46b025e1 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include /* For unblank_screen() */ #include diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 99348d7f2255..a13680ca1e61 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -15,7 +15,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/mm/fault.c 
b/arch/ia64/mm/fault.c index 7f2feb21753c..15f09cfff335 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -4,7 +4,7 @@ * Copyright (C) 1998-2002 Hewlett-Packard Co * David Mosberger-Tang */ -#include +#include #include #include #include diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 06cdaef54b2e..8f3efa682ee8 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index ae037a304ee4..b11facd11c9d 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -7,7 +7,7 @@ * Copyright (C) 2001 MIPS Technologies, Inc. */ #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c index 5e169fc5ca5c..2b3572fb5f1b 100644 --- a/arch/mips/kernel/signal_o32.c +++ b/arch/mips/kernel/signal_o32.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index d6d92c02308d..374d71e61ef6 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c index 3f6ccd53c15d..ff8e1935c873 100644 --- a/arch/mips/sgi-ip22/ip22-berr.c +++ b/arch/mips/sgi-ip22/ip22-berr.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c index a36f6b87548a..03a39ac5ead9 100644 --- a/arch/mips/sgi-ip22/ip22-reset.c +++ b/arch/mips/sgi-ip22/ip22-reset.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/kernel/fpu.c b/arch/mn10300/kernel/fpu.c index 2578b7ae7dd5..50ce7b447fed 100644 --- a/arch/mn10300/kernel/fpu.c +++ b/arch/mn10300/kernel/fpu.c @@ -9,6 +9,8 @@ * 2 of the Licence, or (at your option) any later version. 
*/ #include +#include + #include #include #include diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 53592a639744..e310ab499385 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index bf3294171230..ce07cd3f2507 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 0a21067ac0a3..a08ab481e556 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 09ef4136c693..2fb59d2e2b29 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -27,7 +27,8 @@ * Copyright (C) 2001 Hewlett-Packard */ -#include +#include + #include "float.h" #include "math-emu.h" diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index ab9d14c0e460..3e26cd4979f9 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 2f1e44362198..8013861aeaa7 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 7de7124ac91b..497130c5c742 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -10,7 +10,7 @@ * */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e29e4d5afa2d..870c0a82d560 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -19,7 +19,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include +#include #include #include diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 26fa03fc9f3c..16321ad9e70c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 80c093e0c6f1..9bf8327154ee 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -13,6 +13,9 @@ #include #include #include +#include +#include + #include #include #include diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 7ae1282d5be9..5ea09403bb87 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 569ac02f68df..fa624f30f783 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 98bbaa447c93..352f894bece1 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -9,7 +9,7 @@ * * FIXME! These routines can be optimized in big endian case. */ -#include +#include #include #include #include diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index 2197fc584186..afe965712a69 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 9513fa7840aa..3036dee854d1 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -8,6 +8,8 @@ #include #include #include +#include + #include #include /* print_modules */ #include diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index 5078cb809750..c86f4360c6ce 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index bf95fdaedd0c..110bd35165bf 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 9bf876780cef..6fd1bf7481c7 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index fb7b185ee941..ae49639a484e 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 884c70331345..54d3999d8119 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index d20d4e3fd129..8367dce5f41b 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -8,7 +8,7 @@ #include -#include +#include #include #include #include diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c index ef61c597898b..377e312dc27e 100644 --- a/arch/tile/mm/mmap.c +++ b/arch/tile/mm/mmap.c @@ 
-17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 62087028a9ce..366e57f5e8d6 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -5,8 +5,9 @@ #include #include -#include +#include #include + #include "chan.h" #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index b60a9f8cda75..79218106a033 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -3,7 +3,7 @@ * Licensed under the GPL */ -#include +#include #include #include #include diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 3943e9d7d13d..7a1f2a936fd1 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -5,8 +5,9 @@ */ #include -#include +#include #include + #include #include #include diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 3777b82759bd..37508b190106 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -5,7 +5,8 @@ #include #include -#include +#include + #include #include #include diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ad8f206ab5e8..9711ae4aaa6a 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/unicore32/kernel/fpu-ucf64.c b/arch/unicore32/kernel/fpu-ucf64.c index a53343a90ca2..12c8c9527b8e 100644 --- a/arch/unicore32/kernel/fpu-ucf64.c +++ b/arch/unicore32/kernel/fpu-ucf64.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index c54e32410ead..7f5e06f9a202 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -14,6 +14,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index b656d216a8a8..bbefcc46a45e 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 636c4b341f36..df91fb393a01 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -27,6 +27,7 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8af04afdfcb9..d48af18e7baf 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1cda35277278..ac7810513d0e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index d2dc0438d654..5eabf34008f1 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include struct va_alignment __read_mostly va_align = { diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 282bf721a4d6..84abd66e680d 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 
e33e7fbe870b..aae4d8d4be36 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4cb8f21ff4ef..724d1c50fc52 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c index 59bcefd6ec7c..e452673dff66 100644 --- a/drivers/char/snsc_event.c +++ b/drivers/char/snsc_event.c @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 6a3470f84998..d1ce83d73a87 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 32d43f86a8f2..96bb6badb818 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -34,6 +34,8 @@ */ #include +#include + #include #include "drm_legacy.h" #include "drm_internal.h" diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c index f154fb1929bd..913f4318cdc0 100644 --- a/drivers/gpu/drm/ttm/ttm_lock.c +++ b/drivers/gpu/drm/ttm/ttm_lock.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #define TTM_WRITE_LOCK_PENDING (1 << 0) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index d525b1a2986a..27f155d2df8d 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 20f4ddcac3b0..68295a12b771 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -46,7 +46,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 75f08624ac05..ce83ba9a12ef 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -32,6 +32,7 @@ */ #include +#include #include #include "qib.h" diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index ba868be5af2e..c49db7c33979 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index 63eaa0a9f8a1..1b169559a240 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "isdn_common.h" #include "isdn_tty.h" #ifdef CONFIG_ISDN_AUDIO diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index 67c21876c35f..6ceca7db62ad 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -234,6 +234,8 @@ #include #include #include +#include + #include #include "core.h" #include "l1oip.h" diff --git a/drivers/md/md.c b/drivers/md/md.c index 985374f20e2e..548d1b8014f8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -44,6 +44,7 
@@ */ +#include #include #include #include diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7453d94eeed7..fbc2d7851b49 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -37,7 +37,10 @@ #include #include #include +#include + #include + #include "md.h" #include "raid1.h" #include "bitmap.h" diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2ce23b01dbb2..4fb09b3fcb41 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -55,6 +55,8 @@ #include #include #include +#include + #include #include "md.h" diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index cb290b8ca0c8..dd4617764f14 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c index 5696df4326b5..85f7d09cc65f 100644 --- a/drivers/misc/mic/cosm/cosm_scif_server.c +++ b/drivers/misc/mic/cosm/cosm_scif_server.c @@ -19,6 +19,8 @@ * */ #include +#include + #include "cosm_main.h" /* diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c index 03e98bf1ac15..aa530fcceaa9 100644 --- a/drivers/misc/mic/cosm_client/cosm_scif_client.c +++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include "../cosm/cosm_main.h" #define COSM_SCIF_MAX_RETRIES 10 diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index d0e9c60f944e..329727e00e97 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "scif_main.h" #include "scif_map.h" diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 08db4d687533..1da31dc47f86 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -66,7 +66,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index c5744b45ec8f..65689469c5a1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index ef31b77404ef..e2a3112f1c98 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/ps3/ps3-sys-manager.c b/drivers/ps3/ps3-sys-manager.c index f2ab435954f6..73e496a72113 100644 --- a/drivers/ps3/ps3-sys-manager.c +++ b/drivers/ps3/ps3-sys-manager.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 85eca1cef063..c4518168fd02 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 82c913318b73..ba0e4f93503d 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index fdd4eb4e41b2..4fc8ed5fe067 100644 --- 
a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/bnx2i/bnx2i.h b/drivers/scsi/bnx2i/bnx2i.h index ed7f3228e234..89ef1a1678d1 100644 --- a/drivers/scsi/bnx2i/bnx2i.h +++ b/drivers/scsi/bnx2i/bnx2i.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 834d1212b6d5..07c08ce68d70 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index ec3b66561412..054660049395 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c index cf902154f0aa..bcf9f3dd0310 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include "../../../include/linux/libcfs/libcfs.h" diff --git a/drivers/staging/rtl8188eu/include/osdep_service.h b/drivers/staging/rtl8188eu/include/osdep_service.h index ee3f5ee06529..9e390648d93e 100644 --- a/drivers/staging/rtl8188eu/include/osdep_service.h +++ b/drivers/staging/rtl8188eu/include/osdep_service.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/rtl8712/osdep_service.h b/drivers/staging/rtl8712/osdep_service.h index b8a170978434..5d33020554cd 100644 --- a/drivers/staging/rtl8712/osdep_service.h +++ b/drivers/staging/rtl8712/osdep_service.h @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index f19b6b27aa71..5346c657485d 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index cb0b7ca36b1e..8a0d214f6e9b 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h index 4055d4bf9f74..e63964f5a18a 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include /* for time_t */ diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index da2c73a255de..fa1d578d56bd 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/target/iscsi/iscsi_target_erl0.c 
b/drivers/target/iscsi/iscsi_target_erl0.c index b54e72c7ab0f..a4d5e6749932 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -17,6 +17,8 @@ * GNU General Public License for more details. ******************************************************************************/ +#include + #include #include #include diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index eab274d17b5c..b03cc03423c1 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include /* TCP_NODELAY */ #include /* ipv6_addr_v4mapped() */ diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 46388c9e08da..29eb09e0cd15 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index a23fa5ed1d67..66b59a15780d 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 71136742e606..65db0aeb3d80 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -14,7 +14,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a1fd3f7d487a..985d33f0f315 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -69,7 +69,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 397e1509fe51..6b3a2c00974d 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 9d3ce505e7ab..5c4933bb4b53 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -72,7 +72,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index a56edf2d58eb..0cbfe1ff6f6c 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index 5a59da0dc98a..3e80aa3b917a 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index ca425e8099ea..cfc3cff6e8d5 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 8f3659b65f53..4c8aacc232c0 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -207,6 +207,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 185d50ee1b12..cf3de91fbfe7 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -21,6 
+21,7 @@ #include #include #include +#include #include #include diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 7e94c7fc90ae..c26fa1f3ed86 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/w1/w1_family.c b/drivers/w1/w1_family.c index df1c9bb90eb5..2096f460498f 100644 --- a/drivers/w1/w1_family.c +++ b/drivers/w1/w1_family.c @@ -14,7 +14,7 @@ #include #include -#include /* schedule_timeout() */ +#include #include #include diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 4ce1b66d5092..2cae7b29bb5f 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/fs/attr.c b/fs/attr.c index c902b3d53508..135304146120 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 79fbd85db4ba..24a58bf9ca72 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "autofs_i.h" diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 777ad9f4fc3c..9bf25be05636 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index f6c6c8adbc01..e82357c89979 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include #include diff --git a/fs/coredump.c b/fs/coredump.c index 23a539b29225..c74ab43b8383 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index 9c80d011594e..aa228b98ad5b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/file.c b/fs/file.c index 69d6990e3021..ad6f094f2eff 100644 --- a/fs/file.c +++ b/fs/file.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 7dca743b2ce1..543ed50f0387 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index e5c1783ab64a..453a6a1fff34 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include "nodelist.h" diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 7e4ea3b9f472..e7c8b9c76e48 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 7eb89c23c847..d5606099712a 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index 97b111d79489..bdea177aa405 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 484bebc20bca..bb79972dc638 100644 --- 
a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index efd66da99201..786a4a2cb2d7 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -6,7 +6,7 @@ * Copyright (C) 1995, 1996, 1997 Olaf Kirch */ -#include +#include #include #include #include diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 00ce1531b2f5..c330495c3115 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/fs/select.c b/fs/select.c index 305c0daf5d67..e2112270d75a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -15,7 +15,8 @@ */ #include -#include +#include +#include #include #include #include @@ -26,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/oom.h b/include/linux/oom.h index b4e36e92bc87..8a266e2be5a6 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -2,7 +2,7 @@ #define __INCLUDE_LINUX_OOM_H -#include +#include #include #include #include diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index e0e539321ab9..422bc2e4cb6a 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -3,6 +3,7 @@ #include /* For unlikely. */ #include /* For struct task_struct. */ +#include /* For send_sig(), same_thread_group(), etc. */ #include /* for IS_ERR_VALUE */ #include /* For BUG_ON. */ #include /* For task_active_pid_ns. */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h new file mode 100644 index 000000000000..da69cd05cd68 --- /dev/null +++ b/include/linux/sched/signal.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_SIGNAL_H +#define _LINUX_SCHED_SIGNAL_H + +#include + +#endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index eadbe227c256..4985048640a7 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -8,7 +8,7 @@ #define _LINUX_SIGNALFD_H #include - +#include #ifdef CONFIG_SIGNALFD diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 58de6edf751f..e2a5daf8d14f 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -8,7 +8,7 @@ #define _LINUX_TASKSTATS_KERN_H #include -#include +#include #include #ifdef CONFIG_TASKSTATS diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 40e448de8a7e..4f7241fbeff3 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 461eb1e66a0f..7af0dcc5d755 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cpu.c b/kernel/cpu.c index 0a5f630f5c54..0aae3183b029 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 19d9a578c753..7510dc687c0d 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -29,6 +29,7 @@ */ #include +#include #include #include #include diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index fe15fff5df53..9b976a42376d 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include "kdb_private.h" diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 
40c07e4fa116..129247e56902 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include /* diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index a0e90e0afc75..da128deb10ec 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e2307a6c29f1..641249471952 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index a95f13c31464..f6b961c5e58c 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index b4377a5e4269..a2475a9f57d8 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -2,7 +2,7 @@ * Implement CPU time clocks for the POSIX clock interface. */ -#include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4fee1c3abd0b..0f411a4690a0 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 1f9a31f934a4..9f6984d52fec 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include extern struct tracepoint * const __start___tracepoints_ptrs[]; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 571a2d3821d8..d9a03b80b75d 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 86b7854fec8e..2f735cbe05e8 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 391fd23976a2..9745cfffcb69 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -9,8 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ - -#include +#include /* * Returns true if the task does not share ->mm with another thread/process. diff --git a/mm/filemap.c b/mm/filemap.c index 1944c631e3e6..1694623a6289 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index da3436953022..2df6d3687b2a 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -73,7 +73,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3d0f2fd4bf73..b74984db386e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/vmacache.c b/mm/vmacache.c index 7c233f8e20ee..4355d34c68a6 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2014 Davidlohr Bueso. 
*/ -#include +#include #include #include diff --git a/net/9p/client.c b/net/9p/client.c index 3fc94a49ccd5..25cfd8a4bc36 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/atm/common.c b/net/atm/common.c index a3ca922d307b..9613381f5db0 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -13,7 +13,7 @@ #include /* error codes */ #include #include -#include +#include #include /* struct timeval */ #include #include diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 90fcf5fc2e0a..a8e42cedf1db 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 92cbbd2afddb..adcad344c843 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/core/stream.c b/net/core/stream.c index f575bcf64af2..20231dbb1da0 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a90ed67027b0..e6e79eda9763 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -106,7 +106,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include -#include +#include #include #include #include diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ab254041dab7..81adc29a448d 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ed212ffc1d9d..4bbf4526b885 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9ad301c46b88..b8a1df2c9785 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 465a9c8464f9..6f0a9be50f50 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b94efd93d3e4..a08aeb56b8e4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e2d18b9f910f..ee37b390260a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -85,7 +85,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 079c883aa96e..fd28a49dbe8f 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9a8f12f8d5b7..b12f873f92ba 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include -- GitLab From 8703e8a465b1e9cadc3680b4b1248f5987e54518 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:30 +0100 Subject: [PATCH 
0884/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- block/ioprio.c | 1 + include/linux/cred.h | 2 +- include/linux/sched/user.h | 6 ++++++ ipc/mqueue.c | 1 + kernel/fork.c | 1 + kernel/signal.c | 1 + kernel/user.c | 1 + mm/mlock.c | 1 + net/core/scm.c | 1 + security/keys/process_keys.c | 1 + 10 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/user.h diff --git a/block/ioprio.c b/block/ioprio.c index 3790669232ff..89c43e07787d 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/cred.h b/include/linux/cred.h index f0e70a1bb3ac..045d33e48069 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,8 +18,8 @@ #include #include #include +#include -struct user_struct; struct cred; struct inode; diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h new file mode 100644 index 000000000000..83238e5ddadd --- /dev/null +++ b/include/linux/sched/user.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_USER_H +#define _LINUX_SCHED_USER_H + +#include + +#endif /* _LINUX_SCHED_USER_H */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 4f7241fbeff3..e8d41ff57241 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/kernel/fork.c b/kernel/fork.c index 7332448b668a..a5ad1e4ab604 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index bae358532d0a..762fcf64f0c3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/user.c b/kernel/user.c index b069ccbfb0b0..00281add65b2 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/mlock.c b/mm/mlock.c index cdbed8aaa426..1050511f8b2b 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/scm.c b/net/core/scm.c index b6d83686e149..b1ff8a441748 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 918cddcd4516..b6fdd22205b1 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include -- GitLab From 38b8d208a4544c9a26b10baec89b8a21042e5305 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: [PATCH 0885/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to move softlockup APIs out of , which will have to be picked up from other headers and a couple of .c files. already includes . Include the header in the files that are going to need it. 
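The same mechanics repeat across this group of preparation patches: first a trivial placeholder header that does nothing but redirect back to <linux/sched.h>, then an explicit #include of that new header in every file that will need the APIs once they move. A minimal sketch of the two pieces, assuming the split-out header is <linux/sched/user.h> (the placeholder created in the patch above) and a consumer translation unit that only needs struct user_struct; the consumer file and function below are illustrative, not taken from the diffs:

/* include/linux/sched/user.h - placeholder, simply maps back to the old header */
#ifndef _LINUX_SCHED_USER_H
#define _LINUX_SCHED_USER_H

#include <linux/sched.h>	/* still provides struct user_struct for now */

#endif /* _LINUX_SCHED_USER_H */

/* some_consumer.c - name the new header explicitly instead of relying on <linux/sched.h> */
#include <linux/sched/user.h>

static void put_user_ref(struct user_struct *up)
{
	free_uid(up);		/* assumed user_struct API, for illustration only */
}

Because each placeholder initially just includes <linux/sched.h>, every intermediate step keeps building, which is what keeps the series bisectable.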
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/ia64/kernel/time.c | 1 + arch/ia64/kernel/uncached.c | 1 + arch/powerpc/kernel/swsusp_64.c | 1 + arch/x86/kernel/pvclock.c | 2 ++ arch/x86/xen/smp.c | 1 + drivers/ide/ide-taskfile.c | 1 + drivers/mtd/nand/nand_base.c | 1 + drivers/video/fbdev/nvidia/nv_accel.c | 2 ++ init/main.c | 1 + kernel/debug/debug_core.c | 1 + kernel/power/hibernate.c | 1 + kernel/power/snapshot.c | 1 + kernel/sched/clock.c | 1 + kernel/time/tick-sched.c | 1 + kernel/time/timekeeping.c | 1 + 15 files changed, 17 insertions(+) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index faa116822c4c..144f9db7a876 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index f3976da36721..583f7ff6b589 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c index 0e899e47c325..51db012808f5 100644 --- a/arch/powerpc/kernel/swsusp_64.c +++ b/arch/powerpc/kernel/swsusp_64.c @@ -10,6 +10,7 @@ #include #include #include +#include void do_after_copyback(void) { diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 9e93fe5803b4..5c3f6d6a5078 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -21,6 +21,8 @@ #include #include #include +#include + #include #include diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 0dee6f59ea82..7ff2f1bfb7ec 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 247b9faccce1..4c0007cb74e3 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 1492c12906f6..b0524f8accb6 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/video/fbdev/nvidia/nv_accel.c b/drivers/video/fbdev/nvidia/nv_accel.c index ad6472a894ea..7341fed63e35 100644 --- a/drivers/video/fbdev/nvidia/nv_accel.c +++ b/drivers/video/fbdev/nvidia/nv_accel.c @@ -48,6 +48,8 @@ */ #include +#include + #include "nv_type.h" #include "nv_proto.h" #include "nv_dma.h" diff --git a/init/main.c b/init/main.c index ae9f2008fb86..c891cfb8928f 100644 --- a/init/main.c +++ b/init/main.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index a603ef28f70c..65c0f1363788 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 86385af1080f..b8be5c803cdd 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 905d5bbd595f..d79a38de425a 100644 --- 
a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index dd7817cdbf58..a08795e21628 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0f411a4690a0..5e9e123b4321 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fb564acee0f3..5b63a2102c29 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include -- GitLab From 55687da166bf51129ed6b110d7711f4c7560abe2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: [PATCH 0886/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/cpufreq/cpufreq_governor.c | 1 - drivers/cpufreq/cpufreq_governor.h | 1 + drivers/cpufreq/cpufreq_ondemand.c | 1 + drivers/cpufreq/intel_pstate.c | 2 +- include/linux/sched/cpufreq.h | 6 ++++++ kernel/sched/sched.h | 1 + 6 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 include/linux/sched/cpufreq.h diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 631bd2c86c5e..47e24b5384b3 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -18,7 +18,6 @@ #include #include -#include #include #include "cpufreq_governor.h" diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index f5717ca070cc..0236ec2cd654 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 4a017e895296..3937acf7e026 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpufreq_ondemand.h" diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index eb0f7fb71685..8f6d29302aac 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h new file mode 100644 index 000000000000..07ed9664fa20 --- /dev/null +++ b/include/linux/sched/cpufreq.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_CPUFREQ_H +#define _LINUX_SCHED_CPUFREQ_H + +#include + +#endif /* _LINUX_SCHED_CPUFREQ_H */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 641249471952..7dfeb14fa43c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include 
#include -- GitLab From 010426079ec1228a7f980d2eef766a84c0f9241a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: [PATCH 0887/1084] sched/headers: Prepare for new header dependencies before moving more code to We are going to split more MM APIs out of , which will have to be picked up from a couple of .c files. The APIs that we are going to move are: arch_pick_mmap_layout() arch_get_unmapped_area() arch_get_unmapped_area_topdown() mm_update_next_owner() Include the header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 1 + arch/arc/mm/mmap.c | 3 ++- arch/arm/mm/mmap.c | 1 + arch/arm64/mm/mmap.c | 1 + arch/frv/mm/elf-fdpic.c | 1 + arch/ia64/kernel/sys_ia64.c | 1 + arch/mips/mm/mmap.c | 1 + arch/parisc/kernel/cache.c | 1 + arch/parisc/kernel/sys_parisc.c | 1 + arch/parisc/mm/hugetlbpage.c | 1 + arch/powerpc/mm/mmap.c | 1 + arch/s390/mm/mmap.c | 1 + arch/sh/mm/mmap.c | 1 + arch/sparc/kernel/sys_sparc_32.c | 1 + arch/sparc/kernel/sys_sparc_64.c | 1 + arch/sparc/mm/hugetlbpage.c | 1 + arch/tile/mm/hugetlbpage.c | 1 + arch/tile/mm/mmap.c | 1 + arch/x86/kernel/sys_x86_64.c | 1 + arch/x86/mm/hugetlbpage.c | 1 + arch/x86/mm/mmap.c | 1 + arch/xtensa/kernel/syscall.c | 1 + 22 files changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 568ca29f2ad9..3b9b2a382ba2 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index 2e06d56e987b..3e25e8d6486b 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -13,7 +13,8 @@ #include #include #include -#include +#include + #include #define COLOUR_ALIGN(addr, pgoff) \ diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index d448f9cd7715..2239fde10b80 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 1e0a2650c88b..7b0d55756eb1 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c index 836f14707a62..da82c25301e7 100644 --- a/arch/frv/mm/elf-fdpic.c +++ b/arch/frv/mm/elf-fdpic.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index a09c12230bc5..ce4cc60d519b 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include /* doh, must come after sched.h... 
*/ #include diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 374d71e61ef6..64dd8bdd92c3 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -14,6 +14,7 @@ #include #include #include +#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 977f0a4f5ecf..7820b864de1a 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index ce07cd3f2507..e5288638a1d9 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index 5d6eea925cf4..aa50ac090e9b 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 8013861aeaa7..a5d9ef59debe 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5ea09403bb87..50618614881f 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c index 6777177807c2..08e7af0be4a7 100644 --- a/arch/sh/mm/mmap.c +++ b/arch/sh/mm/mmap.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ae49639a484e..ff3573059936 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 54d3999d8119..c521ee277083 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index e98a3f2e8f0f..323bc6b6e3ad 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 77ceaa343fce..cb10153b5c9f 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c index 377e312dc27e..8ab28167c44b 100644 --- a/arch/tile/mm/mmap.c +++ b/arch/tile/mm/mmap.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index a55ed63b9f91..50215a4b9347 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 2ae8584b44c7..c5066a260803 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -7,6 +7,7 @@ #include #include #include +#include #include 
#include #include diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 5eabf34008f1..7940166c799b 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -29,6 +29,7 @@ #include #include #include +#include #include struct va_alignment __read_mostly va_align = { diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index d3fd100dffc9..06937928cb72 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -25,6 +25,7 @@ #include #include #include +#include #include typedef void (*syscall_t)(void); -- GitLab From 6a3827d7509cbf96b7e961f8957c1f01d1bcf894 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: [PATCH 0888/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/exec.c | 1 + fs/proc/array.c | 1 + include/linux/sched/numa_balancing.h | 6 ++++++ include/trace/events/sched.h | 2 +- kernel/fork.c | 1 + kernel/sched/sched.h | 1 + mm/huge_memory.c | 1 + mm/memory.c | 1 + mm/mempolicy.c | 1 + 9 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/numa_balancing.h diff --git a/fs/exec.c b/fs/exec.c index aa228b98ad5b..e857c7260b1f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index 61bea3a4ecc5..cfe7f934a300 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h new file mode 100644 index 000000000000..999182279f78 --- /dev/null +++ b/include/linux/sched/numa_balancing.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_NUMA_BALANCING_H +#define _LINUX_SCHED_NUMA_BALANCING_H + +#include + +#endif /* _LINUX_SCHED_NUMA_BALANCING_H */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9b90c57517a9..9e3ef6c99e4b 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -4,7 +4,7 @@ #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H -#include +#include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index a5ad1e4ab604..08d6c091ac7c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7dfeb14fa43c..7e8ce0347fbf 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/huge_memory.c b/mm/huge_memory.c index dee92fa26bbb..d36b2af4d1bf 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index 6fc918b2c459..d4994a28dc85 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 90892416ed75..18e0105810df 
100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include -- GitLab From 1e4bae6468375f81a3d4bdf436deaa9ea294e12b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:32 +0100 Subject: [PATCH 0889/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the file that is going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/jobctl.h | 4 ++++ include/linux/sched/signal.h | 1 + 2 files changed, 5 insertions(+) create mode 100644 include/linux/sched/jobctl.h diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h new file mode 100644 index 000000000000..228e4644937b --- /dev/null +++ b/include/linux/sched/jobctl.h @@ -0,0 +1,4 @@ +#ifndef _LINUX_SCHED_JOBCTL_H +#define _LINUX_SCHED_JOBCTL_H + +#endif /* _LINUX_SCHED_JOBCTL_H */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index da69cd05cd68..ce93c4a02b79 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -2,5 +2,6 @@ #define _LINUX_SCHED_SIGNAL_H #include +#include #endif /* _LINUX_SCHED_SIGNAL_H */ -- GitLab From 7fce777cd4eacc0bdcb33017e5a4c495d28afed1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 14:47:27 +0100 Subject: [PATCH 0890/1084] sched/headers: Prepare header dependency changes, move the include to kernel/sched/sched.h Recent header reorganizations unearthed this hidden dependency: kernel/sched/core.c:199:25: error: 'paravirt_steal_rq_enabled' undeclared (first use in this function) kernel/sched/core.c:200:11: error: implicit declaration of function 'paravirt_steal_clock' [-Werror=implicit-function-declaration] So move the asm/paravirt.h include from kernel/sched/cpuclock.c to kernel/sched/sched.h. ( NOTE: We do this change before doing the changes that introduce the build failure, so the series remains fully bisectable. ) Reported-by: kbuild test robot Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/cputime.c | 4 ---- kernel/sched/sched.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 2ecec3a4f1ee..54031fa681e2 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -6,10 +6,6 @@ #include #include #include "sched.h" -#ifdef CONFIG_PARAVIRT -#include -#endif - #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7e8ce0347fbf..04f376cf7ba9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -20,6 +20,10 @@ #include #include +#ifdef CONFIG_PARAVIRT +#include +#endif + #include "cpupri.h" #include "cpudeadline.h" #include "cpuacct.h" -- GitLab From 5b825c3af1d8a0af4deb4a5eb349d0d0050c62e5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 17:54:15 +0100 Subject: [PATCH 0891/1084] sched/headers: Prepare to remove inclusion from Add #include dependencies to all .c files rely on sched.h doing that for them. 
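Concretely, a file that currently compiles only because <linux/sched.h> happens to drag the needed header in gets a direct include of its own. A sketch, assuming the header being decoupled is <linux/cred.h> and a consumer that uses current_cred(); the file name and function are illustrative, not taken from the diffs below:

/* example.c - make the dependency explicit before <linux/sched.h> stops providing it */
#include <linux/sched.h>
#include <linux/cred.h>		/* assumed header; for current_cred(), struct cred */

static bool running_as_root(void)
{
	return uid_eq(current_cred()->uid, GLOBAL_ROOT_UID);
}

Once every such file names the header it really depends on, dropping the include from <linux/sched.h> later becomes a mechanical step.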
Note that even if the count where we need to add extra headers seems high, it's still a net win, because is included in over 2,200 files ... Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/sys_oabi-compat.c | 1 + arch/mips/kernel/mips-mt-fpaff.c | 1 + arch/x86/include/asm/intel_rdt.h | 1 + arch/x86/kernel/cpu/intel_cacheinfo.c | 1 + block/ioprio.c | 1 + drivers/misc/eeprom/eeprom.c | 1 + drivers/misc/lkdtm_heap.c | 1 + drivers/misc/vmw_vmci/vmci_context.c | 1 + drivers/misc/vmw_vmci/vmci_host.c | 1 + drivers/staging/lustre/lustre/include/lustre_compat.h | 1 + drivers/staging/lustre/lustre/ptlrpc/sec.c | 1 + drivers/xen/balloon.c | 1 + fs/9p/v9fs.c | 1 + fs/affs/inode.c | 1 + fs/affs/super.c | 1 + fs/autofs4/dev-ioctl.c | 1 + fs/befs/linuxvfs.c | 1 + fs/binfmt_elf.c | 1 + fs/cachefiles/internal.h | 1 + fs/compat.c | 1 + fs/exportfs/expfs.c | 1 + fs/ext2/balloc.c | 1 + fs/ext4/ialloc.c | 2 ++ fs/file_table.c | 1 + fs/gfs2/inode.c | 1 + fs/gfs2/sys.c | 1 + fs/hfs/inode.c | 1 + fs/hfsplus/inode.c | 1 + fs/isofs/inode.c | 1 + fs/jffs2/fs.c | 1 + fs/libfs.c | 1 + fs/namespace.c | 1 + fs/ncpfs/ioctl.c | 1 + fs/notify/fanotify/fanotify.c | 1 + fs/notify/inotify/inotify_fsnotify.c | 1 + fs/omfs/inode.c | 1 + fs/overlayfs/copy_up.c | 1 + fs/overlayfs/inode.c | 1 + fs/overlayfs/namei.c | 1 + fs/overlayfs/super.c | 1 + fs/overlayfs/util.c | 1 + fs/posix_acl.c | 1 + fs/proc/proc_sysctl.c | 1 + fs/proc/root.c | 1 + fs/quota/dquot.c | 1 + fs/stat.c | 1 + fs/xfs/xfs_ioctl.c | 1 + include/linux/cred.h | 1 + include/linux/sched/signal.h | 1 + include/linux/wait.h | 1 + include/net/scm.h | 1 + include/rdma/ib.h | 1 + ipc/namespace.c | 1 + kernel/pid_namespace.c | 1 + kernel/ucount.c | 1 + kernel/uid16.c | 1 + kernel/utsname.c | 1 + mm/usercopy.c | 1 + net/dns_resolver/dns_query.c | 2 ++ net/netfilter/xt_owner.c | 2 ++ net/sunrpc/auth.c | 1 + security/apparmor/policy.c | 1 + security/keys/internal.h | 1 + security/keys/keyctl.c | 1 + security/keys/persistent.c | 2 ++ 65 files changed, 69 insertions(+) diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 5f221acd21ae..b9786f491873 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 1a0a3b4ecc3e..ffc6bd3464d9 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h index 95ce5c85b009..0d64397cee58 100644 --- a/arch/x86/include/asm/intel_rdt.h +++ b/arch/x86/include/asm/intel_rdt.h @@ -3,6 +3,7 @@ #ifdef CONFIG_INTEL_RDT_A +#include #include #include diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0282b0df004a..c55fb2cb2acc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/block/ioprio.c b/block/ioprio.c index 89c43e07787d..4b9ab8367dda 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c index 
3d1d55157e5f..2fad790db3bf 100644 --- a/drivers/misc/eeprom/eeprom.c +++ b/drivers/misc/eeprom/eeprom.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/lkdtm_heap.c b/drivers/misc/lkdtm_heap.c index 0f1581664c1c..ffb6aeac07b3 100644 --- a/drivers/misc/lkdtm_heap.c +++ b/drivers/misc/lkdtm_heap.c @@ -4,6 +4,7 @@ */ #include "lkdtm.h" #include +#include /* * This tries to stay within the next largest power-of-2 kmalloc cache diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c index f35f0c8606b9..21d0fa592145 100644 --- a/drivers/misc/vmw_vmci/vmci_context.c +++ b/drivers/misc/vmw_vmci/vmci_context.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "vmci_queue_pair.h" diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c index ec090105eb4b..8a16a26e9658 100644 --- a/drivers/misc/vmw_vmci/vmci_host.c +++ b/drivers/misc/vmw_vmci/vmci_host.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h index 300e96fb032a..da9ce195c52e 100644 --- a/drivers/staging/lustre/lustre/include/lustre_compat.h +++ b/drivers/staging/lustre/lustre/include/lustre_compat.h @@ -35,6 +35,7 @@ #include #include +#include #include "lustre_patchless_compat.h" diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c index e860df7c45a2..49f34fd655c3 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c @@ -38,6 +38,7 @@ #include "../../include/linux/libcfs/libcfs.h" #include +#include #include #include "../include/obd.h" diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index db107fa50ca1..a6d4378eb8d9 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 072e7599583a..a89f3cfe3c7d 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/affs/inode.c b/fs/affs/inode.c index a5e6097eb5a9..abcc59899229 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -10,6 +10,7 @@ * (C) 1991 Linus Torvalds - minix filesystem */ #include +#include #include #include "affs.h" diff --git a/fs/affs/super.c b/fs/affs/super.c index 37532538e8ab..c2c27a8f128e 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 806df746f1a9..734cbf8d9676 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 19407165f4aa..c500e954debb 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "befs.h" diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index fbbe52e1250e..2c95257fa4da 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index cd1effee8a49..9bf90bcc56ac 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -19,6 
+19,7 @@ #include #include #include +#include #include #include diff --git a/fs/compat.c b/fs/compat.c index e50a2114f474..c61b506f5bc9 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index a4b531be9168..9ec1038f937e 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -15,6 +15,7 @@ #include #include #include +#include #define dprintk(fmt, args...) do{}while(0) diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 4c40c0786e16..d0bdb74f0e15 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b14bae2598bc..17bc043308f3 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -21,6 +21,8 @@ #include #include #include +#include + #include #include "ext4.h" diff --git a/fs/file_table.c b/fs/file_table.c index 6d982b57de92..954d510b765a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index eb7724b8578a..9d28f55fbd1d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index f8d30e41d1d3..7a515345610c 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index f776acf2378a..bfbba799430f 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 2e796f8302ff..e8638d528195 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "hfsplus_fs.h" diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 871c8b392099..020ba0936146 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 567653f7c0ce..76fa814df3d1 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/libfs.c b/fs/libfs.c index 28d6f35feed6..217896ca4fae 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/namespace.c b/fs/namespace.c index 8bfad42c1ccf..131cd7b94f47 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include /* init_rootfs */ #include /* get_fs_root et.al. 
*/ diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 4434e4977cf3..12550c2320cc 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index a4c46221755e..e5f7e47de68e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -6,6 +6,7 @@ #include /* UINT_MAX */ #include #include +#include #include #include diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index f36c29398de3..1aeb837ae414 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -30,6 +30,7 @@ #include /* kmem_* */ #include #include +#include #include "inotify.h" diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index df7ea8543a2e..8c9034ee7383 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f57043dace62..53d3f830358f 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 08643ac44a02..6639f487f835 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include "overlayfs.h" diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 023bb0b03352..b8b077821fb0 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 20f48abbb82f..9aa37c2f7f7d 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -7,6 +7,7 @@ * the Free Software Foundation. 
*/ +#include #include #include #include diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 952286f4826c..9dc1c0af586b 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "overlayfs.h" #include "ovl_entry.h" diff --git a/fs/posix_acl.c b/fs/posix_acl.c index c9d48dc78495..eebf5f6cf6d5 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3e64c6502dc8..3d203b1f5a02 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/root.c b/fs/proc/root.c index b90da888b81a..5d5fed20bfff 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "internal.h" diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 406fed92362a..74b489e3714d 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/stat.c b/fs/stat.c index 3f14d1ef0868..95bd41762770 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index cf1363dbf32b..2fd7fdf5438f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -43,6 +43,7 @@ #include "xfs_acl.h" #include +#include #include #include #include diff --git a/include/linux/cred.h b/include/linux/cred.h index 045d33e48069..b03e7d049a64 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,6 +18,7 @@ #include #include #include +#include #include struct cred; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ce93c4a02b79..0f4e9f4a43fd 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include diff --git a/include/linux/wait.h b/include/linux/wait.h index 1421132e9086..aacb1282d19a 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -6,6 +6,7 @@ #include #include #include + #include #include diff --git a/include/net/scm.h b/include/net/scm.h index 59fa93c01d2a..142ea9e7a6d0 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/include/rdma/ib.h b/include/rdma/ib.h index a6b93706b0fc..9b4c22a36931 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -35,6 +35,7 @@ #include #include +#include struct ib_addr { union { diff --git a/ipc/namespace.c b/ipc/namespace.c index 0abdea496493..1f1d713ac19c 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index eef2ce968636..25314c96fbe6 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ucount.c b/kernel/ucount.c index 8a11fc0cb459..62630a40ab3a 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/kernel/uid16.c b/kernel/uid16.c index 71645ae9303a..5c2dc5b2bf4f 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -12,6 +12,7 @@ #include #include #include +#include 
#include #include diff --git a/kernel/utsname.c b/kernel/utsname.c index 6976cd47dcf6..06585ad296ff 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/mm/usercopy.c b/mm/usercopy.c index 8345299e3e3b..7ccad05c0d5c 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -16,6 +16,7 @@ #include #include +#include #include enum { diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index ecc28cff08ab..ab9fec00a788 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -37,8 +37,10 @@ #include #include +#include #include #include + #include #include diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 16477df45b3b..3d705c688a27 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -13,6 +13,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a1ee933e3029..d2623b9f23d6 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index f44312a19522..462c5d36b871 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include "include/apparmor.h" diff --git a/security/keys/internal.h b/security/keys/internal.h index a705a7d92ad7..a2f4c0abb8d8 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -13,6 +13,7 @@ #define _INTERNAL_H #include +#include #include #include #include diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 04a764f71ec8..bcb0b597c391 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/security/keys/persistent.c b/security/keys/persistent.c index 1edc1f0a0ce2..d0cb5b32eff7 100644 --- a/security/keys/persistent.c +++ b/security/keys/persistent.c @@ -10,6 +10,8 @@ */ #include +#include + #include "internal.h" unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */ -- GitLab From 4cf421e55d69016989548e0fb8585e69f54bd283 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:03:42 +0100 Subject: [PATCH 0892/1084] sched/headers: Prepare for the removal of from Fix up missing #includes in other places that rely on sched.h doing that for them. 
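As a minimal sketch of the fix-up pattern described above (not taken from this series; the file name, the function, and the choice of <linux/completion.h> as the example header are assumptions made purely for illustration): a file that only compiled because <linux/sched.h> happened to pull in another header should include that header directly, so it keeps building once the transitive include goes away.

/* drivers/example/widget.c - hypothetical file, for illustration only */
#include <linux/completion.h>	/* used directly below; not relied on via <linux/sched.h> */
#include <linux/module.h>
#include <linux/sched.h>

static DECLARE_COMPLETION(widget_done);

/* Sleep until some (elided) interrupt handler calls complete(&widget_done). */
static int widget_wait_for_hw(void)
{
	wait_for_completion(&widget_done);
	return 0;
}

Naming the dependency explicitly is what keeps such files building once <linux/sched.h> stops providing it for them.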
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/math-emu/math.c | 1 + arch/sh/kernel/cpu/fpu.c | 1 + arch/sh/kernel/process.c | 1 + drivers/oprofile/cpu_buffer.c | 2 ++ 4 files changed, 5 insertions(+) diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index fa5ae0ad8983..d17d705f6545 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -2,6 +2,7 @@ #include #include #include +#include #include diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index 4e332244ea75..4d4737d099c6 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -3,6 +3,7 @@ #include #include #include +#include int init_fpu(struct task_struct *tsk) { diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 53bc6c4c84ec..30bff33e810e 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -5,6 +5,7 @@ #include #include #include +#include struct kmem_cache *task_xstate_cachep = NULL; unsigned int xstate_size; diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 0581461c3a67..eda2633a393d 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -23,6 +23,8 @@ #include #include +#include + #include "event_buffer.h" #include "cpu_buffer.h" #include "buffer_sync.h" -- GitLab From 037741a6d4ab2e4b0580dae97aca963d8a8dc68f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:08:30 +0100 Subject: [PATCH 0893/1084] sched/headers: Prepare for the removal of from Fix up missing #includes in other places that rely on sched.h doing that for them. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/i2c.h | 1 + kernel/fork.c | 1 + kernel/locking/locktorture.c | 1 + kernel/rcu/tree.h | 1 + 4 files changed, 4 insertions(+) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index bed8fbb45f31..6b183521c616 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -30,6 +30,7 @@ #include /* for struct device */ #include /* for completion */ #include +#include #include /* for Host Notify IRQ */ #include /* for struct device_node */ #include /* for swab16 */ diff --git a/kernel/fork.c b/kernel/fork.c index 08d6c091ac7c..8fbe8bcd1e20 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 5ea0a8969ee2..f24582d4dad3 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index b60f2b6caa14..ec62a05bfdb3 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -24,6 +24,7 @@ #include #include +#include #include #include #include -- GitLab From cc5efc2323a89dcf1a02c17b9b9f255c5a6e0492 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:06:45 +0100 Subject: [PATCH 0894/1084] sched/headers: Prepare for the removal of various unrelated headers from We are going to remove the following header inclusions from : #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include Fix up a single .h file 
that got hold of via one of these headers. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/user_namespace.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 363e0e8082a9..08264641b502 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #define UID_GID_MAP_MAX_EXTENTS 5 -- GitLab From b12fb7f46af7d548503d75be59f493f958c6d1b3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:33 +0100 Subject: [PATCH 0895/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the .c file that is going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/read_write.c | 3 ++- include/linux/sched/xacct.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/xacct.h diff --git a/fs/read_write.c b/fs/read_write.c index 5816d4c4cab0..dc60075653a8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -4,8 +4,9 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include +#include #include +#include #include #include #include diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h new file mode 100644 index 000000000000..890f7ce5cd27 --- /dev/null +++ b/include/linux/sched/xacct.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_XACCT_H +#define _LINUX_SCHED_XACCT_H + +#include + +#endif /* _LINUX_SCHED_XACCT_H */ -- GitLab From 174cd4b1e5fbd0d74c68cf3a74f5bd4923485512 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 19:15:33 +0100 Subject: [PATCH 0896/1084] sched/headers: Prepare to move signal wakeup & sigpending methods from into Fix up affected files that include this signal functionality via sched.h. 
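A minimal sketch of why so many files need the new include (this is not one of the files touched by the patch; the device structure and function names are invented for illustration): any translation unit that calls signal_pending() or the other signal helpers directly must now pull in <linux/sched/signal.h> itself instead of getting the declarations indirectly via <linux/sched.h>.

/* hypothetical driver code, for illustration only */
#include <linux/compiler.h>	/* READ_ONCE() */
#include <linux/delay.h>	/* msleep() */
#include <linux/errno.h>
#include <linux/sched/signal.h>	/* signal_pending() lives here after the split */
#include <linux/types.h>

struct example_dev {
	bool ready;
};

static int example_wait_until_ready(struct example_dev *dev)
{
	/* Poll for readiness, but give up if the caller receives a signal. */
	while (!READ_ONCE(dev->ready)) {
		if (signal_pending(current))
			return -ERESTARTSYS;
		msleep(10);
	}
	return 0;
}

Before the split these helpers were reachable through <linux/sched.h> alone, which is why the fix-up here is mostly one-line include additions across the tree.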
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/common/bL_switcher.c | 2 +- arch/cris/arch-v10/drivers/sync_serial.c | 2 +- arch/cris/arch-v32/drivers/sync_serial.c | 2 +- arch/mips/kernel/rtlx.c | 2 ++ arch/mips/kvm/mips.c | 2 ++ arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/kvm/powerpc.c | 1 + arch/powerpc/platforms/83xx/suspend.c | 1 + arch/powerpc/platforms/cell/spufs/sched.c | 2 +- arch/powerpc/platforms/cell/spufs/spufs.h | 1 + arch/s390/crypto/prng.c | 2 ++ arch/s390/kvm/kvm-s390.c | 2 ++ arch/s390/kvm/vsie.c | 2 ++ arch/sh/kernel/cpu/fpu.c | 2 +- arch/sh/kernel/disassemble.c | 2 ++ arch/sh/kernel/process.c | 2 +- arch/um/drivers/random.c | 2 +- arch/x86/kernel/apm_32.c | 2 +- block/blk-cgroup.c | 1 + block/blk-mq.c | 1 + crypto/algboss.c | 2 +- crypto/algif_aead.c | 1 + crypto/algif_skcipher.c | 1 + crypto/api.c | 2 +- drivers/atm/horizon.c | 1 + drivers/base/core.c | 1 + drivers/base/power/wakeup.c | 2 +- drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_receiver.c | 1 + drivers/block/drbd/drbd_worker.c | 2 +- drivers/block/swim3.c | 2 +- drivers/char/applicom.c | 2 +- drivers/char/hpet.c | 1 + drivers/char/hw_random/core.c | 1 + drivers/char/ipmi/ipmi_watchdog.c | 1 + drivers/char/lp.c | 2 +- drivers/char/ppdev.c | 2 +- drivers/char/rtc.c | 2 +- drivers/char/snsc.c | 2 +- drivers/dma-buf/dma-fence.c | 1 + .../gpu/drm/omapdrm/displays/panel-dsi-cm.c | 2 +- drivers/gpu/drm/vc4/vc4_gem.c | 1 + drivers/gpu/vga/vgaarb.c | 2 +- drivers/hid/hid-debug.c | 2 +- drivers/hid/hid-roccat.c | 2 +- drivers/hid/hidraw.c | 2 +- drivers/hid/usbhid/hiddev.c | 1 + drivers/hsi/clients/cmt_speech.c | 2 +- drivers/i2c/busses/i2c-ibm_iic.c | 2 ++ drivers/i2c/busses/i2c-mpc.c | 2 +- drivers/iio/industrialio-buffer.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 + drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 1 + drivers/isdn/capi/kcapi.c | 2 +- drivers/isdn/mISDN/timerdev.c | 2 ++ drivers/lguest/core.c | 1 + drivers/macintosh/adb.c | 2 +- drivers/macintosh/smu.c | 1 + drivers/macintosh/via-pmu.c | 2 +- drivers/mailbox/mailbox-test.c | 1 + drivers/md/dm.c | 1 + drivers/media/dvb-core/dvb_ca_en50221.c | 2 +- drivers/media/dvb-core/dvb_demux.c | 2 +- drivers/media/dvb-core/dvb_frontend.c | 2 +- drivers/media/pci/cx18/cx18-driver.h | 2 +- drivers/media/pci/ivtv/ivtv-driver.h | 35 ++++++++++--------- drivers/media/pci/pt1/pt1.c | 1 + drivers/media/pci/pt3/pt3.c | 1 + drivers/media/pci/solo6x10/solo6x10-i2c.c | 1 + drivers/media/pci/zoran/zoran_device.c | 1 + drivers/media/platform/vivid/vivid-radio-rx.c | 2 ++ drivers/media/platform/vivid/vivid-radio-tx.c | 1 + drivers/media/rc/lirc_dev.c | 2 +- drivers/media/usb/cpia2/cpia2_core.c | 1 + drivers/media/usb/gspca/cpia1.c | 2 ++ drivers/misc/cxl/fault.c | 2 +- drivers/misc/cxl/file.c | 2 +- drivers/misc/ibmasm/r_heartbeat.c | 2 +- drivers/misc/lis3lv02d/lis3lv02d.c | 1 + drivers/misc/mei/bus.c | 2 +- drivers/misc/mei/client.c | 2 +- drivers/misc/mei/main.c | 2 +- drivers/misc/mic/scif/scif_main.h | 2 +- drivers/misc/vexpress-syscfg.c | 2 +- drivers/mtd/tests/mtd_test.h | 2 +- drivers/net/bonding/bond_options.c | 2 ++ drivers/net/bonding/bond_sysfs.c | 2 +- drivers/net/can/softing/softing_fw.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 1 + .../ethernet/cavium/liquidio/octeon_main.h | 2 ++ drivers/net/ethernet/sfc/falcon/falcon.c | 2 ++ drivers/net/irda/stir4200.c | 1 + drivers/net/macvtap.c | 2 +- drivers/net/ppp/ppp_generic.c | 1 + 
drivers/net/tun.c | 1 + drivers/net/usb/hso.c | 2 +- drivers/net/usb/qmi_wwan.c | 1 + drivers/net/wan/cosa.c | 2 +- drivers/net/wireless/ath/ath6kl/cfg80211.c | 1 + .../net/wireless/broadcom/b43legacy/main.c | 2 +- .../net/wireless/intersil/hostap/hostap_hw.c | 2 +- .../wireless/intersil/hostap/hostap_ioctl.c | 2 +- drivers/oprofile/event_buffer.c | 2 +- drivers/parport/daisy.c | 2 +- drivers/parport/ieee1284.c | 2 +- drivers/parport/ieee1284_ops.c | 2 +- drivers/parport/parport_ip32.c | 2 +- drivers/parport/parport_pc.c | 2 +- drivers/parport/share.c | 2 +- drivers/pci/access.c | 2 +- drivers/pci/hotplug/cpci_hotplug_core.c | 1 + drivers/pci/hotplug/cpqphp.h | 2 +- drivers/pci/hotplug/pciehp.h | 2 +- drivers/pci/hotplug/shpchp.h | 2 +- drivers/rtc/rtc-dev.c | 2 +- drivers/s390/cio/device.c | 1 + drivers/scsi/lpfc/lpfc_vport.c | 1 + drivers/scsi/osst.c | 2 +- drivers/scsi/st.c | 2 +- drivers/soc/fsl/qbman/dpaa_sys.h | 1 + drivers/staging/comedi/comedi_fops.c | 2 +- drivers/staging/dgnc/dgnc_tty.c | 2 +- drivers/staging/dgnc/dgnc_utils.c | 2 +- drivers/staging/greybus/uart.c | 2 +- .../lustre/include/lustre/lustre_user.h | 1 + .../lustre/lustre/include/lustre_lib.h | 2 +- .../lustre/lustre/include/obd_support.h | 2 ++ drivers/staging/media/lirc/lirc_sir.c | 2 +- drivers/staging/media/lirc/lirc_zilog.c | 2 +- drivers/staging/speakup/speakup_soft.c | 2 +- drivers/target/iscsi/cxgbit/cxgbit_target.c | 2 ++ drivers/tty/n_gsm.c | 2 +- drivers/tty/serial/crisv10.c | 2 +- drivers/tty/serial/serial_core.c | 1 + drivers/tty/tty_ioctl.c | 2 +- drivers/tty/tty_port.c | 2 +- drivers/uio/uio.c | 2 +- drivers/usb/class/cdc-acm.c | 1 + drivers/usb/class/usblp.c | 2 +- drivers/usb/gadget/function/f_fs.c | 1 + drivers/usb/image/mdc800.c | 2 +- drivers/usb/misc/adutux.c | 1 + drivers/usb/misc/idmouse.c | 1 + drivers/usb/misc/rio500.c | 2 +- drivers/usb/misc/uss720.c | 1 + drivers/usb/mon/mon_bin.c | 1 + drivers/usb/mon/mon_text.c | 1 + drivers/usb/serial/digi_acceleport.c | 1 + drivers/usb/serial/generic.c | 1 + drivers/vhost/net.c | 1 + drivers/vhost/vhost.c | 1 + drivers/video/fbdev/cobalt_lcdfb.c | 1 + .../omap2/omapfb/displays/panel-dsi-cm.c | 2 +- fs/afs/rxrpc.c | 2 ++ fs/aio.c | 2 +- fs/btrfs/ctree.h | 1 + fs/ceph/caps.c | 2 +- fs/cifs/inode.c | 2 ++ fs/coda/psdev.c | 2 +- fs/dlm/user.c | 1 + fs/ecryptfs/read_write.c | 2 ++ fs/eventfd.c | 2 +- fs/eventpoll.c | 2 +- fs/ext4/ext4.h | 1 + fs/f2fs/data.c | 1 + fs/fuse/dev.c | 1 + fs/gfs2/lock_dlm.c | 1 + fs/gfs2/super.c | 2 +- fs/hpfs/hpfs_fn.h | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/jffs2/nodemgmt.c | 2 +- fs/nfs/inode.c | 2 +- fs/nilfs2/segment.c | 2 ++ fs/notify/fanotify/fanotify_user.c | 1 + fs/notify/inotify/inotify_user.c | 2 +- fs/ntfs/file.c | 2 +- fs/ocfs2/alloc.c | 1 + fs/ocfs2/dlm/dlmdomain.c | 1 + fs/ocfs2/dlmfs/userdlm.c | 1 + fs/ocfs2/dlmglue.c | 1 + fs/orangefs/orangefs-kernel.h | 2 +- fs/overlayfs/copy_up.c | 2 +- fs/splice.c | 2 ++ fs/userfaultfd.c | 2 +- fs/xfs/xfs_linux.h | 2 +- include/drm/drm_os_linux.h | 1 + include/linux/sunrpc/types.h | 1 + include/media/v4l2-ioctl.h | 1 + include/net/busy_poll.h | 1 + kernel/locking/mutex.c | 2 +- kernel/locking/rtmutex.c | 2 +- kernel/locking/rwsem-spinlock.c | 2 +- kernel/locking/rwsem-xadd.c | 2 +- kernel/rcu/rcutorture.c | 2 +- kernel/sched/completion.c | 2 +- kernel/sched/swait.c | 2 +- kernel/sched/wait.c | 2 +- kernel/time/alarmtimer.c | 1 + kernel/time/hrtimer.c | 2 +- kernel/time/timer.c | 2 +- lib/percpu_ida.c | 2 +- mm/compaction.c | 1 + mm/gup.c | 2 +- mm/hugetlb.c | 1 + 
mm/memory_hotplug.c | 1 + mm/shmem.c | 1 + mm/userfaultfd.c | 1 + net/atm/svc.c | 2 +- net/bluetooth/af_bluetooth.c | 2 ++ net/bluetooth/cmtp/capi.c | 2 +- net/bluetooth/hci_request.c | 2 ++ net/bluetooth/l2cap_sock.c | 1 + net/bluetooth/rfcomm/sock.c | 1 + net/bluetooth/sco.c | 1 + net/bridge/br_sysfs_br.c | 1 + net/bridge/br_sysfs_if.c | 1 + net/core/ethtool.c | 2 +- net/core/net-sysfs.c | 1 + net/dccp/output.c | 1 + net/ipv4/devinet.c | 1 + net/ipv6/addrconf.c | 1 + net/irda/ircomm/ircomm_tty.c | 2 +- net/irda/irnet/irnet_ppp.c | 3 +- net/iucv/af_iucv.c | 2 +- net/kcm/kcmsock.c | 2 ++ net/llc/af_llc.c | 2 ++ net/nfc/llcp_sock.c | 1 + net/phonet/pep.c | 1 + net/phonet/socket.c | 2 ++ net/rxrpc/conn_client.c | 2 ++ net/rxrpc/recvmsg.c | 2 ++ net/rxrpc/sendmsg.c | 2 ++ net/tipc/socket.c | 2 ++ net/vmw_vsock/af_vsock.c | 1 + net/vmw_vsock/virtio_transport_common.c | 1 + sound/core/control.c | 1 + sound/core/hwdep.c | 1 + sound/core/oss/pcm_oss.c | 1 + sound/core/pcm_lib.c | 1 + sound/core/pcm_native.c | 1 + sound/core/rawmidi.c | 2 +- sound/core/seq/oss/seq_oss_device.h | 2 +- sound/core/seq/oss/seq_oss_writeq.c | 1 + sound/core/seq/seq_fifo.c | 2 ++ sound/core/seq/seq_memory.c | 1 + sound/core/timer.c | 1 + sound/firewire/bebob/bebob.h | 1 + sound/firewire/dice/dice.h | 1 + sound/firewire/digi00x/digi00x.h | 1 + sound/firewire/fireworks/fireworks.h | 1 + sound/firewire/oxfw/oxfw.h | 1 + sound/firewire/tascam/tascam.h | 1 + sound/isa/gus/gus_pcm.c | 2 ++ sound/isa/msnd/msnd.c | 1 + sound/isa/sb/emu8000.c | 2 +- sound/isa/sb/emu8000_patch.c | 2 ++ sound/isa/sb/emu8000_pcm.c | 2 ++ sound/isa/wavefront/wavefront_synth.c | 1 + sound/oss/dmabuf.c | 2 ++ sound/oss/dmasound/dmasound_core.c | 1 + sound/oss/midibuf.c | 2 ++ sound/oss/msnd_pinnacle.c | 2 ++ sound/oss/sound_config.h | 1 + sound/oss/swarm_cs4297a.c | 2 +- virt/kvm/kvm_main.c | 2 +- 265 files changed, 319 insertions(+), 139 deletions(-) diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 083c9e517d22..57f3b7512636 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index 9ac75d68f184..cc62572c1b94 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index ef515af1a377..8efcc1a899a8 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c index c5c4fd54d797..b80dd8b17a76 100644 --- a/arch/mips/kernel/rtlx.c +++ b/arch/mips/kernel/rtlx.c @@ -12,6 +12,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index ed81e5ac1426..15a1b1716c2e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -16,8 +16,10 @@ #include #include #include +#include #include #include + #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1e107ece4e37..fc56a9ce785d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -22,7 +22,7 @@ 
#include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2b38d824e9e5..95c91a9de351 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 08f92f6ed228..978b85bb3233 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index d317c84dc794..1fbb5da17dd2 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -23,7 +23,7 @@ #undef DEBUG #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index aac733966092..5e59f80e95db 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 85b7f5efe06a..5a3ec04a7082 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f5694838234d..fd6cd05bb6a7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -29,6 +29,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 38556e395915..5491be39776b 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index 4d4737d099c6..37e35330aae6 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c index 64d5d8dded7c..015fee58014b 100644 --- a/arch/sh/kernel/disassemble.c +++ b/arch/sh/kernel/disassemble.c @@ -12,6 +12,8 @@ #include #include +#include + /* * Format of an instruction in memory. */ diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 30bff33e810e..80a61c5e3015 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 57f03050c850..37c51a6be690 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -6,7 +6,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. 
*/ -#include +#include #include #include #include diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 4a7080c84a5a..dc04b30cbd60 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -218,7 +218,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 295e98c2c8cc..bbe7ee00bd3d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-mq.c b/block/blk-mq.c index 746c14e0d157..6f35b6fd4799 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/algboss.c b/crypto/algboss.c index ccb85e1798f2..960d8548171b 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 533265f110e0..5a8053758657 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a9e79d8eff87..43839b00fe6c 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/api.c b/crypto/api.c index b16ce1653284..941cd4c6c7ec 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include "internal.h" diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 2bf1ef1c3c78..0f18480b33b5 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/base/core.c b/drivers/base/core.c index 3050e6f99403..684bda4d14a1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "base.h" diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index f546f8f107b0..136854970489 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 116509852a34..c7d530a95e53 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -52,6 +52,7 @@ #define __KERNEL_SYSCALLS__ #include #include +#include #include #include "drbd_int.h" diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8b40a5b2f8e6..aa6bf9692eff 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #define __KERNEL_SYSCALLS__ #include diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c6755c9a0aea..3bff33f21435 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index aabd8e9d3035..61b3ffa4f458 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/char/applicom.c 
b/drivers/char/applicom.c index e5c62dcf2c11..e770ad977472 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 20b32bb8c2af..8bdc38d81adf 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 5c654b5d4adf..503a41dfa193 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 30b9e83bf1bf..5ca24d9b101b 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -53,6 +53,7 @@ #include #include #include +#include #ifdef CONFIG_X86 /* diff --git a/drivers/char/lp.c b/drivers/char/lp.c index 5b6742770656..565e4cf04a02 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -117,7 +117,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 87885d146dbb..2a558c706581 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -58,7 +58,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index 35259961cc38..974d48927b07 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c index ec07f0e99732..6aa32679fd58 100644 --- a/drivers/char/snsc.c +++ b/drivers/char/snsc.c @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index d1f1f456f5c4..d195d617076d 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -22,6 +22,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c index a2bb855a2851..ac5800c72cb4 100644 --- a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c +++ b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index ab3016982466..1eef98c3331d 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index 0f5b2dd24507..92f1452dad57 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index acfb522a432a..c6c9c51c806f 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c index 76d06cf87b2a..fb77dec720a4 100644 --- a/drivers/hid/hid-roccat.c +++ b/drivers/hid/hid-roccat.c @@ -25,7 +25,7 @@ #include #include -#include 
+#include #include #include diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index f0e2757cb909..ec530454e6f6 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 700145b15088..774bd701dae0 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c index 7175e6bedf21..727f968ac1cb 100644 --- a/drivers/hsi/clients/cmt_speech.c +++ b/drivers/hsi/clients/cmt_speech.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index 412b91d255ad..961c5f42d956 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -37,6 +37,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 565a49a0c564..96caf378b1dc 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 4972986f6455..d2b465140a6b 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include "iio_core.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index a6d6c617b597..0cdf2b7f272f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index deedb6fc1b05..3e10e3dac2e7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -31,6 +31,7 @@ */ #include +#include #include #include diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 49d0f70c2bae..1dfd1085a04f 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index 9438d7ec3308..b1e135fc1fb5 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include "core.h" static DEFINE_MUTEX(mISDN_mutex); diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index ac219045daf7..395ed1961dbf 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 152414e6378a..fee939efc4fc 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 227869159ac0..1ac66421877a 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include diff --git 
a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 43b8db2b5445..cce99f72e4ae 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index 9c79f8019d2a..97fb956bb6e0 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -21,6 +21,7 @@ #include #include #include +#include #define MBOX_MAX_SIG_LEN 8 #define MBOX_MAX_MSG_LEN 128 diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9f37d7fc2786..f4ffd1eb8f44 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c index 000d737ad827..8d65028c7a74 100644 --- a/drivers/media/dvb-core/dvb_ca_en50221.c +++ b/drivers/media/dvb-core/dvb_ca_en50221.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include "dvb_ca_en50221.h" diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c index 4eac71e50c5f..6628f80d184f 100644 --- a/drivers/media/dvb-core/dvb_demux.c +++ b/drivers/media/dvb-core/dvb_demux.c @@ -19,7 +19,7 @@ #define pr_fmt(fmt) "dvb_demux: " fmt -#include +#include #include #include #include diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 85ae3669aa66..e3fff8f64d37 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/media/pci/cx18/cx18-driver.h b/drivers/media/pci/cx18/cx18-driver.h index fef3c736fcba..7be2088c45fe 100644 --- a/drivers/media/pci/cx18/cx18-driver.h +++ b/drivers/media/pci/cx18/cx18-driver.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/pci/ivtv/ivtv-driver.h b/drivers/media/pci/ivtv/ivtv-driver.h index cde452e30746..d27c5c2c07ea 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.h +++ b/drivers/media/pci/ivtv/ivtv-driver.h @@ -38,37 +38,38 @@ * using information provided by Jiun-Kuei Jung @ AVerMedia. 
*/ -#include +#include +#include #include -#include +#include #include +#include +#include +#include #include #include -#include -#include -#include -#include -#include #include -#include -#include +#include #include -#include #include -#include +#include +#include #include -#include #include -#include +#include -#include -#include -#include +#include +#include #include +#include #include #include #include -#include +#include +#include +#include + +#include /* Memory layout */ #define IVTV_ENCODER_OFFSET 0x00000000 diff --git a/drivers/media/pci/pt1/pt1.c b/drivers/media/pci/pt1/pt1.c index da1eebd2016f..3219d2f3271e 100644 --- a/drivers/media/pci/pt1/pt1.c +++ b/drivers/media/pci/pt1/pt1.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include diff --git a/drivers/media/pci/pt3/pt3.c b/drivers/media/pci/pt3/pt3.c index 77f4d15f322b..e8b5d0992157 100644 --- a/drivers/media/pci/pt3/pt3.c +++ b/drivers/media/pci/pt3/pt3.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "dmxdev.h" #include "dvbdev.h" diff --git a/drivers/media/pci/solo6x10/solo6x10-i2c.c b/drivers/media/pci/solo6x10/solo6x10-i2c.c index c908672b2c40..e83bb79f9349 100644 --- a/drivers/media/pci/solo6x10/solo6x10-i2c.c +++ b/drivers/media/pci/solo6x10/solo6x10-i2c.c @@ -27,6 +27,7 @@ * thread context, ACK the interrupt, and move on. -- BenC */ #include +#include #include "solo6x10.h" diff --git a/drivers/media/pci/zoran/zoran_device.c b/drivers/media/pci/zoran/zoran_device.c index 671907a6e6b6..40adceebca7e 100644 --- a/drivers/media/pci/zoran/zoran_device.c +++ b/drivers/media/pci/zoran/zoran_device.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/platform/vivid/vivid-radio-rx.c b/drivers/media/platform/vivid/vivid-radio-rx.c index f99092ca8f5c..47c36c26096b 100644 --- a/drivers/media/platform/vivid/vivid-radio-rx.c +++ b/drivers/media/platform/vivid/vivid-radio-rx.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/media/platform/vivid/vivid-radio-tx.c b/drivers/media/platform/vivid/vivid-radio-tx.c index 8c59d4f53200..0e8025b7b4dd 100644 --- a/drivers/media/platform/vivid/vivid-radio-tx.c +++ b/drivers/media/platform/vivid/vivid-radio-tx.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index a54ca531d8ef..393dccaabdd0 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/media/usb/cpia2/cpia2_core.c b/drivers/media/usb/cpia2/cpia2_core.c index 431dd0b4b332..b1d13444ff30 100644 --- a/drivers/media/usb/cpia2/cpia2_core.c +++ b/drivers/media/usb/cpia2/cpia2_core.c @@ -32,6 +32,7 @@ #include #include #include +#include #define FIRMWARE "cpia2/stv0672_vp4.bin" MODULE_FIRMWARE(FIRMWARE); diff --git a/drivers/media/usb/gspca/cpia1.c b/drivers/media/usb/gspca/cpia1.c index 23d3285f182a..e91d00762e94 100644 --- a/drivers/media/usb/gspca/cpia1.c +++ b/drivers/media/usb/gspca/cpia1.c @@ -27,6 +27,8 @@ #define MODULE_NAME "cpia1" #include +#include + #include "gspca.h" MODULE_AUTHOR("Hans de Goede "); diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index e33011e3e7e2..2fa015c05561 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/misc/cxl/file.c 
b/drivers/misc/cxl/file.c index 859959f19f10..e7139c76f961 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c index 232034f5da48..5c7dd26db716 100644 --- a/drivers/misc/ibmasm/r_heartbeat.c +++ b/drivers/misc/ibmasm/r_heartbeat.c @@ -20,7 +20,7 @@ * */ -#include +#include #include "ibmasm.h" #include "dot_command.h" diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index fb8705fc3aca..e389b0b5278d 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -23,6 +23,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index cb3e9e0ca049..df5f78ae3d25 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 68fe37b5bc52..d3e3372424d6 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -14,7 +14,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 9d0b7050c79a..bf816449cd40 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h index a08f0b600a9e..0e5eff9ad080 100644 --- a/drivers/misc/mic/scif/scif_main.h +++ b/drivers/misc/mic/scif/scif_main.h @@ -18,7 +18,7 @@ #ifndef SCIF_MAIN_H #define SCIF_MAIN_H -#include +#include #include #include #include diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c index c344483fa7d6..2cde80c7bb93 100644 --- a/drivers/misc/vexpress-syscfg.c +++ b/drivers/misc/vexpress-syscfg.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/mtd/tests/mtd_test.h b/drivers/mtd/tests/mtd_test.h index 4b7bee17c924..04afd0e7074f 100644 --- a/drivers/mtd/tests/mtd_test.h +++ b/drivers/mtd/tests/mtd_test.h @@ -1,5 +1,5 @@ #include -#include +#include static inline int mtdtest_relax(void) { diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 577e57cad1dc..1bcbb8913e17 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include static int bond_option_active_slave_set(struct bonding *bond, diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index e23c3ed737de..770623a0cc01 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c index 4063215c9b54..aac58ce6e371 100644 --- a/drivers/net/can/softing/softing_fw.c +++ b/drivers/net/can/softing/softing_fw.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a448177990fe..30d1eb9ebec9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ 
-20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index 8cd389148166..aa36e9ae7676 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -23,6 +23,8 @@ #ifndef _OCTEON_MAIN_H_ #define _OCTEON_MAIN_H_ +#include + #if BITS_PER_LONG == 32 #define CVM_CAST64(v) ((long long)(v)) #elif BITS_PER_LONG == 64 diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c index c6ff0cc5ef18..93c713c1f627 100644 --- a/drivers/net/ethernet/sfc/falcon/falcon.c +++ b/drivers/net/ethernet/sfc/falcon/falcon.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include "net_driver.h" #include "bitfield.h" #include "efx.h" diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index 42da094b68dd..7ee514879531 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a4bfc10b61dd..da85057680d6 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index a411b43a69eb..f9c0e62716ea 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 30863e378925..dc1b1dd9157c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index e7b516342678..4f2e8141dbe2 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -52,7 +52,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 24d5272cdce5..805674550683 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 087eb266601f..4ca71bca39ac 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -78,7 +78,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index b7fe0af4cb24..363b30a549c2 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "core.h" #include "cfg80211.h" diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index e97ab2b91663..cdafebb9c936 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c index 544ef7adde7d..04dfd040a650 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_hw.c +++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c @@ -43,7 +43,7 
@@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c index a5656bc0e6aa..b2c6b065b542 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index 67935fbbbcab..32888f2bd1a9 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c index d998d0ed2bec..46eb15fb57ff 100644 --- a/drivers/parport/daisy.c +++ b/drivers/parport/daisy.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c index f9fd4b33a546..74cc6dd982d2 100644 --- a/drivers/parport/ieee1284.c +++ b/drivers/parport/ieee1284.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #undef DEBUG /* undef me for production */ diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c index 75071605d22f..a959224d011b 100644 --- a/drivers/parport/ieee1284_ops.c +++ b/drivers/parport/ieee1284_ops.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #undef DEBUG /* undef me for production */ diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c index 30e981be14c2..dcbeeb220dda 100644 --- a/drivers/parport/parport_ip32.c +++ b/drivers/parport/parport_ip32.c @@ -102,7 +102,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 3e56e7deab8e..9d42dfe65d44 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 3308427ed9f7..bc090daa850a 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/pci/access.c b/drivers/pci/access.c index b9dd37c8c9ce..8b7382705bf2 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index 7ec8a8f72c69..95f689f53920 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp.h b/drivers/pci/hotplug/cpqphp.h index 9103a7b9f3b9..48c8a066a6b7 100644 --- a/drivers/pci/hotplug/cpqphp.h +++ b/drivers/pci/hotplug/cpqphp.h @@ -32,7 +32,7 @@ #include /* for read? and write? 
functions */ #include /* for delays */ #include -#include /* for signal_pending() */ +#include /* for signal_pending() */ #define MY_NAME "cpqphp" diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 37d70b5ad22f..06109d40c4ac 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -33,7 +33,7 @@ #include #include #include -#include /* signal_pending() */ +#include /* signal_pending() */ #include #include #include diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index 4da8fc601467..70c7ea6af034 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -33,7 +33,7 @@ #include #include #include -#include /* signal_pending(), struct timer_list */ +#include /* signal_pending(), struct timer_list */ #include #include diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index a6d9434addf6..6dc8f29697ab 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include "rtc-core.h" static dev_t rtc_devt; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 79823ee9c100..b8006ea9099c 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index e18bbc66e83b..4e36998a266c 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 75ac662793a3..c47f4b349bac 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -35,7 +35,7 @@ static const char * osst_version = "0.99.4"; #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 81212d4bd9bf..e5ef78a6848e 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -23,7 +23,7 @@ static const char *verstr = "20160209"; #include #include -#include +#include #include #include #include diff --git a/drivers/soc/fsl/qbman/dpaa_sys.h b/drivers/soc/fsl/qbman/dpaa_sys.h index 2eaf3184f61d..2ce394aa4c95 100644 --- a/drivers/soc/fsl/qbman/dpaa_sys.h +++ b/drivers/soc/fsl/qbman/dpaa_sys.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 57e8599b54e6..8deac8d9225d 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/dgnc/dgnc_tty.c b/drivers/staging/dgnc/dgnc_tty.c index c63e591631f6..c3b8fc54883d 100644 --- a/drivers/staging/dgnc/dgnc_tty.c +++ b/drivers/staging/dgnc/dgnc_tty.c @@ -19,7 +19,7 @@ */ #include -#include /* For jiffies, task states */ +#include /* For jiffies, task states, etc. 
*/ #include /* For tasklet and interrupt structs/defines */ #include #include diff --git a/drivers/staging/dgnc/dgnc_utils.c b/drivers/staging/dgnc/dgnc_utils.c index 95272f4765fc..6f59240024d1 100644 --- a/drivers/staging/dgnc/dgnc_utils.c +++ b/drivers/staging/dgnc/dgnc_utils.c @@ -1,5 +1,5 @@ #include -#include +#include #include "dgnc_utils.h" /* diff --git a/drivers/staging/greybus/uart.c b/drivers/staging/greybus/uart.c index ab0dbf5cab5a..43255e2e9276 100644 --- a/drivers/staging/greybus/uart.c +++ b/drivers/staging/greybus/uart.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h index 21aec0ca9ad3..7d8628ce0d3b 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h @@ -44,6 +44,7 @@ #ifdef __KERNEL__ # include +# include # include /* snprintf() */ # include #else /* !__KERNEL__ */ diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h index 27f3148c4344..b04d613846ee 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ b/drivers/staging/lustre/lustre/include/lustre_lib.h @@ -42,7 +42,7 @@ * @{ */ -#include +#include #include #include #include "../../include/linux/libcfs/libcfs.h" diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index aaedec7d793c..dace6591a0a4 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -34,6 +34,8 @@ #define _OBD_SUPPORT #include +#include + #include "../../include/linux/libcfs/libcfs.h" #include "lustre_compat.h" #include "lprocfs_status.h" diff --git a/drivers/staging/media/lirc/lirc_sir.c b/drivers/staging/media/lirc/lirc_sir.c index c75ae43095ba..c6c3de94adaa 100644 --- a/drivers/staging/media/lirc/lirc_sir.c +++ b/drivers/staging/media/lirc/lirc_sir.c @@ -36,7 +36,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/drivers/staging/media/lirc/lirc_zilog.c b/drivers/staging/media/lirc/lirc_zilog.c index 34aac3e2eb87..e4a533b6beb3 100644 --- a/drivers/staging/media/lirc/lirc_zilog.c +++ b/drivers/staging/media/lirc/lirc_zilog.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c index ff68a384f9c2..d2ff0afd685a 100644 --- a/drivers/staging/speakup/speakup_soft.c +++ b/drivers/staging/speakup/speakup_soft.c @@ -22,7 +22,7 @@ #include #include /* for misc_register, and SYNTH_MINOR */ #include /* for poll_wait() */ -#include /* schedule(), signal_pending(), TASK_INTERRUPTIBLE */ +#include /* schedule(), signal_pending(), TASK_INTERRUPTIBLE */ #include "spk_priv.h" #include "speakup.h" diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 8bcb9b71f764..0c3e8fce3695 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -8,6 +8,8 @@ #include #include +#include + #include #include #include diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index f3932baed07d..55577cf9b6a4 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -39,7 +39,7 @@ #include #include #include -#include 
+#include #include #include #include diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c index e92c23470e51..59a2a7e18b5a 100644 --- a/drivers/tty/serial/crisv10.c +++ b/drivers/tty/serial/crisv10.c @@ -12,7 +12,7 @@ static char *serial_version = "$Revision: 1.25 $"; #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 9939c3d9912b..3fe56894974a 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index f27fc0f14c11..a9a978731c5b 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 5cd3cd932293..1d21a9c1d33e 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 31d95dc9c202..60ce7fd54e89 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 235e305f8473..d5388938bc7a 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -32,6 +32,7 @@ #undef VERBOSE_DEBUG #include +#include #include #include #include diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 071964c7847f..cc61055fb9be 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -49,7 +49,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 87fccf611b69..a5b7cd615698 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/drivers/usb/image/mdc800.c b/drivers/usb/image/mdc800.c index 5cf2633cdb04..e92540a21b6b 100644 --- a/drivers/usb/image/mdc800.c +++ b/drivers/usb/image/mdc800.c @@ -85,7 +85,7 @@ * (20/10/1999) */ -#include +#include #include #include #include diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index c5fa584d8f0a..db9a9e6ff6be 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -21,6 +21,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index debc1fd74b0d..8b9fd7534f69 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c index fc329c98a6e8..b106ce76997b 100644 --- a/drivers/usb/misc/rio500.c +++ b/drivers/usb/misc/rio500.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 0a643fa74cab..e45a3a680db8 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -50,6 +50,7 @@ #include #include #include +#include /* * Version Information diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index 
9fb8b1e6ecc2..b6d8bf475c92 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index db1a4abf2806..19c416d69eb9 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index eb433922598c..ab78111e0968 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -27,6 +27,7 @@ #include #include #include +#include #include /* Defines */ diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 944de657a07a..49ce2be90fa0 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 5c98ad4d2f4c..9b519897cc17 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4a00140a7624..dcbe2e29bf17 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "vhost.h" diff --git a/drivers/video/fbdev/cobalt_lcdfb.c b/drivers/video/fbdev/cobalt_lcdfb.c index 038ac6934fe9..9da90bd242f4 100644 --- a/drivers/video/fbdev/cobalt_lcdfb.c +++ b/drivers/video/fbdev/cobalt_lcdfb.c @@ -26,6 +26,7 @@ #include #include #include +#include /* * Cursor position address diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 8b810696a42b..fd2b372d0264 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 95f42872b787..4f6b00efc27c 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -10,6 +10,8 @@ */ #include +#include + #include #include #include diff --git a/fs/aio.c b/fs/aio.c index 7e2ab9c8e39c..0bb108476de2 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 105d4d43993e..a8812d95359d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -20,6 +20,7 @@ #define __BTRFS_CTREE__ #include +#include #include #include #include diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index cd966f276a8d..68c78be19d5b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7ab5be7944aa..1858fc20eb7d 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include "cifsfs.h" #include "cifspdu.h" diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 822629126e89..f40e3953e7fe 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 1ce908c2232c..23488f559cf9 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -17,6 +17,7 @@ #include #include #include +#include 
#include "dlm_internal.h" #include "lockspace.h" diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 158a3a39f82d..039e627194a9 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -22,6 +22,8 @@ #include #include +#include + #include "ecryptfs_kernel.h" /** diff --git a/fs/eventfd.c b/fs/eventfd.c index 1231cd1999d8..68b9fffcb2c8 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5ec16313da1a..341251421ced 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2fd17e8e4984..77798a46b0c6 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1375fef11146..1602b4bccae6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f11792672977..b681b43c766e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 8b907c5cc913..0515f0a68637 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "incore.h" #include "glock.h" diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e3ee387a6dfe..361796a84fce 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -10,7 +10,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index aebb78f9e47f..d352f3a6af7f 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 54de77e78775..8f96461236f6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -11,7 +11,7 @@ #include #include -#include /* remove ASAP */ +#include /* remove ASAP */ #include #include #include diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index cda0774c2c9c..a7bbe879cfc3 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -14,7 +14,7 @@ #include #include #include -#include /* For cond_resched() */ +#include #include "nodelist.h" #include "debug.h" diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ca4d96b1942..685565b229c3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 7d18d62e8e07..febed1217b3f 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -30,6 +30,8 @@ #include #include #include +#include + #include "nilfs.h" #include "btnode.h" #include "page.h" diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 7ebfca6a1427..2b37f2785834 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -14,6 +14,7 @@ #include #include #include +#include #include diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 1cf41c623be1..498d609b26c7 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -30,7 
+30,7 @@ #include #include /* roundup() */ #include /* LOOKUP_FOLLOW */ -#include /* struct user */ +#include #include /* struct kmem_cache */ #include #include diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 358ed7e1195a..c4f68c338735 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index d4ec0d8961a6..fb15a96df0b6 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 32fd261ae13d..a2b19fbdcf46 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "cluster/heartbeat.h" #include "cluster/nodemanager.h" diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c index f70cda2f090d..9cecf4857195 100644 --- a/fs/ocfs2/dlmfs/userdlm.c +++ b/fs/ocfs2/dlmfs/userdlm.c @@ -28,6 +28,7 @@ */ #include +#include #include #include diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8dce4099a6ca..3b7c937a36b5 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -33,6 +33,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_DLM_GLUE #include diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 70355a9a2596..8948683b367f 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 53d3f830358f..a34aa7aa2563 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/splice.c b/fs/splice.c index 4ef78aa8ef61..e49336555739 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -33,6 +33,8 @@ #include #include #include +#include + #include "internal.h" /* diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 18d12bfff770..973607df579d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 7a989de224f4..592fdf7111cb 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -55,7 +55,7 @@ typedef __u32 xfs_nlink_t; #include #include #include -#include +#include #include #include #include diff --git a/include/drm/drm_os_linux.h b/include/drm/drm_os_linux.h index 86ab99bc0ac5..35e1482ba8a1 100644 --- a/include/drm/drm_os_linux.h +++ b/include/drm/drm_os_linux.h @@ -4,6 +4,7 @@ */ #include /* For task queue support */ +#include #include #ifndef readq diff --git a/include/linux/sunrpc/types.h b/include/linux/sunrpc/types.h index d222f47550af..11a7536c0fd2 100644 --- a/include/linux/sunrpc/types.h +++ b/include/linux/sunrpc/types.h @@ -10,6 +10,7 @@ #define _LINUX_SUNRPC_TYPES_H_ #include +#include #include #include #include diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index 574ff2ae94be..6cd94e5ee113 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -12,6 +12,7 @@ #include #include #include +#include #include /* need __user */ #include diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index b96832df239e..c0452de83086 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -26,6 +26,7 @@ #include #include 
+#include #include #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 57f6311e2405..82ff5726bc1b 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -19,7 +19,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index d4f798491361..1c8c8707853a 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -12,7 +12,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 5eacab880f67..91e7bc8e9d5a 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -6,7 +6,7 @@ * - Derived also from comments by Linus */ #include -#include +#include #include enum rwsem_waiter_type { diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 4fe8d8ad4396..9bd6e768164e 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 6a28b79710f0..cccc417a8135 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index f063a25d4449..b294a8ee2842 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -11,7 +11,7 @@ * Waiting for completion is a typically sync point, but not an exclusion point. */ -#include +#include #include /** diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 82f0dff90030..3d5610dcce11 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c @@ -1,4 +1,4 @@ -#include +#include #include void __init_swait_queue_head(struct swait_queue_head *q, const char *name, diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 9453efe9b25a..1fedfcf6fc9b 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -5,7 +5,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index e6dc9a538efa..04939053c823 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8e11d8d9f419..df512e8c04d4 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 82a6bfa0c307..f4465d2a25d1 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 6d40944960de..010410990bc6 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/compaction.c b/mm/compaction.c index 0fdfde016ee2..81e1eaa2a2cf 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/gup.c b/mm/gup.c index 94fab8fa432b..9c047e951aa3 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include diff 
--git a/mm/hugetlb.c b/mm/hugetlb.c index 2e0e8159ce8e..a7aa811b7d14 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1d3ed58f92ab..295479b792ec 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/mm/shmem.c b/mm/shmem.c index a26649a6633f..de8cdef4ef9b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 9f0ad2a4f102..479e631d43c2 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/net/atm/svc.c b/net/atm/svc.c index 878563a8354d..db9794ec61d8 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -10,7 +10,7 @@ #include /* printk */ #include #include -#include /* jiffies and HZ */ +#include #include /* O_NONBLOCK */ #include #include /* ATM stuff */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index cfb2faba46de..69e1f7d362a8 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #include diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 46ac686c8911..bb308224099c 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1015d9c8d97d..b5faff458d8b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -21,6 +21,8 @@ SOFTWARE IS DISCLAIMED. 
*/ +#include + #include #include #include diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a8ba752732c9..f307b145ea54 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -29,6 +29,7 @@ #include #include +#include #include #include diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7511df72347f..aa1a814ceddc 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -27,6 +27,7 @@ #include #include +#include #include #include diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 3125ce670c2f..e4e9a2da1e7e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 0f4034934d56..0b5dd607444c 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 05e8946ccc03..79aee759aba5 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/core/ethtool.c b/net/core/ethtool.c index be7bab1adcde..aecb2c7241b6 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include /* diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b0c04cf4851d..3945821e9c1f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/dccp/output.c b/net/dccp/output.c index b66c84db0766..91a15b3c4915 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5d367b7ff542..cebedd545e5e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3a2025f5bf2c..77362b88a661 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 817b1b186aff..f6061c4bb0a8 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 35dbf3dc3d28..7025dcb853d0 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -13,8 +13,9 @@ * 2) as a control channel (write commands, read events) */ -#include +#include #include + #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 13190b38f22e..89bbde1081ce 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a646f3481240..309062f3debe 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 5e9296382420..06186d608a27 100644 
--- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -26,6 +26,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index b9edf5fae6ae..879885b31cce 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "nfc.h" #include "llcp.h" diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 8bad5624a27a..222bedcd9575 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -23,6 +23,7 @@ */ #include +#include #include #include #include diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ffd5f2297584..a6c8da3ee893 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #include diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 40a1ef2adeb4..c3be03e8d098 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -76,6 +76,8 @@ #include #include #include +#include + #include "ar-internal.h" __read_mostly unsigned int rxrpc_max_client_connections = 1000; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index f3a688e10843..28274a3c9831 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include #include "ar-internal.h" diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0a6ef217aa8a..19b36c60fb4c 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #include #include "ar-internal.h" diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6b09a778cc71..43e4045e72bc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -35,6 +35,8 @@ */ #include +#include + #include "core.h" #include "name_table.h" #include "node.h" diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 8a398b3fb532..9192ead66751 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include #include diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 849c4ad0411e..8d592a45b597 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include diff --git a/sound/core/control.c b/sound/core/control.c index fb096cb20a80..c109b82eef4b 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c index 36d2416f90d9..9602a7e38d8a 100644 --- a/sound/core/hwdep.c +++ b/sound/core/hwdep.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 698a01419515..36baf962f9b0 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index bb1261591a1f..5088d4b8db22 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index aec9c92250fd..13dec5ec93f2 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff 
--git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 8da9cb245d01..ab890336175f 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/core/seq/oss/seq_oss_device.h b/sound/core/seq/oss/seq_oss_device.h index d7b4d016b547..afa007c0cc2d 100644 --- a/sound/core/seq/oss/seq_oss_device.h +++ b/sound/core/seq/oss/seq_oss_device.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/core/seq/oss/seq_oss_writeq.c b/sound/core/seq/oss/seq_oss_writeq.c index 1f6788a18444..5e04f4df10e4 100644 --- a/sound/core/seq/oss/seq_oss_writeq.c +++ b/sound/core/seq/oss/seq_oss_writeq.c @@ -28,6 +28,7 @@ #include "../seq_clientmgr.h" #include #include +#include /* diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 86240d02b530..448efd4e980e 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -21,6 +21,8 @@ #include #include +#include + #include "seq_fifo.h" #include "seq_lock.h" diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index dfa5156f3585..1a1acf3ddda4 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/sound/core/timer.c b/sound/core/timer.c index ad153149b231..6d4fbc439246 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/firewire/bebob/bebob.h b/sound/firewire/bebob/bebob.h index 175da875162d..17678d6ab5a2 100644 --- a/sound/firewire/bebob/bebob.h +++ b/sound/firewire/bebob/bebob.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/dice/dice.h b/sound/firewire/dice/dice.h index e6c07857f475..da00e75e09d4 100644 --- a/sound/firewire/dice/dice.h +++ b/sound/firewire/dice/dice.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/digi00x/digi00x.h b/sound/firewire/digi00x/digi00x.h index 2cd465c0caae..9dc761bdacca 100644 --- a/sound/firewire/digi00x/digi00x.h +++ b/sound/firewire/digi00x/digi00x.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h index d73c12b8753d..9b19c7f05d57 100644 --- a/sound/firewire/fireworks/fireworks.h +++ b/sound/firewire/fireworks/fireworks.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/oxfw/oxfw.h b/sound/firewire/oxfw/oxfw.h index 2047dcb27625..d54d4a9ac4a1 100644 --- a/sound/firewire/oxfw/oxfw.h +++ b/sound/firewire/oxfw/oxfw.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/tascam/tascam.h b/sound/firewire/tascam/tascam.h index 1f61011579a7..d3cd4065722b 100644 --- a/sound/firewire/tascam/tascam.h +++ b/sound/firewire/tascam/tascam.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/sound/isa/gus/gus_pcm.c b/sound/isa/gus/gus_pcm.c index 25f6788ccef3..06505999155f 100644 --- a/sound/isa/gus/gus_pcm.c +++ b/sound/isa/gus/gus_pcm.c @@ -27,6 +27,8 @@ #include #include +#include + #include #include #include diff --git a/sound/isa/msnd/msnd.c b/sound/isa/msnd/msnd.c index 835d4aa26761..8109ab3d29d1 100644 --- a/sound/isa/msnd/msnd.c +++ b/sound/isa/msnd/msnd.c @@ -36,6 +36,7 
@@ ********************************************************************/ #include +#include #include #include #include diff --git a/sound/isa/sb/emu8000.c b/sound/isa/sb/emu8000.c index 94c411299e5a..ec180708f160 100644 --- a/sound/isa/sb/emu8000.c +++ b/sound/isa/sb/emu8000.c @@ -21,7 +21,7 @@ */ #include -#include +#include #include #include #include diff --git a/sound/isa/sb/emu8000_patch.c b/sound/isa/sb/emu8000_patch.c index 71d13c0bb746..c2e41d2762f7 100644 --- a/sound/isa/sb/emu8000_patch.c +++ b/sound/isa/sb/emu8000_patch.c @@ -20,6 +20,8 @@ */ #include "emu8000_local.h" + +#include #include #include diff --git a/sound/isa/sb/emu8000_pcm.c b/sound/isa/sb/emu8000_pcm.c index 250fd0006b53..32f234f494e5 100644 --- a/sound/isa/sb/emu8000_pcm.c +++ b/sound/isa/sb/emu8000_pcm.c @@ -19,6 +19,8 @@ */ #include "emu8000_local.h" + +#include #include #include #include diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index 718d5e3b7806..4dae9ff9ef5a 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/oss/dmabuf.c b/sound/oss/dmabuf.c index e3f29132d3ac..c5dd396c66a2 100644 --- a/sound/oss/dmabuf.c +++ b/sound/oss/dmabuf.c @@ -27,6 +27,8 @@ #include #include +#include + #include "sound_config.h" #include "sleep.h" diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 5f248fb41bea..fb3bbceb1fef 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -182,6 +182,7 @@ #include #include #include +#include #include diff --git a/sound/oss/midibuf.c b/sound/oss/midibuf.c index 8f45cd999965..701c7625c971 100644 --- a/sound/oss/midibuf.c +++ b/sound/oss/midibuf.c @@ -16,6 +16,8 @@ #include #include #include +#include + #define MIDIBUF_C #include "sound_config.h" diff --git a/sound/oss/msnd_pinnacle.c b/sound/oss/msnd_pinnacle.c index a8bb4a06ba6f..f34ec01d2239 100644 --- a/sound/oss/msnd_pinnacle.c +++ b/sound/oss/msnd_pinnacle.c @@ -41,6 +41,8 @@ #include #include #include +#include + #include #include #include "sound_config.h" diff --git a/sound/oss/sound_config.h b/sound/oss/sound_config.h index f2554ab78f5e..5253b0a70437 100644 --- a/sound/oss/sound_config.h +++ b/sound/oss/sound_config.h @@ -16,6 +16,7 @@ #include #include +#include #include "os.h" #include "soundvers.h" diff --git a/sound/oss/swarm_cs4297a.c b/sound/oss/swarm_cs4297a.c index f3af63e58b36..97899352b15f 100644 --- a/sound/oss/swarm_cs4297a.c +++ b/sound/oss/swarm_cs4297a.c @@ -64,7 +64,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dd5de09bf362..ae79f7679cc2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include -- GitLab From 5b3cc15aff243cb518cbeed8b1a220cbfd023d9c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 20:43:54 +0100 Subject: [PATCH 0897/1084] sched/headers: Prepare to move the memalloc_noio_*() APIs to Update the .c files that depend on these APIs. 
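For illustration, a minimal sketch of the memalloc_noio_*() usage pattern that the callers updated below depend on. The destination header name is stripped in this copy of the changelog; <linux/sched/mm.h> is assumed here, and the wrapper function below is hypothetical:

#include <linux/slab.h>
#include <linux/sched/mm.h>	/* assumed new home of the memalloc_noio_*() APIs */

static void *alloc_without_io_recursion(size_t size)
{
	unsigned int noio_flag;
	void *p;

	/*
	 * Mark this task PF_MEMALLOC_NOIO so that direct reclaim triggered
	 * by this allocation cannot recurse into the I/O path.
	 */
	noio_flag = memalloc_noio_save();
	p = kmalloc(size, GFP_KERNEL);
	memalloc_noio_restore(noio_flag);

	return p;
}
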
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/base/power/runtime.c | 2 +- drivers/md/dm-bufio.c | 1 + drivers/md/dm-ioctl.c | 1 + drivers/usb/core/hub.c | 2 +- fs/ocfs2/cluster/tcp.c | 1 + fs/xfs/kmem.c | 1 + fs/xfs/xfs_buf.c | 1 + mm/page_alloc.c | 1 + mm/vmscan.c | 1 + net/ceph/crypto.c | 1 + 10 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index a14fac6a01d3..7bcf80fa9ada 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -7,7 +7,7 @@ * This file is released under the GPLv2. */ -#include +#include #include #include #include diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index d36d427a9efb..df4859f6ac6a 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a5a9b17f0f7f..4da6fc6b1ffd 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index a56c75e09786..f0dd08198d74 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index ec000575e863..4348027384f5 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -54,6 +54,7 @@ */ #include +#include #include #include #include diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 339c696bbc01..2dfdc62f795e 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -16,6 +16,7 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include +#include #include #include #include diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8c7d01b75922..b6208728ba39 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "xfs_format.h" #include "xfs_log_format.h" diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a7a6aac95a6d..eaa64d2ffdc5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/vmscan.c b/mm/vmscan.c index 70aa739c6b68..bc8031ef994d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 85747b7f91a9..46008d5ac504 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include -- GitLab From fd7712337ff09a248df424c5843c149586a3f017 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 20:56:33 +0100 Subject: [PATCH 0898/1084] sched/headers: Prepare to remove the include from is still needed - also update other headers and .c files that depend on sched.h including gfp.h (and its sub-headers) for them. 
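For illustration, the kind of follow-up this prepares for in dependent code: once <linux/sched.h> no longer pulls in <linux/gfp.h>, allocator users include it explicitly instead of inheriting it. Hypothetical example:

#include <linux/sched.h>
#include <linux/gfp.h>		/* previously reached only indirectly via sched.h */

static struct page *grab_scratch_page(void)
{
	/* GFP_KERNEL and alloc_page() both come from linux/gfp.h */
	return alloc_page(GFP_KERNEL);
}
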
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + include/linux/sched/mm.h | 1 + lib/percpu_ida.c | 1 + 3 files changed, 3 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index e87c97e1a947..8ae7b3d85658 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -56,6 +56,7 @@ struct sched_param { #include #include #include +#include #include #include diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index a7adba1cd0a9..1cf7941bb946 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -2,5 +2,6 @@ #define _LINUX_SCHED_MM_H #include +#include #endif /* _LINUX_SCHED_MM_H */ diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 010410990bc6..6016f1deb1f5 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -14,6 +14,7 @@ * General Public License for more details. */ +#include #include #include #include -- GitLab From 03441a3482a31462c93509939a388877e3cd9261 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:35 +0100 Subject: [PATCH 0899/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/powerpc/kvm/book3s_hv.c | 1 + arch/s390/appldata/appldata_os.c | 1 + crypto/mcryptd.c | 1 + drivers/cpuidle/governors/menu.c | 1 + fs/proc/loadavg.c | 1 + fs/proc/root.c | 1 + fs/proc/stat.c | 1 + include/linux/sched/stat.h | 6 ++++++ kernel/debug/kdb/kdb_main.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sched/sched.h | 1 + kernel/sys.c | 1 + kernel/time/tick-sched.c | 1 + virt/kvm/kvm_main.c | 1 + 15 files changed, 20 insertions(+) create mode 100644 include/linux/sched/stat.h diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index fc56a9ce785d..1ec86d9e2a82 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 079446619f89..45b3178200ab 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index c207458d6299..4e6472658852 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index fe86060e48f7..6d8a4026a903 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index add5255ce7e0..983fce5c2418 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/root.c b/fs/proc/root.c index 5d5fed20bfff..a50ba388255f 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -14,6 +14,7 @@ #include 
#include #include +#include #include #include #include diff --git a/fs/proc/stat.c b/fs/proc/stat.c index e47c3e8c4dfe..b95556e036bb 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h new file mode 100644 index 000000000000..30fe012aecb0 --- /dev/null +++ b/include/linux/sched/stat.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_STAT_H +#define _LINUX_SCHED_STAT_H + +#include + +#endif /* _LINUX_SCHED_STAT_H */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 308937c7a687..bf48a492b4be 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index 48649ccbb649..5bad7dce1b7b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 8fbe8bcd1e20..8632905f64c5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 04f376cf7ba9..6c8a1db44dde 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index e2d743fb7f55..8c0392c8be51 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 5e9e123b4321..29d79b175141 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ae79f7679cc2..799499417f5b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include -- GitLab From 370c91355c76cbcaad8ff79b4bb15a7f2ea59433 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:35 +0100 Subject: [PATCH 0900/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 6 ++++++ kernel/sched/sched.h | 1 + kernel/time/hrtimer.c | 1 + kernel/time/tick-sched.c | 1 + kernel/time/timer.c | 1 + 5 files changed, 10 insertions(+) create mode 100644 include/linux/sched/nohz.h diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h new file mode 100644 index 000000000000..fe6caf0dab10 --- /dev/null +++ b/include/linux/sched/nohz.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_NOHZ_H +#define _LINUX_SCHED_NOHZ_H + +#include + +#endif /* _LINUX_SCHED_NOHZ_H */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6c8a1db44dde..5979f47c422c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index df512e8c04d4..aed2207f5b2d 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 29d79b175141..7fe53be86077 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/timer.c b/kernel/time/timer.c index f4465d2a25d1..d7999cb06229 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include -- GitLab From b17b01533b719e9949e437abf66436a875739b40 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:35 +0100 Subject: [PATCH 0901/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
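For illustration, the placeholder-header pattern this changelog describes, modeled directly on the include/linux/sched/stat.h and include/linux/sched/nohz.h files created by the two preceding patches. The include target is stripped in this copy, so <linux/sched.h> is assumed (the new header "just maps to" it for now), and the _LINUX_SCHED_DEBUG_H guard name is inferred from the same naming pattern:

#ifndef _LINUX_SCHED_DEBUG_H
#define _LINUX_SCHED_DEBUG_H

#include <linux/sched.h>	/* everything still lives in sched.h at this point */

#endif /* _LINUX_SCHED_DEBUG_H */
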
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/process.c | 1 + arch/alpha/kernel/traps.c | 1 + arch/arc/kernel/ctx_sw.c | 1 + arch/arc/kernel/stacktrace.c | 2 ++ arch/arc/kernel/troubleshoot.c | 1 + arch/arm/kernel/process.c | 1 + arch/arm/kernel/stacktrace.c | 1 + arch/arm/kernel/traps.c | 1 + arch/arm/mm/alignment.c | 1 + arch/arm/mm/fault.c | 1 + arch/arm/probes/kprobes/core.c | 1 + arch/arm64/kernel/probes/kprobes.c | 1 + arch/arm64/kernel/process.c | 1 + arch/arm64/kernel/stacktrace.c | 1 + arch/arm64/kernel/traps.c | 1 + arch/arm64/mm/fault.c | 1 + arch/avr32/kernel/nmi_debug.c | 1 + arch/avr32/kernel/process.c | 1 + arch/blackfin/kernel/dumpstack.c | 2 ++ arch/blackfin/kernel/early_printk.c | 1 + arch/blackfin/kernel/nmi.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/kernel/trace.c | 1 + arch/blackfin/kernel/traps.c | 1 + arch/blackfin/mach-common/ints-priority.c | 1 + arch/blackfin/mm/isram-driver.c | 1 + arch/c6x/kernel/traps.c | 1 + arch/cris/arch-v10/kernel/process.c | 1 + arch/cris/arch-v10/kernel/traps.c | 2 ++ arch/cris/arch-v32/kernel/process.c | 1 + arch/cris/arch-v32/kernel/traps.c | 2 ++ arch/cris/kernel/irq.c | 1 + arch/cris/kernel/stacktrace.c | 2 +- arch/cris/kernel/traps.c | 1 + arch/frv/kernel/process.c | 1 + arch/frv/kernel/traps.c | 1 + arch/h8300/kernel/process.c | 1 + arch/h8300/kernel/traps.c | 1 + arch/hexagon/kernel/process.c | 1 + arch/hexagon/kernel/traps.c | 1 + arch/hexagon/kernel/vm_events.c | 1 + arch/ia64/hp/sim/simserial.c | 1 + arch/ia64/kernel/mca.c | 1 + arch/ia64/kernel/process.c | 1 + arch/ia64/kernel/traps.c | 1 + arch/m32r/kernel/process.c | 1 + arch/m32r/kernel/traps.c | 1 + arch/m68k/kernel/process.c | 1 + arch/m68k/kernel/traps.c | 1 + arch/m68k/mac/macints.c | 1 + arch/metag/kernel/process.c | 1 + arch/metag/kernel/stacktrace.c | 1 + arch/metag/kernel/traps.c | 1 + arch/metag/mm/fault.c | 1 + arch/microblaze/kernel/exceptions.c | 1 + arch/microblaze/kernel/process.c | 1 + arch/microblaze/kernel/traps.c | 1 + arch/mips/kernel/process.c | 1 + arch/mips/kernel/stacktrace.c | 1 + arch/mips/kernel/traps.c | 1 + arch/mips/sgi-ip22/ip28-berr.c | 1 + arch/mips/sgi-ip27/ip27-berr.c | 1 + arch/mips/sgi-ip32/ip32-berr.c | 1 + arch/mips/sgi-ip32/ip32-irq.c | 1 + arch/mn10300/kernel/process.c | 1 + arch/mn10300/kernel/traps.c | 1 + arch/nios2/kernel/process.c | 1 + arch/nios2/kernel/traps.c | 1 + arch/nios2/mm/fault.c | 1 + arch/openrisc/kernel/process.c | 1 + arch/openrisc/kernel/traps.c | 1 + arch/parisc/kernel/pa7300lc.c | 1 + arch/parisc/kernel/process.c | 1 + arch/parisc/kernel/signal.c | 1 + arch/parisc/kernel/traps.c | 1 + arch/parisc/kernel/unaligned.c | 1 + arch/parisc/mm/fault.c | 1 + arch/powerpc/kernel/process.c | 1 + arch/powerpc/kernel/stacktrace.c | 1 + arch/powerpc/kernel/traps.c | 1 + arch/s390/kernel/dumpstack.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/stacktrace.c | 1 + arch/s390/kernel/traps.c | 1 + arch/s390/mm/fault.c | 1 + arch/score/kernel/traps.c | 1 + arch/sh/kernel/dumpstack.c | 1 + arch/sh/kernel/nmi_debug.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/process_64.c | 1 + arch/sh/kernel/stacktrace.c | 1 + arch/sh/kernel/traps.c | 1 + arch/sh/kernel/traps_64.c | 1 + arch/sparc/kernel/process_32.c | 1 + arch/sparc/kernel/process_64.c | 1 + arch/sparc/kernel/stacktrace.c | 1 + arch/sparc/kernel/sun4m_irq.c | 1 + arch/sparc/kernel/sys_sparc_32.c | 1 + 
arch/sparc/kernel/sys_sparc_64.c | 1 + arch/sparc/kernel/traps_32.c | 1 + arch/sparc/kernel/traps_64.c | 1 + arch/sparc/mm/fault_64.c | 1 + arch/tile/include/asm/stack.h | 2 ++ arch/tile/kernel/process.c | 1 + arch/tile/kernel/signal.c | 1 + arch/tile/kernel/stack.c | 1 + arch/tile/kernel/traps.c | 1 + arch/tile/kernel/unaligned.c | 1 + arch/tile/mm/fault.c | 1 + arch/um/drivers/mconsole_kern.c | 1 + arch/um/kernel/process.c | 1 + arch/um/kernel/sysrq.c | 2 ++ arch/um/kernel/trap.c | 1 + arch/unicore32/kernel/process.c | 1 + arch/unicore32/kernel/stacktrace.c | 1 + arch/unicore32/kernel/traps.c | 1 + arch/unicore32/mm/alignment.c | 1 + arch/x86/kernel/amd_gart_64.c | 1 + arch/x86/kernel/doublefault.c | 1 + arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/dumpstack_32.c | 1 + arch/x86/kernel/dumpstack_64.c | 1 + arch/x86/kernel/kprobes/core.c | 1 + arch/x86/kernel/nmi.c | 1 + arch/x86/kernel/process.c | 1 + arch/x86/kernel/stacktrace.c | 1 + arch/x86/mm/extable.c | 2 ++ arch/x86/platform/uv/uv_nmi.c | 1 + arch/x86/um/sysrq_32.c | 1 + arch/x86/um/sysrq_64.c | 1 + arch/xtensa/kernel/process.c | 1 + arch/xtensa/kernel/traps.c | 1 + drivers/base/power/main.c | 1 + drivers/tty/sysrq.c | 2 ++ drivers/tty/tty_ldsem.c | 1 + drivers/tty/vt/keyboard.c | 2 ++ fs/proc/base.c | 1 + include/linux/sched/debug.h | 6 ++++++ kernel/debug/kdb/kdb_bt.c | 1 + kernel/debug/kdb/kdb_main.c | 1 + kernel/hung_task.c | 1 + kernel/latencytop.c | 1 + kernel/locking/mutex.c | 1 + kernel/locking/rtmutex-debug.c | 1 + kernel/locking/rtmutex.c | 1 + kernel/locking/rwsem-spinlock.c | 1 + kernel/locking/rwsem-xadd.c | 1 + kernel/locking/rwsem.c | 1 + kernel/locking/semaphore.c | 1 + kernel/panic.c | 1 + kernel/power/process.c | 1 + kernel/printk/printk.c | 1 + kernel/rcu/tree.c | 1 + kernel/rcu/tree_plugin.h | 1 + kernel/rcu/update.c | 1 + kernel/sched/completion.c | 1 + kernel/sched/sched.h | 2 +- kernel/sched/wait.c | 1 + kernel/signal.c | 1 + kernel/time/alarmtimer.c | 1 + kernel/time/hrtimer.c | 1 + kernel/time/timer.c | 1 + kernel/watchdog.c | 1 + kernel/watchdog_hld.c | 2 ++ lib/dump_stack.c | 1 + lib/nmi_backtrace.c | 1 + 166 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 include/linux/sched/debug.h diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index bca963a4aa48..88f7a974d311 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 6448ab00043d..b137390e87e7 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c index 6f4cb0dab1b9..9e1ae9d41925 100644 --- a/arch/arc/kernel/ctx_sw.c +++ b/arch/arc/kernel/ctx_sw.c @@ -16,6 +16,7 @@ #include #include +#include #ifdef CONFIG_ARC_PLAT_EZNPS #include #endif diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index b9192a653b7e..74315f302971 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -28,6 +28,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 0c48907f56a9..f9caf79186d4 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git 
a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 91d2d5b01414..03d46395d1ff 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 92b72375c4c7..3a2fa203637a 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index dc5f1a22b3c9..a9dad001b97d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index d7fe2de9cc9e..2c96190e018b 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 520c7778d330..ff8b0aa2dfde 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index a4ec240ee7ba..b6dc9d838a9a 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index f0593c92279b..2a07aae5b8a2 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 1ad48f93abdd..40a16cdd9873 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 8a552a33c6ef..7597e42feeea 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5b8779a849a2..5a5d83791837 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 30dd60ab8a65..4bf899fb451b 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/nmi_debug.c b/arch/avr32/kernel/nmi_debug.c index 3414b8566c29..25823049bb99 100644 --- a/arch/avr32/kernel/nmi_debug.c +++ b/arch/avr32/kernel/nmi_debug.c @@ -9,6 +9,7 @@ #include #include #include +#include #include diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 68e5b9dac059..dac022d88d9d 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. 
*/ #include +#include #include #include #include diff --git a/arch/blackfin/kernel/dumpstack.c b/arch/blackfin/kernel/dumpstack.c index 95ba6d9e9a3d..3c992c1f8ef2 100644 --- a/arch/blackfin/kernel/dumpstack.c +++ b/arch/blackfin/kernel/dumpstack.c @@ -10,6 +10,8 @@ #include #include #include +#include + #include /* diff --git a/arch/blackfin/kernel/early_printk.c b/arch/blackfin/kernel/early_printk.c index 61fbd2de993d..4b89af9243d3 100644 --- a/arch/blackfin/kernel/early_printk.c +++ b/arch/blackfin/kernel/early_printk.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c index 9919d29287dc..633c37083e87 100644 --- a/arch/blackfin/kernel/nmi.c +++ b/arch/blackfin/kernel/nmi.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 4aa5545c4fde..e95d094c20a6 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index 01e546ad2f7a..23c05b8effb3 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 32676d7721b1..a323a40a46e9 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 4986b4fbcee9..13e94bf9d8ba 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mm/isram-driver.c b/arch/blackfin/mm/isram-driver.c index 7e2e674ed444..aaa1e64b753b 100644 --- a/arch/blackfin/mm/isram-driver.c +++ b/arch/blackfin/mm/isram-driver.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index dcc2c2f6d67c..09b8a40d5680 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -10,6 +10,7 @@ */ #include #include +#include #include #include diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 96e5afef6b47..068a40374200 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/arch/cris/arch-v10/kernel/traps.c b/arch/cris/arch-v10/kernel/traps.c index 96d004fe9740..c0a501f29bd8 100644 --- a/arch/cris/arch-v10/kernel/traps.c +++ b/arch/cris/arch-v10/kernel/traps.c @@ -10,6 +10,8 @@ #include #include +#include + #include #include diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 4d1afa9f9fd3..613c2d71bf10 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/traps.c b/arch/cris/arch-v32/kernel/traps.c index ad6174e217c9..a34256515036 100644 --- a/arch/cris/arch-v32/kernel/traps.c +++ b/arch/cris/arch-v32/kernel/traps.c @@ -5,6 +5,8 @@ #include #include #include +#include + 
#include #include #include diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index 694850e8f077..09b864f46f8a 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/arch/cris/kernel/stacktrace.c b/arch/cris/kernel/stacktrace.c index 99838c74456d..f1cc3aaacd8d 100644 --- a/arch/cris/kernel/stacktrace.c +++ b/arch/cris/kernel/stacktrace.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c index b2a312a7afc6..a01636a12a6e 100644 --- a/arch/cris/kernel/traps.c +++ b/arch/cris/kernel/traps.c @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_KALLSYMS #include #endif diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index b306241c4ef2..cf4c34c09ab3 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 43c134d6081c..ce29991e4219 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 891974a11704..d7cabf373fe3 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 044a36125846..ee7e3ce40d78 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index d9edfd3fc52a..3bdd9592d025 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 4496bcf605ef..b55f13c70b34 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/vm_events.c b/arch/hexagon/kernel/vm_events.c index 741aaa917cda..04f57ef22009 100644 --- a/arch/hexagon/kernel/vm_events.c +++ b/arch/hexagon/kernel/vm_events.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 21fd50def270..de8cba121013 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 5ac51069e453..f9fe3ac64242 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 52deab683ba1..804b251ee5d1 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 48ba46b025e1..7b1fe9462158 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include /* For unblank_screen() 
*/ #include diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index e0568bee60c0..1a227c1fdb1a 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index c3c5fdfae920..7c05bb393597 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index f0a8e9b332cd..b4a4675f4119 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 558f38402737..a926d2c88898 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/m68k/mac/macints.c b/arch/m68k/mac/macints.c index b5cd06df71fd..9637dee90dac 100644 --- a/arch/m68k/mac/macints.c +++ b/arch/m68k/mac/macints.c @@ -110,6 +110,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index 35062796edf2..2e611bd37515 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/stacktrace.c b/arch/metag/kernel/stacktrace.c index 5510361d5bea..4f806d66a58c 100644 --- a/arch/metag/kernel/stacktrace.c +++ b/arch/metag/kernel/stacktrace.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 17b2e2e38d5a..5ce67f9124aa 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index c765b3621b9b..5055477486b6 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index 42dd12a62ff5..e6f338d0496b 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index b2dd37196b3b..8c5fdb2e2850 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c index cb619533a192..45bbba9d919f 100644 --- a/arch/microblaze/kernel/traps.c +++ b/arch/microblaze/kernel/traps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 803e255b6fc3..97574cdb532d 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/mips/kernel/stacktrace.c b/arch/mips/kernel/stacktrace.c index 506021f62549..986f910961d9 100644 --- a/arch/mips/kernel/stacktrace.c +++ b/arch/mips/kernel/stacktrace.c @@ -4,6 +4,7 @@ * Copyright (C) 2006 Atsushi Nemoto */ 
#include +#include #include #include #include diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 49c6df20672a..1a9366a157cb 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 9960a8302eac..1f2a5bc4779e 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index f8919b6a24c8..d12879eb2b1f 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -11,6 +11,7 @@ #include #include /* for SIGBUS */ #include /* schow_regs(), force_sig() */ +#include #include #include diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index ba8f46d80ab8..57d8c7486fe6 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip32/ip32-irq.c b/arch/mips/sgi-ip32/ip32-irq.c index 838d8589a1c0..a6a0ff7f5aed 100644 --- a/arch/mips/sgi-ip32/ip32-irq.c +++ b/arch/mips/sgi-ip32/ip32-irq.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index e5def2217f72..a1e2f8301d94 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index a7a987c7954f..800fd0801969 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. 
*/ #include +#include #include #include #include diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 2f8c74f93e70..3ad87a6b119b 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -14,6 +14,7 @@ #include #include +#include #include #include diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index 72ed30a93c85..8184e7d6b385 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index e7a14e1e0d6b..b804dd06ea1c 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 6e9d1cb519f2..899339dac938 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 7e81ad258bca..2354ab88d948 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/pa7300lc.c b/arch/parisc/kernel/pa7300lc.c index 8a89780223aa..9b245fc67560 100644 --- a/arch/parisc/kernel/pa7300lc.c +++ b/arch/parisc/kernel/pa7300lc.c @@ -5,6 +5,7 @@ * Copyright (C) 2000 Philipp Rumpf */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ea6603ee8d24..8c283caf2b4d 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index e58925ac64d1..9e03296641d7 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 378df9207406..991654c88eec 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index a08ab481e556..e36f7b75ab07 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 1a0b4f63f0e9..c3cac4ddfe9c 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4379a079b3c2..76f58b87dcb2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 4f24606afc3f..66711958493c 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index e6cc56b61d01..ff365f9de27a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -17,6 
+17,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 55d4fe174fd9..72c584d62f59 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 54281660582c..ed99ff911b67 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 0085b2d8ed7d..e66687dc6144 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 283ad7840335..f787b9d8f54c 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index bb5560eb2435..5845d3028ffc 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index fa624f30f783..8a54d320fb41 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -25,6 +25,7 @@ #include #include +#include #include #include diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index 8dfe645bcc4b..d00cab2f50f9 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/nmi_debug.c b/arch/sh/kernel/nmi_debug.c index ff0abbd1e652..730d928f0d12 100644 --- a/arch/sh/kernel/nmi_debug.c +++ b/arch/sh/kernel/nmi_debug.c @@ -9,6 +9,7 @@ #include #include #include +#include #include enum nmi_action { diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 51741850a715..5098274e0f23 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -15,6 +15,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index e0b271bffd6a..ced21a417d9d 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c index bf989e063a0c..7a73d2763e1b 100644 --- a/arch/sh/kernel/stacktrace.c +++ b/arch/sh/kernel/stacktrace.c @@ -10,6 +10,7 @@ * for more details. */ #include +#include #include #include #include diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 3036dee854d1..bc5c9f347b9e 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c index 00835edb6e20..014fb08cf133 100644 --- a/arch/sh/kernel/traps_64.c +++ b/arch/sh/kernel/traps_64.c @@ -10,6 +10,7 @@ * for more details. 
*/ #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 48ffc3e7d1dd..237a471c8ed5 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index d249ca10b203..98f6ff6eaa55 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/stacktrace.c b/arch/sparc/kernel/stacktrace.c index e78386a0029f..be4c14cccc05 100644 --- a/arch/sparc/kernel/stacktrace.c +++ b/arch/sparc/kernel/stacktrace.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c index da737c712fa8..aa84da0b2d30 100644 --- a/arch/sparc/kernel/sun4m_irq.c +++ b/arch/sparc/kernel/sun4m_irq.c @@ -10,6 +10,7 @@ */ #include +#include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ff3573059936..7aecb239626d 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index c521ee277083..ef4520efc813 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index ecddac5a4c96..26740a2f285d 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -10,6 +10,7 @@ */ #include /* for jiffies */ +#include #include #include #include diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index e022d7b00390..4ff4c35f76b2 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 643c149a3151..b84c4dd14954 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h index c3cb42615a9f..3573325e340b 100644 --- a/arch/tile/include/asm/stack.h +++ b/arch/tile/include/asm/stack.h @@ -17,6 +17,8 @@ #include #include +#include + #include #include #include diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index c84c54a1ac55..557b4c8435d0 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 87299a6cfec8..8cc92ae6eb27 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 22bbbd3ff4a3..a1c0787fe9d8 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 39f427bb0de2..54804866f238 100644 --- a/arch/tile/kernel/traps.c +++ 
b/arch/tile/kernel/traps.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index f229e979584e..142acae78316 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 709f8e9ba3e9..005c91c2adca 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 8a4c72af3bc0..af326fb6510d 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 078630d6448c..c6c45ea4021f 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index aa1b56f5ac68..34ae555c3e70 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 9711ae4aaa6a..59158871b9fc 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index d7c6b676b3a5..c1a0eec88f1f 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/stacktrace.c b/arch/unicore32/kernel/stacktrace.c index b34030bdabe3..9976e767d51c 100644 --- a/arch/unicore32/kernel/stacktrace.c +++ b/arch/unicore32/kernel/stacktrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 7f5e06f9a202..506e1a2127c6 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/unicore32/mm/alignment.c b/arch/unicore32/mm/alignment.c index 24e836023e6c..3a7f6faa8794 100644 --- a/arch/unicore32/mm/alignment.c +++ b/arch/unicore32/mm/alignment.c @@ -15,6 +15,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 82dfe32faaf4..df083efe6ee0 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index b2f7207ba86c..f9c324e08d85 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0cfd01d2754c..7b9e7e68f316 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index bb3b5b9a6899..b0b3a3df7c20 
100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -2,6 +2,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ +#include #include #include #include diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index fac189efcc34..a8b117e93b46 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -2,6 +2,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ +#include #include #include #include diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 520b8dfe1640..6384eb754a58 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index d17b1a940add..f088ea4c66e7 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6395e0cd7dd6..505be5589e52 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 0653788026e2..330cae0025d0 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -4,6 +4,7 @@ * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar */ #include +#include #include #include #include diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61a7e9ea9aa1..35ea061010a1 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,5 +1,7 @@ #include #include +#include + #include #include diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 9743d0ccfec6..c34bd8233f7c 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c index 16ee0e450e3e..f2383484840d 100644 --- a/arch/x86/um/sysrq_32.c +++ b/arch/x86/um/sysrq_32.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c index 38b4e4abd0f8..903ad91b624f 100644 --- a/arch/x86/um/sysrq_64.c +++ b/arch/x86/um/sysrq_64.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 826d25104846..afaa3588d869 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 84abd66e680d..d8bb2e62393e 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 249e0304597f..9faee1c893e5 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 65db0aeb3d80..e5f0c7d0fe8d 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -16,6 +16,8 @@ #include #include +#include 
+#include #include #include #include diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index 9229de43e19d..4f6b1b10b537 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 6b3a2c00974d..c5f0fc906136 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index 4c5fa704e38c..5914fed3712d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -88,6 +88,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h new file mode 100644 index 000000000000..55bc0cd35fd5 --- /dev/null +++ b/include/linux/sched/debug.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_DEBUG_H +#define _LINUX_SCHED_DEBUG_H + +#include + +#endif /* _LINUX_SCHED_DEBUG_H */ diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 9b976a42376d..6ad4a9fcbd6f 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "kdb_private.h" diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index bf48a492b4be..c8146d53ca67 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 129247e56902..f0f8e2a8496f 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -17,6 +17,7 @@ #include #include #include +#include #include diff --git a/kernel/latencytop.c b/kernel/latencytop.c index b5c30d9f46c5..545839b838d6 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 82ff5726bc1b..198527a62149 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 62b6cee8ea7f..97ee9df32e0f 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 1c8c8707853a..6edc32ecd9c5 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "rtmutex_common.h" diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 91e7bc8e9d5a..7bc24d477805 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -7,6 +7,7 @@ */ #include #include +#include #include enum rwsem_waiter_type { diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 9bd6e768164e..34e727f18e49 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "rwsem.h" diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 45ba475d4be3..90a74ccd85a4 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -7,6 +7,7 @@ #include #include #include +#include #include 
#include #include diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 9512e37637dc..561acdd39960 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/panic.c b/kernel/panic.c index 3ec16e603e88..a58932b41700 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -9,6 +9,7 @@ * to indicate a major problem. */ #include +#include #include #include #include diff --git a/kernel/power/process.c b/kernel/power/process.c index 2fba066e125f..85e2915d8961 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 47050eedc206..3caaaa04b92e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e456327a63d6..50fee7689e71 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 9dabb04003be..0a62a8f1caac 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "../time/tick-internal.h" diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index da128deb10ec..55c8530316c7 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index b294a8ee2842..53f9558fa925 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -12,6 +12,7 @@ */ #include +#include #include /** diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 5979f47c422c..e1e819f731b2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -1830,7 +1831,6 @@ extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); - #ifdef CONFIG_NUMA_BALANCING extern void show_numa_stats(struct task_struct *p, struct seq_file *m); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 1fedfcf6fc9b..4d2ea6f25568 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 762fcf64f0c3..15092fd8c7b1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 04939053c823..ce3a31e8eb36 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index aed2207f5b2d..ec08f527d7ee 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include diff --git a/kernel/time/timer.c b/kernel/time/timer.c index d7999cb06229..1dc0256bfb6e 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -41,6 +41,7 @@ 
#include #include #include +#include #include #include diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 52718f4512e9..03e0b69bb5bf 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index b5de262a9eb9..54a427d1f344 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -13,6 +13,8 @@ #include #include +#include + #include #include diff --git a/lib/dump_stack.c b/lib/dump_stack.c index c30d07e99dba..625375e7f11f 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 5f7999eacad5..4e8a30d1c22f 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef arch_trigger_cpumask_backtrace /* For reliability, we're prepared to waste bits here. */ -- GitLab From ef8bd77f332bb0a4e467d7171bbfc6c57aa08a88 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:36 +0100 Subject: [PATCH 0902/1084] sched/headers: Prepare for new header dependencies before moving code to <linux/sched/hotplug.h> We are going to split <linux/sched/hotplug.h> out of <linux/sched.h>, which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to <linux/sched.h> to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/smp.c | 1 + arch/arm64/include/asm/mmu_context.h | 1 + arch/arm64/kernel/smp.c | 1 + arch/ia64/kernel/process.c | 1 + arch/metag/kernel/smp.c | 1 + arch/mips/cavium-octeon/smp.c | 1 + arch/mips/kernel/smp-bmips.c | 1 + arch/mips/kernel/smp-cps.c | 1 + arch/mips/loongson64/loongson-3/smp.c | 1 + arch/powerpc/platforms/85xx/smp.c | 1 + arch/powerpc/platforms/powermac/smp.c | 1 + arch/powerpc/platforms/powernv/smp.c | 1 + arch/powerpc/platforms/pseries/hotplug-cpu.c | 1 + arch/s390/kernel/smp.c | 1 + arch/sh/kernel/smp.c | 1 + arch/sparc/kernel/smp_64.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/xtensa/kernel/smp.c | 1 + include/linux/sched/hotplug.h | 6 ++++++ kernel/cpu.c | 1 + kernel/sched/core.c | 1 + kernel/sched/sched.h | 4 +++- 22 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/hotplug.h diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5a07c5a4b894..2083a5370308 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 1ef40d82cfd3..3c9f7d18a7e6 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -25,6 +25,7 @@ #include #include +#include #include #include diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 827d52d78b67..f66e58523b96 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 804b251ee5d1..2204ae450d65 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git
a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index c622293254e4..198eda75846b 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4355a4cf4d74..4b94b7fbafa3 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 16e37a28f876..3daa2cae50b0 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index a2544c2394e4..1d3188c23bb8 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index cfcf240cedbe..66ede5b11355 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index a83a6d26090d..078097a0b09d 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index c9eb7d6540ea..746ca7321b03 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -23,6 +23,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index e39e6c428af1..8b67e1eefb5c 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index a1b63e00b2f7..7bc0e91f8715 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -24,6 +24,7 @@ #include #include #include /* for idle_task_exit */ +#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index d0a74d7ce433..b5766e03cdcb 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index edc4769b047e..4abf119c129c 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 8e3e13924594..15052d364e04 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fe7a66e6b5a0..f3eb266d75ff 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/smp.c 
b/arch/xtensa/kernel/smp.c index fcea72019df7..f2b3e1725349 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h new file mode 100644 index 000000000000..34670ed24894 --- /dev/null +++ b/include/linux/sched/hotplug.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_HOTPLUG_H +#define _LINUX_SCHED_HOTPLUG_H + +#include <linux/sched.h> + +#endif /* _LINUX_SCHED_HOTPLUG_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 0aae3183b029..78c206579d01 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 11a0684a29a7..956383844116 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e1e819f731b2..0974eb2ef50d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -12,8 +13,9 @@ #include #include #include +#include + #include -#include #include #include #include -- GitLab From 299300258d1bc4e997b7db340a2e06636757fe2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:36 +0100 Subject: [PATCH 0903/1084] sched/headers: Prepare for new header dependencies before moving code to <linux/sched/task.h> We are going to split <linux/sched/task.h> out of <linux/sched.h>, which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to <linux/sched.h> to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it.
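For reference, the "trivial placeholder" named above is nothing more than an include-guarded header that maps back to <linux/sched.h>. A minimal sketch of include/linux/sched/task.h at this stage of the series (assuming, as the message says, that it only re-includes <linux/sched.h> and carries no declarations of its own yet) looks like:

/* include/linux/sched/task.h - temporary placeholder during the header split */
#ifndef _LINUX_SCHED_TASK_H
#define _LINUX_SCHED_TASK_H

/*
 * For now this header only pulls in the full <linux/sched.h>, so every
 * file that already includes it keeps compiling unchanged; the relevant
 * declarations are moved here by later patches in the series.
 */
#include <linux/sched.h>

#endif /* _LINUX_SCHED_TASK_H */

Because the placeholder adds no declarations of its own, each step of the conversion builds independently, which is what keeps the series bisectable.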
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/process.c | 1 + arch/arc/kernel/process.c | 2 ++ arch/arm/kernel/process.c | 1 + arch/arm/mm/init.c | 1 + arch/arm64/kernel/process.c | 1 + arch/avr32/kernel/process.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/kernel/trace.c | 1 + arch/c6x/kernel/process.c | 1 + arch/cris/arch-v10/kernel/process.c | 1 + arch/cris/arch-v32/kernel/process.c | 1 + arch/cris/kernel/process.c | 1 + arch/frv/kernel/process.c | 1 + arch/frv/mm/mmu-context.c | 1 + arch/h8300/kernel/process.c | 1 + arch/hexagon/kernel/process.c | 1 + arch/ia64/kernel/mca.c | 1 + arch/ia64/kernel/perfmon.c | 1 + arch/ia64/kernel/process.c | 1 + arch/ia64/kernel/ptrace.c | 1 + arch/m32r/kernel/process.c | 1 + arch/m32r/kernel/smpboot.c | 1 + arch/m68k/kernel/process.c | 1 + arch/metag/kernel/process.c | 1 + arch/metag/kernel/traps.c | 1 + arch/microblaze/kernel/process.c | 1 + arch/mips/kernel/mips-mt-fpaff.c | 1 + arch/mips/kernel/process.c | 1 + arch/mn10300/kernel/process.c | 1 + arch/mn10300/kernel/smp.c | 1 + arch/nios2/kernel/process.c | 1 + arch/openrisc/kernel/process.c | 1 + arch/parisc/kernel/process.c | 1 + arch/powerpc/kernel/process.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/setup.c | 1 + arch/score/kernel/process.c | 1 + arch/sh/kernel/cpu/fpu.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/process_64.c | 1 + arch/sh/mm/asids-debugfs.c | 1 + arch/sparc/kernel/process_32.c | 1 + arch/sparc/kernel/process_64.c | 1 + arch/tile/kernel/process.c | 1 + arch/tile/kernel/smpboot.c | 1 + arch/tile/kernel/unaligned.c | 1 + arch/tile/mm/fault.c | 2 ++ arch/um/kernel/exec.c | 1 + arch/um/kernel/process.c | 1 + arch/um/kernel/reboot.c | 1 + arch/unicore32/kernel/process.c | 1 + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 + arch/x86/kernel/fpu/init.c | 1 + arch/x86/kernel/process.c | 1 + arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 1 + arch/x86/kernel/unwind_frame.c | 1 + arch/xtensa/kernel/process.c | 1 + drivers/misc/kgdbts.c | 2 ++ drivers/tty/sysrq.c | 2 +- drivers/tty/tty_io.c | 1 + fs/exec.c | 1 + fs/fcntl.c | 1 + fs/fs_struct.c | 1 + fs/proc/array.c | 1 + fs/proc/kcore.c | 1 + include/linux/sched/task.h | 6 ++++++ init/main.c | 1 + kernel/cgroup/cgroup.c | 1 + kernel/cpu.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/kmod.c | 1 + kernel/kthread.c | 1 + kernel/locking/lockdep.c | 1 + kernel/pid.c | 1 + kernel/pid_namespace.c | 1 + kernel/power/process.c | 1 + kernel/ptrace.c | 1 + kernel/sched/sched.h | 1 + kernel/signal.c | 1 + kernel/smpboot.c | 1 + kernel/sys.c | 1 + kernel/trace/ftrace.c | 1 + kernel/tracepoint.c | 1 + lib/dma-debug.c | 1 + mm/kmemleak.c | 1 + mm/memory-failure.c | 1 + mm/memory.c | 1 + mm/oom_kill.c | 1 + mm/rmap.c | 1 + mm/swapfile.c | 1 + mm/usercopy.c | 2 ++ security/keys/keyctl.c | 1 + security/selinux/hooks.c | 1 + 95 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/task.h diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 88f7a974d311..713b4fac998e 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index a41a79a4f4fe..d618d26721ab 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -11,6 +11,8 @@ #include #include 
#include +#include + #include #include #include diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 03d46395d1ff..d4c7c9a1afa9 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 2a9040dcf47e..1d8558ff9827 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 40a16cdd9873..f9b8e74ff8f2 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index dac022d88d9d..75b944a5107c 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index e95d094c20a6..1a1f444004b1 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index 23c05b8effb3..151f22196ab6 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c index 0ee7686a78f3..6b61779d426a 100644 --- a/arch/c6x/kernel/process.c +++ b/arch/c6x/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 068a40374200..808c2186b704 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 613c2d71bf10..c852df262948 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 50a7dd451456..0bbd3a0c3d70 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index cf4c34c09ab3..c96dbd4b8626 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/mm/mmu-context.c b/arch/frv/mm/mmu-context.c index 4031289bf2ec..16946a58f64d 100644 --- a/arch/frv/mm/mmu-context.c +++ b/arch/frv/mm/mmu-context.c @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index d7cabf373fe3..54e4d6f01865 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 3bdd9592d025..a2a822a875b6 100644 --- a/arch/hexagon/kernel/process.c +++ 
b/arch/hexagon/kernel/process.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index f9fe3ac64242..79c7c46d7dc1 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 677a86826771..7e943d3c05ed 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 2204ae450d65..054facf22156 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 0b1153e610ea..04fe1436e1cc 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 1a227c1fdb1a..2a450382934c 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index f98d2f6519d6..a7d04684d2c7 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index b4a4675f4119..5d335d178706 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index 2e611bd37515..801e6f927e62 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 5ce67f9124aa..23c5ac33a93f 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 8c5fdb2e2850..a642fe5ac5ff 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index ffc6bd3464d9..8cab633e0e5a 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 97574cdb532d..8bfb833b3158 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index a1e2f8301d94..8ca7a9de09a5 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index 
e65b5cc2fa67..d924f7a79708 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 3ad87a6b119b..5ee1ddf6a924 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 899339dac938..0c5ad3bde0f5 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 8c283caf2b4d..e5f97239cc5e 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 76f58b87dcb2..b99b12656f6f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index ed99ff911b67..250c41f05721 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e4d811f17971..3de07004c02e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index aae9480706c2..32924159d8c9 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -28,6 +28,7 @@ #include #include #include +#include void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index 37e35330aae6..b76370a47bf9 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 5098274e0f23..493c3eb86aa5 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index ced21a417d9d..056607185158 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index 110bd35165bf..e5539e0f8e3b 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 237a471c8ed5..ed87c45a785b 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 98f6ff6eaa55..4c82a73af893 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -15,6 +15,7 @@ #include #include #include +#include 
#include #include #include diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 557b4c8435d0..dbb4fa76d1dd 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 53ce940a5016..f3fdd0c39b12 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index 142acae78316..8149c38f67b6 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 005c91c2adca..f58fa06a2214 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 770ec07b6a6a..2df4e5d11939 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index c6c45ea4021f..e76dc7c11251 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 79218106a033..0bc921cee0b1 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index c1a0eec88f1f..d58f1600b477 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index d48af18e7baf..0bbe0f3a039f 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 19bdd1bf8160..c2f8dde3255c 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -7,6 +7,7 @@ #include #include +#include #include /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 505be5589e52..441f00e7be8f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a0ac3e81518a..d79ffa47aab8 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a61e141b6891..124f5652ae3c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 23d15565d02a..7c0abdc9a732 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -1,4 
+1,5 @@ #include +#include #include #include #include diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index afaa3588d869..af33f9d4c624 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 99635dd9dbac..fc7efedbc4be 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -103,6 +103,8 @@ #include #include #include +#include + #include #define v1printk(a...) do { \ diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index e5f0c7d0fe8d..c6fc7141d7b2 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 985d33f0f315..e6d1a6510886 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index e857c7260b1f..65145a3df065 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/fcntl.c b/fs/fcntl.c index e1c54f20325c..be8fbe289087 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 543ed50f0387..be0250788b73 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index cfe7f934a300..f3169b58af38 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index ea9f3d1ae830..4ee55274f155 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "internal.h" diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h new file mode 100644 index 000000000000..0023c91ff821 --- /dev/null +++ b/include/linux/sched/task.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TASK_H +#define _LINUX_SCHED_TASK_H + +#include + +#endif /* _LINUX_SCHED_TASK_H */ diff --git a/init/main.c b/init/main.c index c891cfb8928f..47956b22add1 100644 --- a/init/main.c +++ b/init/main.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e8f87bf9840c..0125589c7428 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cpu.c b/kernel/cpu.c index 78c206579d01..f7c063239fa5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index 5bad7dce1b7b..e53408d156df 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 8632905f64c5..0af64d5d8b73 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/kmod.c b/kernel/kmod.c index 0c407f905ca4..ac5f5c2d098d 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -20,6 +20,7 @@ */ #include #include 
+#include #include #include #include diff --git a/kernel/kthread.c b/kernel/kthread.c index ef9b9eb809c7..2f26adea0f84 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index cdafaff926ca..12e38c213b70 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/pid.c b/kernel/pid.c index 0291804151b5..0143ac0ddceb 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -38,6 +38,7 @@ #include #include #include +#include #define pid_hashfn(nr, ns) \ hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 25314c96fbe6..7c8367854dc4 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -19,6 +19,7 @@ #include #include #include +#include struct pid_cache { int nr_ids; diff --git a/kernel/power/process.c b/kernel/power/process.c index 85e2915d8961..c7209f060eeb 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index bcdbdc19eb35..0af928712174 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0974eb2ef50d..6b7155ae5c33 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 15092fd8c7b1..25d099fd80f7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 4a5c6e73ecd4..1d71c051a951 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 8c0392c8be51..7f1ecd2e41c1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0c0609326391..0d1597c9ee30 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 9f6984d52fec..685c50ae6300 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -25,6 +25,7 @@ #include #include #include +#include #include extern struct tracepoint * const __start___tracepoints_ptrs[]; diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 60c57ec936db..942adf13418d 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 2df6d3687b2a..a3970573359e 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b74984db386e..27f7210e7fab 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index d4994a28dc85..a97a4cec2e1f 100644 --- a/mm/memory.c +++ 
b/mm/memory.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 69f75b014607..d083714a2bb9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/rmap.c b/mm/rmap.c index 0367a0506667..2da487d6cea8 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -47,6 +47,7 @@ #include #include +#include #include #include #include diff --git a/mm/swapfile.c b/mm/swapfile.c index ff2bf3f61b14..521ef9b6064f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/mm/usercopy.c b/mm/usercopy.c index 7ccad05c0d5c..d155e12563b1 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include enum { diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index bcb0b597c391..52c34532c785 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b12f873f92ba..57ff53696144 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include -- GitLab From 68db0cf10678630d286f4bbbbdfa102951a35faa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:37 +0100 Subject: [PATCH 0904/1084] sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. 
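In other words, the placeholder amounts to nothing more than an include guard around a single forwarding include. A minimal sketch of such a header is below; the file name include/linux/sched/task_stack.h comes from the diff in this patch, while the forwarding target <linux/sched.h> is an assumption, since the angle-bracketed header names are stripped from the text above.

/*
 * Trivial placeholder header: it only forwards to the existing
 * monolithic header, so every file that switches to including it
 * keeps compiling unchanged and the series stays bisectable.
 */
#ifndef _LINUX_SCHED_TASK_STACK_H
#define _LINUX_SCHED_TASK_STACK_H

/* For now, map straight back to the header being split up: */
#include <linux/sched.h>

#endif /* _LINUX_SCHED_TASK_STACK_H */

Callers that need the task-stack helpers can then gain an explicit #include of the new header up front, and the actual code move into it can follow in a later patch without touching those files again.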
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 1 + arch/alpha/kernel/process.c | 1 + arch/alpha/kernel/ptrace.c | 1 + arch/alpha/kernel/signal.c | 1 + arch/arc/kernel/kgdb.c | 1 + arch/arc/kernel/process.c | 1 + arch/arc/kernel/ptrace.c | 1 + arch/arc/kernel/signal.c | 2 ++ arch/arm/kernel/perf_regs.c | 1 + arch/arm/kernel/process.c | 1 + arch/arm/kernel/ptrace.c | 1 + arch/arm/kernel/smp.c | 1 + arch/arm/kernel/traps.c | 1 + arch/arm64/include/asm/compat.h | 1 + arch/arm64/kernel/debug-monitors.c | 1 + arch/arm64/kernel/kgdb.c | 2 ++ arch/arm64/kernel/perf_regs.c | 1 + arch/arm64/kernel/process.c | 1 + arch/arm64/kernel/ptrace.c | 1 + arch/arm64/kernel/smp.c | 1 + arch/arm64/kernel/stacktrace.c | 1 + arch/arm64/kernel/traps.c | 1 + arch/avr32/kernel/process.c | 1 + arch/avr32/kernel/ptrace.c | 1 + arch/avr32/kernel/stacktrace.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/kernel/ptrace.c | 1 + arch/blackfin/kernel/signal.c | 1 + arch/blackfin/kernel/stacktrace.c | 1 + arch/blackfin/mach-common/smp.c | 1 + arch/c6x/kernel/process.c | 1 + arch/c6x/kernel/ptrace.c | 1 + arch/cris/arch-v10/kernel/process.c | 1 + arch/cris/arch-v10/kernel/ptrace.c | 1 + arch/cris/arch-v10/kernel/signal.c | 1 + arch/cris/arch-v32/kernel/process.c | 1 + arch/cris/arch-v32/kernel/ptrace.c | 1 + arch/cris/arch-v32/kernel/signal.c | 1 + arch/frv/kernel/process.c | 1 + arch/h8300/kernel/process.c | 1 + arch/h8300/kernel/signal.c | 1 + arch/hexagon/kernel/kgdb.c | 1 + arch/hexagon/kernel/process.c | 1 + arch/hexagon/kernel/ptrace.c | 1 + arch/hexagon/kernel/signal.c | 2 ++ arch/hexagon/kernel/stacktrace.c | 1 + arch/hexagon/kernel/traps.c | 1 + arch/ia64/kernel/perfmon.c | 1 + arch/ia64/kernel/process.c | 1 + arch/ia64/kernel/ptrace.c | 1 + arch/ia64/kernel/setup.c | 1 + arch/ia64/kernel/sys_ia64.c | 1 + arch/m32r/kernel/process.c | 1 + arch/m32r/kernel/ptrace.c | 1 + arch/m68k/kernel/process.c | 1 + arch/m68k/kernel/ptrace.c | 1 + arch/metag/kernel/process.c | 1 + arch/metag/kernel/ptrace.c | 2 ++ arch/metag/kernel/signal.c | 1 + arch/metag/kernel/smp.c | 1 + arch/metag/kernel/traps.c | 1 + arch/microblaze/kernel/process.c | 1 + arch/microblaze/kernel/ptrace.c | 1 + arch/microblaze/kernel/unwind.c | 1 + arch/mips/include/asm/fpu.h | 1 + arch/mips/kernel/crash.c | 1 + arch/mips/kernel/perf_event.c | 1 + arch/mips/kernel/process.c | 1 + arch/mips/kernel/ptrace.c | 1 + arch/mips/kernel/ptrace32.c | 1 + arch/mips/kernel/stacktrace.c | 1 + arch/mips/kernel/syscall.c | 1 + arch/mips/loongson64/loongson-3/smp.c | 1 + arch/mips/paravirt/paravirt-smp.c | 1 + arch/mips/sibyte/bcm1480/smp.c | 1 + arch/mn10300/kernel/process.c | 1 + arch/mn10300/kernel/ptrace.c | 1 + arch/nios2/kernel/process.c | 1 + arch/nios2/kernel/ptrace.c | 1 + arch/openrisc/kernel/process.c | 1 + arch/openrisc/kernel/ptrace.c | 1 + arch/parisc/kernel/process.c | 1 + arch/powerpc/kernel/process.c | 1 + arch/powerpc/mm/fault.c | 1 + arch/powerpc/perf/perf_regs.c | 1 + arch/s390/include/asm/compat.h | 1 + arch/s390/include/asm/kprobes.h | 1 + arch/s390/kernel/compat_signal.c | 1 + arch/s390/kernel/dumpstack.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/ptrace.c | 1 + arch/s390/kernel/runtime_instr.c | 2 ++ arch/s390/kernel/signal.c | 1 + arch/s390/kernel/smp.c | 1 + arch/s390/kernel/uprobes.c | 2 ++ arch/score/kernel/process.c | 1 + arch/score/kernel/ptrace.c | 1 + arch/sh/kernel/cpu/fpu.c | 1 + 
arch/sh/kernel/dumpstack.c | 1 + arch/sh/kernel/kgdb.c | 2 ++ arch/sh/kernel/process.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/process_64.c | 1 + arch/sh/kernel/ptrace_32.c | 1 + arch/sh/kernel/ptrace_64.c | 1 + arch/sh/kernel/signal_32.c | 1 + arch/sh/kernel/sys_sh32.c | 1 + arch/sh/kernel/traps.c | 1 + arch/sh/kernel/traps_32.c | 2 ++ arch/sparc/kernel/process_32.c | 1 + arch/sparc/kernel/process_64.c | 1 + arch/sparc/kernel/ptrace_64.c | 1 + arch/tile/kernel/compat_signal.c | 1 + arch/tile/kernel/kgdb.c | 2 ++ arch/tile/kernel/process.c | 1 + arch/tile/kernel/ptrace.c | 2 ++ arch/tile/kernel/signal.c | 1 + arch/tile/kernel/stack.c | 1 + arch/um/kernel/exec.c | 1 + arch/um/kernel/process.c | 1 + arch/um/kernel/skas/process.c | 1 + arch/unicore32/kernel/process.c | 1 + arch/unicore32/kernel/ptrace.c | 1 + arch/unicore32/kernel/traps.c | 1 + arch/x86/entry/common.c | 1 + arch/x86/entry/vdso/vma.c | 1 + arch/x86/ia32/ia32_aout.c | 1 + arch/x86/ia32/ia32_signal.c | 1 + arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/fpu/regset.c | 1 + arch/x86/kernel/ioport.c | 1 + arch/x86/kernel/irq_64.c | 1 + arch/x86/kernel/perf_regs.c | 1 + arch/x86/kernel/process.c | 1 + arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 1 + arch/x86/kernel/ptrace.c | 1 + arch/x86/kernel/signal.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/kernel/stacktrace.c | 1 + arch/x86/kernel/step.c | 1 + arch/x86/kernel/traps.c | 1 + arch/x86/kernel/unwind_frame.c | 1 + arch/x86/kernel/vm86_32.c | 1 + arch/x86/mm/fault.c | 1 + arch/xtensa/kernel/process.c | 1 + arch/xtensa/kernel/ptrace.c | 1 + arch/xtensa/kernel/signal.c | 1 + arch/xtensa/kernel/smp.c | 1 + block/blk-map.c | 1 + drivers/hv/vmbus_drv.c | 2 ++ drivers/ide/ide-cd.c | 1 + drivers/md/bcache/closure.h | 1 + drivers/misc/lkdtm_usercopy.c | 1 + drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 1 + fs/binfmt_aout.c | 1 + fs/binfmt_elf.c | 1 + fs/binfmt_elf_fdpic.c | 3 ++- fs/binfmt_flat.c | 1 + fs/coredump.c | 1 + include/linux/elfcore.h | 2 ++ include/linux/perf_regs.h | 2 ++ include/linux/sched/task_stack.h | 6 ++++++ init/main.c | 1 + kernel/events/callchain.c | 2 ++ kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/printk/printk.c | 1 + kernel/sched/sched.h | 1 + kernel/seccomp.c | 1 + kernel/signal.c | 1 + kernel/trace/trace_stack.c | 1 + lib/debugobjects.c | 1 + lib/dma-debug.c | 1 + lib/syscall.c | 1 + mm/kasan/kasan.c | 1 + mm/kmemleak.c | 1 + mm/util.c | 1 + 178 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/task_stack.h diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 3b9b2a382ba2..73446baa632e 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 713b4fac998e..0b9635040721 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index bc4d2cdcf21d..285a82d491ef 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index b8221f112eee..8129dd92cadc 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -7,6 +7,7 @@ */ #include +#include #include #include 
#include diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index ecf6a7869375..9a3c34af2ae8 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -10,6 +10,7 @@ #include #include +#include #include #include diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index d618d26721ab..2a018de6d6cd 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 4442204fe238..31150060d38b 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index d347bbc086fe..48685445002e 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -53,6 +53,8 @@ #include #include #include +#include + #include struct rt_sigframe { diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 592dda3f21ff..c366b83bf955 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d4c7c9a1afa9..939e8b58c59d 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 46f7bab81c40..58e3771e4c5b 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 2083a5370308..b724cff7ad60 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index a9dad001b97d..948c648fea00 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index eb8432bb82b8..e39d487bf724 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -23,6 +23,7 @@ */ #include #include +#include #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 2bd426448fc1..32913567da08 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index d217c9e95b06..2122cd187f19 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 3f62b35fb6f1..bd1b74c2436f 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f9b8e74ff8f2..043d373b8369 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git 
a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 64fc32ea3422..c142459a88f3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f66e58523b96..83c0a839a6ad 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 7597e42feeea..feac80c22f61 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5a5d83791837..bdbd0c3febf4 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 75b944a5107c..ad0dfccedb79 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c index a89b893279bb..41a14e96a1db 100644 --- a/arch/avr32/kernel/ptrace.c +++ b/arch/avr32/kernel/ptrace.c @@ -8,6 +8,7 @@ #undef DEBUG #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/stacktrace.c b/arch/avr32/kernel/stacktrace.c index c09f0d8dd679..f8cc995cf0e0 100644 --- a/arch/avr32/kernel/stacktrace.c +++ b/arch/avr32/kernel/stacktrace.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ #include +#include #include #include #include diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 1a1f444004b1..b691ef875a40 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 360d99645163..a6827095b99a 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c index ea570db598e5..5f5172779204 100644 --- a/arch/blackfin/kernel/signal.c +++ b/arch/blackfin/kernel/signal.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/blackfin/kernel/stacktrace.c b/arch/blackfin/kernel/stacktrace.c index 30301e1eace5..17198f3650b6 100644 --- a/arch/blackfin/kernel/stacktrace.c +++ b/arch/blackfin/kernel/stacktrace.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index a2e6db2ce811..3ac98252d299 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c index 6b61779d426a..c4ecb24c2d5c 100644 --- a/arch/c6x/kernel/process.c +++ b/arch/c6x/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include diff --git a/arch/c6x/kernel/ptrace.c b/arch/c6x/kernel/ptrace.c index 3c494e84444d..a27e1f02ce18 100644 --- a/arch/c6x/kernel/ptrace.c +++ 
b/arch/c6x/kernel/ptrace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 808c2186b704..e299d30105b5 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c index eca94c7d56e7..c2f2b9b83cc4 100644 --- a/arch/cris/arch-v10/kernel/ptrace.c +++ b/arch/cris/arch-v10/kernel/ptrace.c @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c index db30c98e4926..bab4a8dd6bfd 100644 --- a/arch/cris/arch-v10/kernel/signal.c +++ b/arch/cris/arch-v10/kernel/signal.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index c852df262948..c530a8fa87ce 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index c366bc05466a..0461e95bbb62 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c index 816bf2ca93ef..ea2e8e1398e8 100644 --- a/arch/cris/arch-v32/kernel/signal.c +++ b/arch/cris/arch-v32/kernel/signal.c @@ -3,6 +3,7 @@ */ #include +#include #include #include #include diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index c96dbd4b8626..5a4c92abc99e 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 54e4d6f01865..0f5db5bb561b 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index d784f7117f9a..1e8070d08770 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -25,6 +25,7 @@ */ #include +#include #include #include #include diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c index 62dece3ad827..16c24b22d0b2 100644 --- a/arch/hexagon/kernel/kgdb.c +++ b/arch/hexagon/kernel/kgdb.c @@ -20,6 +20,7 @@ #include #include +#include #include #include diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index a2a822a875b6..de715bab7956 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/ptrace.c b/arch/hexagon/kernel/ptrace.c index 390a9ad14ca1..ecd75e2e8eb3 100644 --- a/arch/hexagon/kernel/ptrace.c +++ b/arch/hexagon/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index c6b22b9945a7..78aa7304a5c9 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -21,6 +21,8 @@ #include #include #include +#include + #include 
#include #include diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c index f94918b449a8..41866a06adf7 100644 --- a/arch/hexagon/kernel/stacktrace.c +++ b/arch/hexagon/kernel/stacktrace.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index b55f13c70b34..2942a9204a9a 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 7e943d3c05ed..09f86ebfcc7b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 054facf22156..d344d0d691aa 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 04fe1436e1cc..3f8293378a83 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 32a6cc36296f..b2b4f5216180 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index ce4cc60d519b..5ce927c854a6 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include /* doh, must come after sched.h... 
*/ #include diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 2a450382934c..d8ffcfec599c 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index a68acb9fa515..2d887400e30e 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 5d335d178706..e475c945c8b2 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 9cd86d7343a6..748c63bd0081 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index 801e6f927e62..c4606ce743d2 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 7563628822bd..5fd16ee5280c 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #define CREATE_TRACE_POINTS diff --git a/arch/metag/kernel/signal.c b/arch/metag/kernel/signal.c index ce49d429c74a..338925d808e6 100644 --- a/arch/metag/kernel/signal.c +++ b/arch/metag/kernel/signal.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index 198eda75846b..cab2aa64ca82 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 23c5ac33a93f..444851e510d5 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index a642fe5ac5ff..e92a817e645f 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index 8cfa98cadf3d..badd286882ae 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/unwind.c b/arch/microblaze/kernel/unwind.c index 61c04eed14d5..34c270cb11fc 100644 --- a/arch/microblaze/kernel/unwind.c +++ b/arch/microblaze/kernel/unwind.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index f06f97bd62df..321752bcbab6 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -11,6 +11,7 @@ #define _ASM_FPU_H #include +#include #include #include diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index 5a71518be0f1..ca25cd393b1c 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -8,6 +8,7 @@ 
#include #include #include +#include /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu = -1; diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c index d64056e0bb56..f298eb2ff6c2 100644 --- a/arch/mips/kernel/perf_event.c +++ b/arch/mips/kernel/perf_event.c @@ -15,6 +15,7 @@ */ #include +#include #include diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8bfb833b3158..fb6b6b650719 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index fdef26382c37..339601267265 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 4f0998525626..40e212d6b26b 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/stacktrace.c b/arch/mips/kernel/stacktrace.c index 986f910961d9..7c7c902249f2 100644 --- a/arch/mips/kernel/stacktrace.c +++ b/arch/mips/kernel/stacktrace.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index c86ddbaa4598..f1d17ece4181 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index 66ede5b11355..64659fc73940 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/paravirt/paravirt-smp.c b/arch/mips/paravirt/paravirt-smp.c index f8d3e081b2eb..72eb1a56c645 100644 --- a/arch/mips/paravirt/paravirt-smp.c +++ b/arch/mips/paravirt/paravirt-smp.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index 4c71aea25663..d0e94ffcc1b8 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 8ca7a9de09a5..c9fa42619c6a 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index 976020f469c1..8009876a7ac4 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 5ee1ddf6a924..869a4d59de32 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/nios2/kernel/ptrace.c b/arch/nios2/kernel/ptrace.c index 681dda92eff1..de97bcb7dd44 100644 --- a/arch/nios2/kernel/ptrace.c +++ b/arch/nios2/kernel/ptrace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git 
a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 0c5ad3bde0f5..828a29110459 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c index 228288887d74..eb97a8e7c8aa 100644 --- a/arch/openrisc/kernel/ptrace.c +++ b/arch/openrisc/kernel/ptrace.c @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index e5f97239cc5e..06f7ca7fe70b 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b99b12656f6f..d645da302bf2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8dc758658972..51def8a515be 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index d24a8a3668fa..cbd82fde5770 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 352f7bdaf11f..0ddd37e6c29d 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -5,6 +5,7 @@ */ #include #include +#include #include #define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64)) diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 84c0f9086483..1293c4066cfc 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -35,6 +35,7 @@ #include #include #include +#include #define __ARCH_WANT_KPROBES_INSN_SLOT diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 362350cc485c..c620049c61f2 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 72c584d62f59..829e1c53005c 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 250c41f05721..20cd339e11ae 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 12020b55887b..c14df0a1ec3c 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index fffa0e5462af..429d3a782f1c 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/s390/kernel/signal.c 
b/arch/s390/kernel/signal.c index 62a4c263e887..289dd50f9744 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b5766e03cdcb..47a973b5b4f1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 66956c09d5bf..314e0ee3016a 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -9,6 +9,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index 32924159d8c9..eb64d7a677cb 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c index 8b75e54816c1..d8455e60bce0 100644 --- a/arch/score/kernel/ptrace.c +++ b/arch/score/kernel/ptrace.c @@ -28,6 +28,7 @@ #include #include #include +#include #include diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index b76370a47bf9..547c73478459 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index d00cab2f50f9..b564b1eae4ae 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index adad46e41a1d..4f04c6638a4d 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 80a61c5e3015..f8a695a223dd 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 493c3eb86aa5..2c7bdf8cb934 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 056607185158..ee2abe96f9f3 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 1aabfd356b35..5fc3ff606210 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index c49d0d05a215..1e0656d9e7af 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 5128d3001ee5..08bce11badc6 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -9,6 +9,7 @@ * */ #include +#include #include #include #include diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index 
d5287d76809c..a2e1231a90a3 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index bc5c9f347b9e..b32d1c3a4655 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index ff639342a8be..57cff00cad17 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index ed87c45a785b..b6dac8e980f0 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 4c82a73af893..1badc493e62e 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 901063c1cf7e..df9e731a76f5 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index c667e104a0c2..0e863f1ee08c 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/kgdb.c b/arch/tile/kernel/kgdb.c index 9247d6b562f4..d4eb5fb2df9d 100644 --- a/arch/tile/kernel/kgdb.c +++ b/arch/tile/kernel/kgdb.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include static tile_bundle_bits singlestep_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP; diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index dbb4fa76d1dd..f0a0e18e4dfb 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index e279572824b1..e1a078e6828e 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 8cc92ae6eb27..f2bf557bb005 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index a1c0787fe9d8..94ecbc6676e5 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 2df4e5d11939..31968677a0d0 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index e76dc7c11251..a9bd61820042 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/skas/process.c 
b/arch/um/kernel/skas/process.c index 527fa5881915..ddecf326dab2 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index d58f1600b477..d22c1dc7e39e 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/ptrace.c b/arch/unicore32/kernel/ptrace.c index 9f07c08da050..a102c2b4f358 100644 --- a/arch/unicore32/kernel/ptrace.c +++ b/arch/unicore32/kernel/ptrace.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * this routine will get a word off of the processes privileged stack. diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 506e1a2127c6..5f25b39f04d4 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index b83c61cfd154..370c42c7f046 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 572cee3fccff..226ca70dc6bd 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 7c0a711989d2..8d0879f1d42c 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 95c0b4ae09b0..724153797209 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 7b9e7e68f316..09d4ac0d2661 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index c114b132d121..b188b16841e3 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -5,6 +5,7 @@ #include #include #include +#include /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b01bc8517450..ca49bab3e467 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 6b0678a541e2..3be74fbdeff2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index da8cb987b973..587d887f7f17 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 441f00e7be8f..56b059486c3b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,7 
@@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d79ffa47aab8..4c818f8bc135 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 124f5652ae3c..d6b784a5520d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 9cc7d5a330ef..2364b23ea3e5 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 763af1d0de64..396c042e9d0e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f3eb266d75ff..bd1f1ad35284 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 330cae0025d0..8e2b79b88e51 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index a23ce84a3f6c..f07f83b3611b 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -2,6 +2,7 @@ * x86 single-step support code, common to 32-bit and 64-bit. */ #include +#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1dc86ee60a03..948443e115c1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 7c0abdc9a732..478d15dbaee4 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 0442d98367ae..23ee89ce59a9 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e3254ca0eec4..428e31763cb9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -4,6 +4,7 @@ * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar */ #include /* test_thread_flag(), ... */ +#include /* task_stack_*(), ... */ #include /* oops_begin/end, ... 
*/ #include /* search_exception_tables */ #include /* max_low_pfn */ diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index af33f9d4c624..58f96d1230d4 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index 32519b71d914..e0f583fed06a 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index c41294745731..70a131945443 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index f2b3e1725349..fd894eaa63f3 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-map.c b/block/blk-map.c index 2f18c2a0be1b..3b5cb863318f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -2,6 +2,7 @@ * Functions related to mapping data to requests */ #include +#include #include #include #include diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index f7f6b9144b07..da6b59ba5940 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -34,6 +34,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index aef00511ca86..74f1b7dc03f7 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 9b2fe2d3e3a9..1ec84ca81146 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -3,6 +3,7 @@ #include #include +#include #include /* diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c index 1dd611423d8b..df6ac985fbb5 100644 --- a/drivers/misc/lkdtm_usercopy.c +++ b/drivers/misc/lkdtm_usercopy.c @@ -5,6 +5,7 @@ #include "lkdtm.h" #include #include +#include #include #include #include diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 6c062b8251d2..d52139635b67 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -20,6 +20,7 @@ */ #include #include +#include #include #include #include diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 2a59139f520b..9be82c4e14a4 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 2c95257fa4da..92c00a13b28b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 222873bb689c..6103a8149ccd 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -35,7 +37,6 @@ #include #include #include -#include #include #include diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 9b2917a30294..2edcefc0a294 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -19,6 +19,7 @@ #include #include +#include 
#include #include #include diff --git a/fs/coredump.c b/fs/coredump.c index c74ab43b8383..592683711c64 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 698d51a0eea3..c8240a12c42d 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -3,6 +3,8 @@ #include #include +#include + #include #include diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index a5f98d53d732..9b7dd59fe28d 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -1,6 +1,8 @@ #ifndef _LINUX_PERF_REGS_H #define _LINUX_PERF_REGS_H +#include + struct perf_regs { __u64 abi; struct pt_regs *regs; diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h new file mode 100644 index 000000000000..933cd49824c2 --- /dev/null +++ b/include/linux/sched/task_stack.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TASK_STACK_H +#define _LINUX_SCHED_TASK_STACK_H + +#include + +#endif /* _LINUX_SCHED_TASK_STACK_H */ diff --git a/init/main.c b/init/main.c index 47956b22add1..c2d337a136d1 100644 --- a/init/main.c +++ b/init/main.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index e9fdb5203de5..c04917cad1bf 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -11,6 +11,8 @@ #include #include +#include + #include "internal.h" struct callchain_cpus_entries { diff --git a/kernel/exit.c b/kernel/exit.c index e53408d156df..771c33fc9952 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 0af64d5d8b73..c87b6f03c710 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 3caaaa04b92e..2984fb0f0257 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6b7155ae5c33..4e2fec3f12a0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e15185c28de5..65f61077ad50 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 25d099fd80f7..e6d470a5f75a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 2a1abbaca10e..1d68b5b7ad41 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -2,6 +2,7 @@ * Copyright (C) 2008 Steven Rostedt * */ +#include #include #include #include diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 8c28cbd7e104..17afb0430161 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 942adf13418d..b157b46cc9a6 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, 
Boston, MA 02111-1307 USA */ +#include #include #include #include diff --git a/lib/syscall.c b/lib/syscall.c index 63239e097b13..17d5ff5fa6a3 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index d99312ed7c22..98b27195e38b 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index a3970573359e..26c874e90b12 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/util.c b/mm/util.c index 14f4e13323e2..656dc5e37a87 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include -- GitLab From dfc3401a33086a3fd465468e171ea0e82430569b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:15:21 +0100 Subject: [PATCH 0905/1084] sched/headers: Prepare to move the 'root_task_group' declaration to Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 1 + kernel/sched/sched.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 3a85d61f7614..452c9799318c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4e2fec3f12a0..b1f1c8443837 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,5 +1,6 @@ #include +#include #include #include #include -- GitLab From e6d930b4e0115eb0732eec0efb11c3b09a8000fc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:43:50 +0100 Subject: [PATCH 0906/1084] signals: Prepare to split out from Introduce dummy header and add dependencies to places that will depend on it. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/mips/include/asm/abi.h | 2 ++ include/linux/signal.h | 3 +-- include/linux/signal_types.h | 7 +++++++ 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 include/linux/signal_types.h diff --git a/arch/mips/include/asm/abi.h b/arch/mips/include/asm/abi.h index 940760844e2f..dba7f4b6bebf 100644 --- a/arch/mips/include/asm/abi.h +++ b/arch/mips/include/asm/abi.h @@ -9,6 +9,8 @@ #ifndef _ASM_ABI_H #define _ASM_ABI_H +#include + #include #include #include diff --git a/include/linux/signal.h b/include/linux/signal.h index 5308304993be..d1c2b05a7a55 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -1,9 +1,8 @@ #ifndef _LINUX_SIGNAL_H #define _LINUX_SIGNAL_H -#include #include -#include +#include struct task_struct; diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h new file mode 100644 index 000000000000..28799c88f490 --- /dev/null +++ b/include/linux/signal_types.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_SIGNAL_TYPES_H +#define _LINUX_SIGNAL_TYPES_H + +#include +#include + +#endif /* _LINUX_SIGNAL_TYPES_H */ -- GitLab From f361bf4a66c9bfabace46f6ff5d97005c9b524fe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:47:37 +0100 Subject: [PATCH 0907/1084] sched/headers: Prepare for the reduction of 's signal API dependency Instead of including the full , we are going to include the types-only header in , to further decouple the scheduler header from the signal headers. This means that various files which relied on the full need to be updated to gain an explicit dependency on it. Update the code that relies on sched.h's inclusion of the header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/sys_compat.c | 1 + drivers/gpu/drm/i915/i915_gem_request.c | 1 + drivers/isdn/mISDN/stack.c | 2 ++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/free-space-cache.c | 1 + fs/buffer.c | 1 + fs/ceph/addr.c | 1 + fs/dax.c | 1 + fs/ioctl.c | 2 ++ fs/iomap.c | 2 ++ fs/ocfs2/super.c | 1 + include/linux/sched.h | 1 + include/linux/sched/signal.h | 1 + kernel/pid_namespace.c | 1 + mm/page-writeback.c | 1 + 15 files changed, 18 insertions(+) diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index abaf582fc7a8..8b8bbd3eaa52 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index df3fef393dbe..e7c3c0318ff6 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "i915_drv.h" diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index b324474c0c12..696f22fd5ab4 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include "core.h" static u_int *debug; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c35b96633554..dad395b0b9fc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. 
*/ #include +#include #include #include #include diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 1a131f7d6c1b..493a654b6012 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/fs/buffer.c b/fs/buffer.c index 28484b3ebc98..9196f2a270da 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f297a9e18642..1a3e1b40799a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" diff --git a/fs/dax.c b/fs/dax.c index 7436c98b92c8..de622d4282a6 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ioctl.c b/fs/ioctl.c index cb9b02940805..569db68d02b3 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include "internal.h" #include diff --git a/fs/iomap.c b/fs/iomap.c index 0f85f2410605..3ca1a8e44135 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -26,6 +26,8 @@ #include #include #include +#include + #include "internal.h" /* diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index a24e42f95341..ca1646fbcaef 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -42,6 +42,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "ocfs2_trace.h" diff --git a/include/linux/sched.h b/include/linux/sched.h index 8ae7b3d85658..ea05116bc3c2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -36,6 +36,7 @@ struct sched_param { #include #include #include +#include #include #include #include diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0f4e9f4a43fd..7e10a7824523 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include #include diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7c8367854dc4..de461aa0bf9a 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -20,6 +20,7 @@ #include #include #include +#include struct pid_cache { int nr_ids; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 26a60818a8fc..d8ac2a7fb9e7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include -- GitLab From 2e58f173ab89b29a1373088b8727133dbf7322b0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:12:19 +0100 Subject: [PATCH 0908/1084] mm/headers, sched/headers: Prepare to split out of We are going to separate all the MM types that are embedded directly in 'struct task_struct' into the new header. Create a new that only contains some includes from mm_types.h itself. This should be trivially correct and easy to bisect to. (This patch does not materially move the types yet.) 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 6 +++--- include/linux/mm_types_task.h | 10 ++++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 include/linux/mm_types_task.h diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 137797cd7b50..6daaf0a51968 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1,9 +1,9 @@ #ifndef _LINUX_MM_TYPES_H #define _LINUX_MM_TYPES_H +#include + #include -#include -#include #include #include #include @@ -13,7 +13,7 @@ #include #include #include -#include + #include #ifndef AT_VECTOR_SIZE_ARCH diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h new file mode 100644 index 000000000000..2419d302f03c --- /dev/null +++ b/include/linux/mm_types_task.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_MM_TYPES_TASK_H +#define _LINUX_MM_TYPES_TASK_H + +#include +#include +#include + +#include + +#endif /* _LINUX_MM_TYPES_TASK_H */ -- GitLab From 589ee62844e042b0b7d19ef57fb4cff77f3ca294 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:16:44 +0100 Subject: [PATCH 0909/1084] sched/headers: Prepare to remove the dependency from Update code that relied on sched.h including various MM types for them. This will allow us to remove the include from . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/a.out-core.h | 1 + arch/alpha/include/asm/mmu_context.h | 2 ++ arch/arc/mm/tlb.c | 2 ++ arch/arm/include/asm/mmu_context.h | 2 ++ arch/arm/include/asm/tlbflush.h | 7 ++++--- arch/arm/kernel/suspend.c | 1 + arch/arm/kernel/swp_emulate.c | 1 + arch/arm/mm/idmap.c | 1 + arch/arm64/include/asm/mmu_context.h | 1 + arch/arm64/kernel/traps.c | 1 + arch/avr32/include/asm/mmu_context.h | 2 ++ arch/blackfin/include/asm/mmu_context.h | 2 ++ arch/blackfin/kernel/flat.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/mm/sram-alloc.c | 2 ++ arch/cris/arch-v10/mm/tlb.c | 2 ++ arch/cris/arch-v32/mm/tlb.c | 1 + arch/cris/include/asm/pgtable.h | 2 +- arch/cris/mm/tlb.c | 2 ++ arch/h8300/kernel/traps.c | 1 + arch/hexagon/include/asm/mmu_context.h | 2 ++ arch/hexagon/kernel/smp.c | 1 + arch/ia64/include/asm/mmu_context.h | 1 + arch/ia64/include/asm/pgtable.h | 2 +- arch/ia64/sn/kernel/sn2/sn2_smp.c | 1 + arch/m32r/include/asm/mmu_context.h | 2 ++ arch/m68k/include/asm/a.out-core.h | 1 + arch/m68k/include/asm/mmu_context.h | 1 + arch/metag/include/asm/mmu_context.h | 1 + arch/microblaze/include/asm/mmu_context_mm.h | 2 ++ arch/microblaze/mm/pgtable.c | 1 + arch/mips/include/asm/elf.h | 2 ++ arch/mips/include/asm/mmu_context.h | 2 ++ arch/mips/kernel/smp.c | 2 +- arch/mips/math-emu/dsemul.c | 1 + arch/mips/mm/ioremap.c | 1 + arch/mn10300/include/asm/mmu_context.h | 2 ++ arch/mn10300/kernel/smp.c | 2 +- arch/mn10300/mm/tlb-smp.c | 2 +- arch/nios2/include/asm/mmu_context.h | 2 ++ arch/nios2/kernel/process.c | 1 + arch/powerpc/kernel/io-workarounds.c | 2 +- arch/powerpc/kvm/e500_mmu_host.c | 2 +- arch/powerpc/lib/feature-fixups.c | 1 + arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/pgtable-book3s64.c | 2 ++ arch/powerpc/mm/pgtable-hash64.c | 2 ++ arch/powerpc/mm/pgtable-radix.c | 2 +- arch/powerpc/mm/slb.c | 2 ++ arch/s390/include/asm/elf.h | 2 +- arch/s390/include/asm/mmu_context.h | 1 + arch/s390/kernel/processor.c | 1 + arch/s390/kvm/gaccess.c | 2 ++ 
arch/s390/kvm/priv.c | 2 ++ arch/score/include/asm/mmu_context.h | 2 ++ arch/score/kernel/traps.c | 1 + arch/sh/include/asm/mmu_context.h | 2 ++ arch/sparc/include/asm/mmu_context_64.h | 2 ++ arch/sparc/include/asm/pgtable_64.h | 3 +++ arch/sparc/kernel/asm-offsets.c | 1 + arch/sparc/kernel/traps_32.c | 1 + arch/sparc/mm/tsb.c | 2 ++ arch/tile/include/asm/mmu_context.h | 2 ++ arch/um/include/asm/mmu_context.h | 2 ++ arch/um/kernel/exec.c | 2 +- arch/um/kernel/reboot.c | 1 + arch/um/kernel/skas/process.c | 3 ++- arch/x86/entry/vsyscall/vsyscall_64.c | 1 + arch/x86/events/core.c | 2 +- arch/x86/include/asm/a.out-core.h | 2 ++ arch/x86/include/asm/mpx.h | 2 ++ arch/x86/mm/mpx.c | 1 + arch/x86/um/syscalls_64.c | 1 + arch/x86/xen/mmu.c | 2 +- arch/xtensa/include/asm/mmu_context.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 2 ++ drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/media/v4l2-core/videobuf-dma-sg.c | 2 +- fs/binfmt_misc.c | 2 +- fs/kernfs/file.c | 2 +- include/drm/drm_mm.h | 1 + include/linux/init_task.h | 1 + include/linux/sched/mm.h | 1 + kernel/sched/debug.c | 2 +- kernel/sched/fair.c | 2 +- kernel/signal.c | 2 +- lib/is_single_threaded.c | 1 + 89 files changed, 125 insertions(+), 25 deletions(-) diff --git a/arch/alpha/include/asm/a.out-core.h b/arch/alpha/include/asm/a.out-core.h index 9e33e92e524c..1610d078b064 100644 --- a/arch/alpha/include/asm/a.out-core.h +++ b/arch/alpha/include/asm/a.out-core.h @@ -15,6 +15,7 @@ #ifdef __KERNEL__ #include +#include /* * Fill in the user structure for an ECOFF core dump. diff --git a/arch/alpha/include/asm/mmu_context.h b/arch/alpha/include/asm/mmu_context.h index 4c51c05333c6..384bd47b5187 100644 --- a/arch/alpha/include/asm/mmu_context.h +++ b/arch/alpha/include/asm/mmu_context.h @@ -7,6 +7,8 @@ * Copyright (C) 1996, Linus Torvalds */ +#include + #include #include #include diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index bdb295e09160..d0126fdfe2d8 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -53,6 +53,8 @@ #include #include +#include + #include #include #include diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 3cc14dd8587c..7f303295ef19 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -15,7 +15,9 @@ #include #include +#include #include + #include #include #include diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index def9e570199f..1897b5196fb5 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -10,6 +10,10 @@ #ifndef _ASMARM_TLBFLUSH_H #define _ASMARM_TLBFLUSH_H +#ifndef __ASSEMBLY__ +# include +#endif + #ifdef CONFIG_MMU #include @@ -644,9 +648,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #elif defined(CONFIG_SMP) /* !CONFIG_MMU */ #ifndef __ASSEMBLY__ - -#include - static inline void local_flush_tlb_all(void) { } static inline void local_flush_tlb_mm(struct mm_struct *mm) { } static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { } diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 9a2f882a0a2d..ef794c799cb6 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 853221f81104..3bda08bee674 100644 --- 
a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index c1a48f88764e..3e511bec69b8 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3c9f7d18a7e6..3257895a9b5e 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index bdbd0c3febf4..e52be6aa44ee 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include diff --git a/arch/avr32/include/asm/mmu_context.h b/arch/avr32/include/asm/mmu_context.h index 27ff23407100..cd87abba8db7 100644 --- a/arch/avr32/include/asm/mmu_context.h +++ b/arch/avr32/include/asm/mmu_context.h @@ -12,6 +12,8 @@ #ifndef __ASM_AVR32_MMU_CONTEXT_H #define __ASM_AVR32_MMU_CONTEXT_H +#include + #include #include #include diff --git a/arch/blackfin/include/asm/mmu_context.h b/arch/blackfin/include/asm/mmu_context.h index 15b16d3e8de8..0ce6de873b27 100644 --- a/arch/blackfin/include/asm/mmu_context.h +++ b/arch/blackfin/include/asm/mmu_context.h @@ -9,6 +9,8 @@ #include #include +#include + #include #include #include diff --git a/arch/blackfin/kernel/flat.c b/arch/blackfin/kernel/flat.c index a88daddbf074..b5b658449616 100644 --- a/arch/blackfin/kernel/flat.c +++ b/arch/blackfin/kernel/flat.c @@ -6,6 +6,7 @@ #include #include +#include #include #define FLAT_BFIN_RELOC_TYPE_16_BIT 0 diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index b691ef875a40..89d5162d4ca6 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mm/sram-alloc.c b/arch/blackfin/mm/sram-alloc.c index 1f3b3ef3e103..d2a96c2c02a3 100644 --- a/arch/blackfin/mm/sram-alloc.c +++ b/arch/blackfin/mm/sram-alloc.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include #include #include "blackfin_sram.h" diff --git a/arch/cris/arch-v10/mm/tlb.c b/arch/cris/arch-v10/mm/tlb.c index 21d78c599bab..3225d38bdaea 100644 --- a/arch/cris/arch-v10/mm/tlb.c +++ b/arch/cris/arch-v10/mm/tlb.c @@ -10,6 +10,8 @@ * */ +#include + #include #include #include diff --git a/arch/cris/arch-v32/mm/tlb.c b/arch/cris/arch-v32/mm/tlb.c index c030d020660a..bc3de5b5e27c 100644 --- a/arch/cris/arch-v32/mm/tlb.c +++ b/arch/cris/arch-v32/mm/tlb.c @@ -6,6 +6,7 @@ * Authors: Bjorn Wesen * Tobias Anderberg , CRISv32 port. 
*/ +#include #include #include diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index ceefc314d64d..2a3210ba4c72 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -9,7 +9,7 @@ #include #ifndef __ASSEMBLY__ -#include +#include #include #endif #include diff --git a/arch/cris/mm/tlb.c b/arch/cris/mm/tlb.c index b7f8de576777..8413741cfa0f 100644 --- a/arch/cris/mm/tlb.c +++ b/arch/cris/mm/tlb.c @@ -9,6 +9,8 @@ #include #include +#include + #include #define D(x) diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index ee7e3ce40d78..e47a9e0dc278 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/hexagon/include/asm/mmu_context.h b/arch/hexagon/include/asm/mmu_context.h index d423d2e73c30..d8a071afdd1d 100644 --- a/arch/hexagon/include/asm/mmu_context.h +++ b/arch/hexagon/include/asm/mmu_context.h @@ -21,6 +21,8 @@ #ifndef _ASM_MMU_CONTEXT_H #define _ASM_MMU_CONTEXT_H +#include + #include #include #include diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index c02a6455839e..1f63e91a353b 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include /* timer_interrupt */ #include diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h index 7f2a456603cb..9b99368633b5 100644 --- a/arch/ia64/include/asm/mmu_context.h +++ b/arch/ia64/include/asm/mmu_context.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 9f3ed9ee8f13..384794e665fc 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -147,7 +147,7 @@ # ifndef __ASSEMBLY__ -#include /* for mm_struct */ +#include /* for mm_struct */ #include #include #include diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index c98dc965fe82..b73b0ebf8214 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/include/asm/mmu_context.h b/arch/m32r/include/asm/mmu_context.h index 9fc78fc44445..1230b7050d8e 100644 --- a/arch/m32r/include/asm/mmu_context.h +++ b/arch/m32r/include/asm/mmu_context.h @@ -12,6 +12,8 @@ #ifndef __ASSEMBLY__ #include +#include + #include #include #include diff --git a/arch/m68k/include/asm/a.out-core.h b/arch/m68k/include/asm/a.out-core.h index f6bfc1d63ff6..ae91ea6bb303 100644 --- a/arch/m68k/include/asm/a.out-core.h +++ b/arch/m68k/include/asm/a.out-core.h @@ -16,6 +16,7 @@ #include #include +#include /* * fill in the user structure for an a.out core dump diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h index dc3be991d634..4a6ae6dffa34 100644 --- a/arch/m68k/include/asm/mmu_context.h +++ b/arch/m68k/include/asm/mmu_context.h @@ -2,6 +2,7 @@ #define __M68K_MMU_CONTEXT_H #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/arch/metag/include/asm/mmu_context.h b/arch/metag/include/asm/mmu_context.h index ae2a71b5e0be..2e0312748197 100644 --- a/arch/metag/include/asm/mmu_context.h +++ b/arch/metag/include/asm/mmu_context.h @@ -9,6 +9,7 @@ #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, 
struct task_struct *tsk) diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h index d68647746448..99472d2ca340 100644 --- a/arch/microblaze/include/asm/mmu_context_mm.h +++ b/arch/microblaze/include/asm/mmu_context_mm.h @@ -12,6 +12,8 @@ #define _ASM_MICROBLAZE_MMU_CONTEXT_H #include +#include + #include #include #include diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index cc732fe357ad..4c0599239915 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index 7a6c466e5f2a..0eb1a75be105 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -10,6 +10,8 @@ #include #include +#include + #include #include diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index 2abf94f72c0a..da2004cef2d5 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h @@ -13,8 +13,10 @@ #include #include +#include #include #include + #include #include #include diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 8c60a296294c..6e71130549ea 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index c4469ff4a996..6664908514b3 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 1f189627440f..1986e09fb457 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/include/asm/mmu_context.h b/arch/mn10300/include/asm/mmu_context.h index 75dbe696f830..d2034f5e6eda 100644 --- a/arch/mn10300/include/asm/mmu_context.h +++ b/arch/mn10300/include/asm/mmu_context.h @@ -23,6 +23,8 @@ #define _ASM_MMU_CONTEXT_H #include +#include + #include #include #include diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index d924f7a79708..35d2c3fe6f76 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/mm/tlb-smp.c b/arch/mn10300/mm/tlb-smp.c index 9a39ea9031d4..085f2bb691ac 100644 --- a/arch/mn10300/mm/tlb-smp.c +++ b/arch/mn10300/mm/tlb-smp.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/nios2/include/asm/mmu_context.h b/arch/nios2/include/asm/mmu_context.h index 294b4b1f81d4..78ab3dacf579 100644 --- a/arch/nios2/include/asm/mmu_context.h +++ b/arch/nios2/include/asm/mmu_context.h @@ -13,6 +13,8 @@ #ifndef _ASM_NIOS2_MMU_CONTEXT_H #define _ASM_NIOS2_MMU_CONTEXT_H +#include + #include extern void mmu_context_init(void); diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 869a4d59de32..509e7855e8dc 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 5f8613ceb97f..a582e0d42525 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ 
b/arch/powerpc/kernel/io-workarounds.c @@ -12,7 +12,7 @@ #undef DEBUG #include -#include /* for init_mm */ +#include /* for init_mm */ #include #include diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index b0333cc737dd..0fda4230f6c0 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 043415f0bdb1..f3917705c686 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 12d679df50bd..c554768b1fa2 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index b798ff674fab..5fcb3dd74c13 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -8,6 +8,8 @@ */ #include +#include + #include #include diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index c23e286a6b8f..8b85a14b08ea 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -10,6 +10,8 @@ */ #include +#include + #include #include diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index feeda90cd06d..2a590a98e652 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
*/ -#include +#include #include #include diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 48fc28bab544..5e01b2ece1d0 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include #include diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 83aaefed2a7b..1d48880b3cc1 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -132,7 +132,7 @@ typedef s390_fp_regs compat_elf_fpregset_t; typedef s390_compat_regs compat_elf_gregset_t; #include -#include /* for task_struct */ +#include /* for task_struct */ #include #include diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 9b828c073176..6e31d87fb669 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -9,6 +9,7 @@ #include #include +#include #include #include diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index bc2b60dcb178..bf7854523831 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4492c9363178..d55c829a5944 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -6,7 +6,9 @@ */ #include +#include #include + #include #include #include "kvm-s390.h" diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index fb4b494cde9b..64b6a309f2c4 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/score/include/asm/mmu_context.h b/arch/score/include/asm/mmu_context.h index 2644577c96e8..073f95d350de 100644 --- a/arch/score/include/asm/mmu_context.h +++ b/arch/score/include/asm/mmu_context.h @@ -3,7 +3,9 @@ #include #include +#include #include + #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 8a54d320fb41..fe68de6c746c 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h index 35ffdd081d26..eb6ac3c10c44 100644 --- a/arch/sh/include/asm/mmu_context.h +++ b/arch/sh/include/asm/mmu_context.h @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index d0317993e947..22fede6eba11 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -6,6 +6,8 @@ #ifndef __ASSEMBLY__ #include +#include + #include #include diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 7932a4a37817..56e49c8f770d 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -878,6 +878,9 @@ static inline unsigned long pud_pfn(pud_t pud) #define pte_offset_map pte_index #define pte_unmap(pte) do { } while (0) +/* We cannot include at this point yet: */ +extern struct mm_struct init_mm; + /* Actual page table PTE updates. 
*/ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig, int fullmm, diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c index f76389a32342..3f09e1c83f58 100644 --- a/arch/sparc/kernel/asm-offsets.c +++ b/arch/sparc/kernel/asm-offsets.c @@ -11,6 +11,7 @@ */ #include +#include // #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 26740a2f285d..2de72a49308a 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -11,6 +11,7 @@ #include /* for jiffies */ #include +#include #include #include #include diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 23479c3d39f0..0a04811f06b7 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -6,6 +6,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h index f67753db1f78..45a4b4c424cf 100644 --- a/arch/tile/include/asm/mmu_context.h +++ b/arch/tile/include/asm/mmu_context.h @@ -16,6 +16,8 @@ #define _ASM_TILE_MMU_CONTEXT_H #include +#include + #include #include #include diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index 1a60e1328e2f..94ac2739918c 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -7,6 +7,8 @@ #define __UM_MMU_CONTEXT_H #include +#include + #include extern void uml_setup_stubs(struct mm_struct *mm); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 31968677a0d0..a43d42bf0a86 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 0bc921cee0b1..71f3e9217cf2 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index ddecf326dab2..3a952a4f7965 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -4,8 +4,9 @@ */ #include -#include +#include #include + #include #include #include diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index df91fb393a01..ce1d7534fa53 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2ecc0e97772b..349d4d17aa7f 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 7a15588e45d4..7d3ece8bfb61 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -17,6 +17,8 @@ #include #include +#include + #include /* diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 0b416d4cf73b..a0d662be4c5b 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -2,6 +2,8 @@ #define _ASM_X86_MPX_H #include +#include + #include #include diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c98079684bdb..5126dfd52b18 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include diff --git a/arch/x86/um/syscalls_64.c 
b/arch/x86/um/syscalls_64.c index e6552275320b..10d907098c26 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -6,6 +6,7 @@ */ #include +#include #include #include /* XXX This should get the constants from libc */ #include diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f6740b5b1738..37cb5aad71de 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -38,7 +38,7 @@ * * Jeremy Fitzhardinge , XenSource Inc, 2007 */ -#include +#include #include #include #include diff --git a/arch/xtensa/include/asm/mmu_context.h b/arch/xtensa/include/asm/mmu_context.h index 04c8ebdc4517..f7e186dfc4e4 100644 --- a/arch/xtensa/include/asm/mmu_context.h +++ b/arch/xtensa/include/asm/mmu_context.h @@ -17,6 +17,7 @@ #include #include +#include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index d83de985e88c..6acc4313363e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -23,6 +23,8 @@ #include #include +#include + #include "kfd_priv.h" #include "kfd_mqd_manager.h" #include "cik_regs.h" diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index fa32c32fa1c2..a9b9882a9a77 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -23,6 +23,8 @@ #include #include +#include + #include "kfd_priv.h" #include "kfd_mqd_manager.h" #include "vi_structs.h" diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 318ec5267bdf..86ecd3ea6a4b 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index d19662f635b1..5846c47c8d55 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 36bd904946bd..0b5c43f7e020 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9b4688ab1d8e..bee1a36bc2ec 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 35043a8c4529..8e4dc7ab584c 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include "kernfs-internal.h" diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index d81b0ba9921f..2ef16bf25826 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_DRM_DEBUG_MM diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 452c9799318c..f6841c19c913 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,7 @@ #include #include #include +#include #include diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 
1cf7941bb946..d32e3932b2e3 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -2,6 +2,7 @@ #define _LINUX_SCHED_MM_H #include +#include #include #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 109adc0e9cb9..e865d8bfb881 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 11e0ab57748a..3e88b35ac157 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -20,7 +20,7 @@ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra */ -#include +#include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index e6d470a5f75a..2008aa999976 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 9745cfffcb69..8d4678edcc0e 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include /* * Returns true if the task does not share ->mm with another thread/process. -- GitLab From 9164bb4a18dfa592cd0aca455ea57abf89ca4526 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:20:53 +0100 Subject: [PATCH 0910/1084] sched/headers: Prepare to move 'init_task' and 'init_thread_union' from to Update all usage sites first. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/setup.c | 1 + arch/arm64/mm/kasan_init.c | 1 + arch/frv/mm/init.c | 1 + arch/metag/mm/init.c | 1 + arch/nios2/kernel/setup.c | 1 + arch/powerpc/kernel/paca.c | 1 + arch/um/kernel/skas/process.c | 1 + arch/um/kernel/um_arch.c | 2 ++ arch/x86/kernel/cpu/common.c | 1 + arch/x86/mm/kasan_init_64.c | 1 + fs/namespace.c | 2 ++ include/linux/sched/signal.h | 1 + init/init_task.c | 1 + kernel/delayacct.c | 1 + lib/is_single_threaded.c | 1 + mm/vmacache.c | 1 + 16 files changed, 18 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 952e2c0dabd5..42274bda0ccb 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 201d918e7575..55d1e9205543 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) "kasan: " fmt #include #include +#include #include #include #include diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index 88a159743528..328f0a292316 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index c0ec116b3993..188d4d9fbed4 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index a3fa80d1aacc..6e57ffa5db27 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index fa20060ff7a5..dfc479df9634 100644 --- a/arch/powerpc/kernel/paca.c +++ 
b/arch/powerpc/kernel/paca.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 3a952a4f7965..d4dbf08722d6 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index e8175a8aa22c..4b85acd4020c 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -11,7 +11,9 @@ #include #include #include +#include #include + #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c7e2ca966386..f2fd8fefc589 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0493c17b8a51..8d63d7a104c3 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/fs/namespace.c b/fs/namespace.c index 131cd7b94f47..cc1375eff88c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include "pnode.h" #include "internal.h" diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7e10a7824523..320eca0446cd 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -5,5 +5,6 @@ #include #include #include +#include #endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/init/init_task.c b/init/init_task.c index 53d4ce942a88..66787e30a419 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 660549656991..c94135fc2698 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 8d4678edcc0e..9c7d89df40ed 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include #include /* diff --git a/mm/vmacache.c b/mm/vmacache.c index 4355d34c68a6..7ffa0ee341b5 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -2,6 +2,7 @@ * Copyright (C) 2014 Davidlohr Bueso. */ #include +#include #include #include -- GitLab From b2d091031075ac9a1598e3cc3a29c28f02e64c0d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:27:20 +0100 Subject: [PATCH 0911/1084] sched/headers: Prepare to use instead of in We don't actually need the full rculist.h header in sched.h anymore, we will be able to include the smaller rcupdate.h header instead. But first update code that relied on the implicit header inclusion. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/powerpc/kvm/book3s_mmu_hpte.c | 1 + arch/x86/kvm/irq_comm.c | 2 ++ arch/x86/kvm/page_track.c | 2 ++ drivers/md/bcache/btree.c | 2 ++ drivers/misc/vmw_vmci/vmci_event.c | 1 + drivers/nvme/target/admin-cmd.c | 2 ++ drivers/nvme/target/core.c | 2 ++ drivers/s390/cio/qdio_thinint.c | 2 ++ drivers/scsi/libfc/fc_disc.c | 2 ++ drivers/scsi/libfc/fc_rport.c | 2 ++ include/linux/dmar.h | 2 +- include/linux/pid.h | 2 +- include/linux/rhashtable.h | 2 +- include/linux/sched/signal.h | 1 + include/net/bluetooth/hci_core.h | 2 ++ kernel/trace/trace_events_hist.c | 1 + kernel/trace/trace_events_trigger.c | 1 + kernel/trace/trace_kprobe.c | 1 + kernel/trace/trace_uprobe.c | 1 + lib/bug.c | 1 + lib/rhashtable.c | 1 + net/mac80211/mesh_plink.c | 2 ++ net/mac802154/llsec.c | 2 ++ security/apparmor/policy.c | 1 + security/tomoyo/domain.c | 2 ++ security/tomoyo/group.c | 2 ++ security/tomoyo/util.c | 2 ++ 27 files changed, 41 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 5a1ab1250a05..905a934c1ef4 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index b96d3893f121..6825cd36d13b 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 4a1c13eaa518..37942e419c32 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -14,6 +14,8 @@ */ #include +#include + #include #include diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 384559af310e..450d0e848ae4 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -33,6 +33,8 @@ #include #include #include +#include + #include /* diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c index 8449516d6ac6..84258a48029d 100644 --- a/drivers/misc/vmw_vmci/vmci_event.c +++ b/drivers/misc/vmw_vmci/vmci_event.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "vmci_driver.h" #include "vmci_event.h" diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 94e524fea568..a7bcff45f437 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -13,6 +13,8 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include + #include #include #include "nvmet.h" diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 5267ce20c12d..11b0a0a5f661 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -14,6 +14,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include + #include "nvmet.h" static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 8ad98a902a91..c61164f4528e 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -8,6 +8,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 6103231104da..fd501f8dbb11 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -36,6 +36,8 @@ 
#include #include #include +#include + #include #include diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index c991f3b822f8..b44c3136eb51 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -65,6 +65,8 @@ #include #include #include +#include + #include #include diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e9bc9292bd3a..e8ffba1052d3 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include struct acpi_dmar_header; diff --git a/include/linux/pid.h b/include/linux/pid.h index 298ead5512e5..4d179316e431 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -1,7 +1,7 @@ #ifndef _LINUX_PID_H #define _LINUX_PID_H -#include +#include enum pid_type { diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f2e12a845910..092292b6675e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include /* * The end of the chain is marked with a special nulls marks which has diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 320eca0446cd..c6958a53fef3 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include #include diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 90708f68cc02..95ccc1eef558 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -26,6 +26,8 @@ #define __HCI_CORE_H #include +#include + #include #include diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f3a960ed75a1..1c21d0e2a145 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "tracing_map.h" #include "trace.h" diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 6721a1e89f39..f2ac9d44f6c4 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "trace.h" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index eadd96ef772f..5f688cc724f0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace_probe.h" diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index f4379e772171..a7581fec9681 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "trace_probe.h" diff --git a/lib/bug.c b/lib/bug.c index bc3656e944d2..06edbbef0623 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -45,6 +45,7 @@ #include #include #include +#include extern const struct bug_entry __start___bug_table[], __stop___bug_table[]; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index c5b9b9351cec..f8635fd57442 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index fcba70e57073..953d71e784a9 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -9,6 +9,8 @@ #include #include #include +#include + #include "ieee80211_i.h" #include "rate.h" #include "mesh.h" diff --git a/net/mac802154/llsec.c 
b/net/mac802154/llsec.c index 6a3e1c2181d3..1e1c9b20bab7 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -18,6 +18,8 @@ #include #include #include +#include + #include #include diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 462c5d36b871..def1fbd6bdfd 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include "include/apparmor.h" diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 838ffa78cfda..00d223e9fb37 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -5,8 +5,10 @@ */ #include "common.h" + #include #include +#include /* Variables definitions.*/ diff --git a/security/tomoyo/group.c b/security/tomoyo/group.c index 50092534ec54..944ad77d8fba 100644 --- a/security/tomoyo/group.c +++ b/security/tomoyo/group.c @@ -5,6 +5,8 @@ */ #include +#include + #include "common.h" /** diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index 5fe3679137ae..848317fea704 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -5,6 +5,8 @@ */ #include +#include + #include "common.h" /* Lock for protecting policy. */ -- GitLab From f719ff9bcee2a422647790f12d53d3755f47c727 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 10:57:33 +0100 Subject: [PATCH 0912/1084] sched/headers: Prepare to move the task_lock()/unlock() APIs to But first update the code that uses these facilities with the new header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/mips/math-emu/dsemul.c | 1 + block/blk-ioc.c | 1 + block/ioprio.c | 1 + drivers/staging/android/ion/ion.c | 1 + drivers/staging/lustre/lustre/ptlrpc/sec.c | 1 + fs/proc/internal.h | 1 + fs/proc/proc_net.c | 1 + fs/proc_namespace.c | 2 ++ include/linux/cpuset.h | 1 + ipc/namespace.c | 1 + kernel/cgroup/cpuset.c | 1 + kernel/sched/debug.c | 1 + kernel/utsname.c | 1 + mm/mempolicy.c | 1 + mm/mmu_context.c | 1 + net/core/net_namespace.c | 2 ++ net/core/netclassid_cgroup.c | 2 ++ net/core/netprio_cgroup.c | 2 ++ 18 files changed, 22 insertions(+) diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index 6664908514b3..b6bfd3625369 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/block/blk-ioc.c b/block/blk-ioc.c index b12f9c87b4c3..6bfa39675337 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "blk.h" diff --git a/block/ioprio.c b/block/ioprio.c index 4b9ab8367dda..0c47a00f92a8 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 2c3ffbcbd621..f45115fce4eb 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ion.h" #include "ion_priv.h" diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c index 49f34fd655c3..366f2ce20f5e 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "../include/obd.h" #include "../include/obd_class.h" diff --git a/fs/proc/internal.h 
b/fs/proc/internal.h index 550e8b183abe..26a6daf02185 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -15,6 +15,7 @@ #include #include #include +#include struct ctl_table_header; struct mempolicy; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index ffd72a6c6e04..5cbc65d7a1e1 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 3f1190d18991..b5713fefb4c1 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -10,6 +10,8 @@ #include #include #include +#include + #include "proc/internal.h" /* only for get_proc_task() in ->open() */ #include "pnode.h" diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index c608c39cb161..611fce58d670 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/ipc/namespace.c b/ipc/namespace.c index 1f1d713ac19c..b4d80f9f7246 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "util.h" diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index a5c46db2855c..0f41292be0fb 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index e865d8bfb881..38f019324f1a 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/kernel/utsname.c b/kernel/utsname.c index 06585ad296ff..913fe4336d2b 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -17,6 +17,7 @@ #include #include #include +#include static struct ucounts *inc_uts_namespaces(struct user_namespace *ns) { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 18e0105810df..75b2745bac41 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/mmu_context.c b/mm/mmu_context.c index 8888b124a386..3e612ae748e9 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3c4bbec39713..652468ff65b7 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 11fce17274f6..6ae56037bb13 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -12,6 +12,8 @@ #include #include #include +#include + #include #include diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 756637dc7a57..0f9275ee5595 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include #include -- GitLab From 32ef5517c298042ed58408545f475df43afe1f24 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 11:48:36 +0100 Subject: [PATCH 0913/1084] sched/headers: Prepare to move cputime functionality from into Introduce a trivial, mostly empty header to prepare for the moving of cputime functionality out of sched.h. Update all code that relies on these facilities. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 1 + arch/ia64/kernel/time.c | 2 +- arch/powerpc/kernel/time.c | 2 +- arch/s390/kernel/idle.c | 2 +- arch/s390/kernel/vtime.c | 2 +- arch/x86/kernel/apm_32.c | 1 + arch/x86/kvm/hyperv.c | 2 ++ drivers/isdn/mISDN/stack.c | 1 + drivers/s390/cio/cio.c | 2 +- fs/binfmt_elf.c | 1 + fs/binfmt_elf_fdpic.c | 1 + fs/proc/array.c | 1 + fs/proc/stat.c | 2 +- include/linux/sched/cputime.h | 7 +++++++ kernel/acct.c | 2 ++ kernel/delayacct.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sched/cputime.c | 2 +- kernel/sched/sched.h | 1 + kernel/signal.c | 1 + kernel/sys.c | 1 + kernel/time/itimer.c | 1 + kernel/time/posix-cpu-timers.c | 1 + kernel/tsacct.c | 1 + 25 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 include/linux/sched/cputime.h diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 73446baa632e..0b961093ca5c 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 144f9db7a876..aa7be020a904 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 37833c5dc274..07b90725855e 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -58,7 +58,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index fb07a70820af..9340b2a07935 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include "entry.h" diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 31bd96e81167..c14fc9029912 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index dc04b30cbd60..5a414545e8a3 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -219,6 +219,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index f701d4430727..ebae57ac5902 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -28,6 +28,8 @@ #include #include +#include + #include #include diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 696f22fd5ab4..8b7faea2ddf8 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "core.h" diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index de6fccc13124..1b350665c823 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 92c00a13b28b..5075fd5c62c8 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 6103a8149ccd..cf93a4fad012 100644 --- a/fs/binfmt_elf_fdpic.c +++ 
b/fs/binfmt_elf_fdpic.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index f3169b58af38..88c355574aa0 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/stat.c b/fs/proc/stat.c index b95556e036bb..bd4e55f4aa20 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #ifndef arch_irq_stat_cpu diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h new file mode 100644 index 000000000000..6ed4fe43de28 --- /dev/null +++ b/include/linux/sched/cputime.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_SCHED_CPUTIME_H +#define _LINUX_SCHED_CPUTIME_H + +#include +#include + +#endif /* _LINUX_SCHED_CPUTIME_H */ diff --git a/kernel/acct.c b/kernel/acct.c index ca9cb55b5855..5b1284370367 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -56,6 +56,8 @@ #include #include #include +#include + #include #include /* sector_div */ #include diff --git a/kernel/delayacct.c b/kernel/delayacct.c index c94135fc2698..4a1c33416b6a 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index 771c33fc9952..e126ebf2400c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index c87b6f03c710..916e78004b8f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 54031fa681e2..f3778e2b46c8 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "sched.h" #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b1f1c8443837..76e3af3f39f4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 2008aa999976..7e59ebc2c25e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 7f1ecd2e41c1..7ff6d1b10cec 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index f6b961c5e58c..087d6a1279b8 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index a2475a9f57d8..4513ad16a253 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -3,6 +3,7 @@ */ #include +#include #include #include #include diff --git a/kernel/tsacct.c b/kernel/tsacct.c index d9a03b80b75d..370724b45391 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include -- GitLab From 3905f9ad455e0fd2ddb557566c5561b4a3027c07 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 12:07:04 +0100 Subject: [PATCH 0914/1084] sched/headers: Prepare to move sched_info_on() and 
force_schedstat_enabled() from to But first update usage sites with the new header dependency. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kvm/cpuid.c | 2 ++ arch/x86/kvm/x86.c | 2 ++ fs/proc/base.c | 1 + kernel/latencytop.c | 1 + kernel/profile.c | 2 ++ 5 files changed, 8 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1d155cc56629..efde6cc50875 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b2a4b11274b0..1faf620a6fdc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -54,6 +54,8 @@ #include #include #include +#include + #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index 5914fed3712d..2dae60075f6e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 545839b838d6..96b4179cee6a 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include diff --git a/kernel/profile.c b/kernel/profile.c index f67ce0aa6bc4..9aa2a4445b0d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include #include #include -- GitLab From 5c2c5c5514393e31859439ef83a34051d7c68332 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:24:31 +0100 Subject: [PATCH 0915/1084] sched/headers, vfs/execve: Prepare to move the do_execve*() prototypes from to But first update the usage sites with the new header dependency. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- init/main.c | 1 + kernel/kmod.c | 1 + 2 files changed, 2 insertions(+) diff --git a/init/main.c b/init/main.c index c2d337a136d1..ca9f53fb4125 100644 --- a/init/main.c +++ b/init/main.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/kmod.c b/kernel/kmod.c index ac5f5c2d098d..563f97e2be36 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include -- GitLab From 3f8c24529b42fc5044c2a44bdb8ba69aec2bee37 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:31:22 +0100 Subject: [PATCH 0916/1084] sched/headers: Prepare to move kstack_end() from to But first update the usage sites with the new header dependency. 
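As a side note, not taken from the patch: kstack_end() is mainly used by stack dumpers to decide when a walk has reached the end of a task's kernel stack. A rough sketch of such a loop, assuming the helper ends up in the new <linux/sched/task_stack.h> header and relying only on well-known printk helpers, could look like:

/*
 * Illustrative only: walk a kernel stack word by word and print the
 * entries that look like kernel text addresses, stopping when
 * kstack_end() reports the end of the stack area.
 */
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/sched/task_stack.h>   /* assumed new home of kstack_end() */

static void example_dump_stack_words(unsigned long *sp)
{
        while (!kstack_end(sp)) {
                unsigned long addr = *sp++;

                if (__kernel_text_address(addr))
                        pr_info(" [<%lx>] %pS\n", addr, (void *)addr);
        }
}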
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/m32r/kernel/traps.c | 2 ++ arch/metag/kernel/stacktrace.c | 1 + arch/openrisc/kernel/traps.c | 1 + arch/um/kernel/sysrq.c | 1 + arch/xtensa/kernel/traps.c | 1 + mm/slab.c | 1 + 6 files changed, 7 insertions(+) diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 7c05bb393597..832bb2bb29bd 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -15,7 +15,9 @@ #include #include #include +#include #include + #include #include diff --git a/arch/metag/kernel/stacktrace.c b/arch/metag/kernel/stacktrace.c index 4f806d66a58c..91ffc4b75c33 100644 --- a/arch/metag/kernel/stacktrace.c +++ b/arch/metag/kernel/stacktrace.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 2354ab88d948..803e9e756f77 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 34ae555c3e70..a76295f7ede9 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index d8bb2e62393e..c82c43bff296 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/slab.c b/mm/slab.c index bd63450a9b16..807d86c76908 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -116,6 +116,7 @@ #include #include #include +#include #include -- GitLab From 61855b6b03df9b6a15bd265c2c3ae7b5e23da312 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:35:41 +0100 Subject: [PATCH 0917/1084] sched/headers: Prepare to move exit_files() and exit_itimers() from to But first update the usage site. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/time/posix-timers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1e6623d76750..50a6a47020de 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include -- GitLab From 1777e4635507265ba53d8dc4cd248e7d7c306fa0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:47:12 +0100 Subject: [PATCH 0918/1084] sched/headers: Prepare to move _init() prototypes from to But first introduce a trivial header and update usage sites. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/ia64/kernel/setup.c | 1 + arch/m32r/kernel/traps.c | 1 + arch/s390/kernel/setup.c | 1 + include/linux/cpu.h | 2 ++ include/linux/sched/init.h | 6 ++++++ init/main.c | 1 + kernel/sched/sched.h | 1 + 7 files changed, 13 insertions(+) create mode 100644 include/linux/sched/init.h diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index b2b4f5216180..63bef6fc0f3e 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index 832bb2bb29bd..647dd94a0c39 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3de07004c02e..911dc0b49be0 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 21f9c74496e7..f92081234afd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -30,6 +30,8 @@ struct cpu { extern void boot_cpu_init(void); extern void boot_cpu_state_init(void); +extern void cpu_init(void); +extern void trap_init(void); extern int register_cpu(struct cpu *cpu, int num); extern struct device *get_cpu_device(unsigned cpu); diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h new file mode 100644 index 000000000000..15e46313e55e --- /dev/null +++ b/include/linux/sched/init.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_INIT_H +#define _LINUX_SCHED_INIT_H + +#include + +#endif /* _LINUX_SCHED_INIT_H */ diff --git a/init/main.c b/init/main.c index ca9f53fb4125..eae2f15657c6 100644 --- a/init/main.c +++ b/init/main.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 76e3af3f39f4..5cbf92214ad8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include -- GitLab From 0881e7bd341e2158b314596bcf2059e88e68f04e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:30:50 +0100 Subject: [PATCH 0919/1084] sched/headers: Prepare to move the get_task_struct()/put_task_struct() and related APIs from to But first update usage sites with the new header dependency. 
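For context, as an editorial illustration rather than part of the patch: the APIs being relocated are the task_struct reference-count helpers. A simplified usage sketch, assuming the destination is the new <linux/sched/task.h> header introduced by this series, is:

/*
 * Illustrative sketch: pin a task with get_task_struct() so its
 * task_struct cannot be freed while we use it, then drop the
 * reference again with put_task_struct().
 */
#include <linux/printk.h>
#include <linux/sched.h>
#include <linux/sched/task.h>   /* assumed new home of get/put_task_struct() */

static void example_use_task(struct task_struct *p)
{
        get_task_struct(p);     /* take a reference */

        pr_info("pinned task %s (pid %d)\n", p->comm, task_pid_nr(p));

        put_task_struct(p);     /* drop the reference */
}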
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/dma/dmatest.c | 1 + drivers/gpu/drm/etnaviv/etnaviv_gem.c | 1 + drivers/infiniband/core/umem_odp.c | 1 + drivers/infiniband/hw/mlx4/main.c | 1 + drivers/infiniband/hw/mlx5/main.c | 1 + drivers/md/persistent-data/dm-block-manager.c | 1 + drivers/misc/cxl/main.c | 2 ++ drivers/net/xen-netback/interface.c | 1 + drivers/oprofile/buffer_sync.c | 1 + drivers/tty/tty_ldsem.c | 1 + drivers/usb/usbip/usbip_common.h | 1 + kernel/irq/manage.c | 1 + 12 files changed, 13 insertions(+) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index c9297605058c..54d581d407aa 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index ae2882b64b82..fd56f92f3469 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "etnaviv_drv.h" #include "etnaviv_gem.h" diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 7279f259494f..cb2742b548bb 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 56da8f0d71bb..fba94df28cf1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4b1ec3ff152a..4dc0a8785fe0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -42,6 +42,7 @@ #endif #include #include +#include #include #include #include diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 0863905dee02..8589e0a14068 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -13,6 +13,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "block manager" diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index cc1706a92ace..b0b6ed31918e 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include #include diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index a2d326760a72..829b26cd4549 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -31,6 +31,7 @@ #include "common.h" #include +#include #include #include #include diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index eb0c35994b54..ac27f3d3fbb4 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "oprofile_stats.h" diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index 4f6b1b10b537..52b7baef4f7a 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h index 
9f490375ac92..f8573a52e41a 100644 --- a/drivers/usb/usbip/usbip_common.h +++ b/drivers/usb/usbip/usbip_common.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #define USBIP_VERSION "1.0.0" diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 09740952e4de..a4afe5cc5af1 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include -- GitLab From 50d34394cee68dd12c5e01fff073d1167700bfce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 16:03:58 +0100 Subject: [PATCH 0920/1084] sched/headers: Prepare to remove the include from Update files that depend on the magic.h inclusion. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/dax/dax.c | 1 + drivers/virtio/virtio_balloon.c | 1 + include/linux/sched/task_stack.h | 1 + mm/zsmalloc.c | 1 + security/integrity/evm/evm_main.c | 2 ++ 5 files changed, 6 insertions(+) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index b75c77254fdb..8d9829ff2a78 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 9d2738e9217f..a610061fabf6 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * Balloon device works in 4K page units. So each page is pointed to by diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 933cd49824c2..d2d578e85f7d 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -2,5 +2,6 @@ #define _LINUX_SCHED_TASK_STACK_H #include +#include #endif /* _LINUX_SCHED_TASK_STACK_H */ diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b7b1fb6c8c21..b7ee9c34dbd6 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index e2ed498c0f5f..063d38aef64e 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include #include #include "evm.h" -- GitLab From b69339ba109549beeaf45c45c52ac7025bfcd954 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 16:15:03 +0100 Subject: [PATCH 0921/1084] sched/headers: Prepare to remove spurious inclusion dependencies In the following patches we are going to remove various headers from sched.h and other headers that sched.h includes. To make those patches build cleanly prepare the scene by adding dependencies to various files that learned to rely on those to-be-removed dependencies. These changes all make sense standalone: they add a header for a data type that a particular .c or .h file is using. 
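To make that rule concrete (hypothetical example, not from the patch set): a file that uses a type directly should include the header defining that type itself, rather than inheriting it through <linux/sched.h>:

/*
 * Hypothetical example.c: this file uses spinlock_t directly, so it
 * includes <linux/spinlock.h> explicitly instead of relying on the
 * definition being dragged in indirectly via sched.h.
 */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

static void example_critical_section(void)
{
        spin_lock(&example_lock);
        /* ... touch data protected by example_lock ... */
        spin_unlock(&example_lock);
}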
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/misc/vmw_vmci/vmci_resource.c | 1 + include/linux/sched.h | 1 + include/sound/control.h | 1 + include/target/target_core_base.h | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c index 9a53a30de445..1ab6e8737a5f 100644 --- a/drivers/misc/vmw_vmci/vmci_resource.c +++ b/drivers/misc/vmw_vmci/vmci_resource.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "vmci_resource.h" #include "vmci_driver.h" diff --git a/include/linux/sched.h b/include/linux/sched.h index ea05116bc3c2..b5c6d602dfe9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -18,6 +18,7 @@ struct sched_param { #include #include #include +#include #include #include #include diff --git a/include/sound/control.h b/include/sound/control.h index 21d047f229a1..bd7246de58e7 100644 --- a/include/sound/control.h +++ b/include/sound/control.h @@ -22,6 +22,7 @@ * */ +#include #include #define snd_kcontrol_chip(kcontrol) ((kcontrol)->private_data) diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 878560e60c75..8be9ba73383d 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -5,6 +5,7 @@ #include /* enum dma_data_direction */ #include /* struct percpu_ida */ #include /* struct semaphore */ +#include #define TARGET_CORE_VERSION "v5.0" -- GitLab From a60b9eda67beac2318fa159545ba7d022f780736 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 0922/1084] sched/headers: Move scheduler topology interfaces to The vast majority of sched.h users does not require the topology types and interfaces, so split them out into . This reduces the size of linux/sched.h by ~6%. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 212 -------------------------------- include/linux/sched/topology.h | 213 +++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 212 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b5c6d602dfe9..55480782ce7f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -960,12 +960,6 @@ enum cpu_idle_type { # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -/* - * Increase resolution of cpu_capacity calculations - */ -#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT -#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) - /* * Wake-queues are lists of tasks with a pending wakeup, whose * callers have already marked the task as woken internally, @@ -1016,214 +1010,8 @@ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); -/* - * sched-domains (multiprocessor balancing) declarations: - */ -#ifdef CONFIG_SMP -#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. 
*/ -#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ -#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ -#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ -#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ -#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ -#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ -#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ -#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ -#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ -#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ -#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ -#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ -#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ -#define SD_NUMA 0x4000 /* cross-node balancing */ - -#ifdef CONFIG_SCHED_SMT -static inline int cpu_smt_flags(void) -{ - return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; -} -#endif - -#ifdef CONFIG_SCHED_MC -static inline int cpu_core_flags(void) -{ - return SD_SHARE_PKG_RESOURCES; -} -#endif - -#ifdef CONFIG_NUMA -static inline int cpu_numa_flags(void) -{ - return SD_NUMA; -} -#endif - -extern int arch_asym_cpu_priority(int cpu); - -struct sched_domain_attr { - int relax_domain_level; -}; - -#define SD_ATTR_INIT (struct sched_domain_attr) { \ - .relax_domain_level = -1, \ -} - -extern int sched_domain_level_max; - -struct sched_group; - -struct sched_domain_shared { - atomic_t ref; - atomic_t nr_busy_cpus; - int has_idle_cores; -}; - -struct sched_domain { - /* These fields must be setup */ - struct sched_domain *parent; /* top domain must be null terminated */ - struct sched_domain *child; /* bottom domain must be null terminated */ - struct sched_group *groups; /* the balancing groups of the domain */ - unsigned long min_interval; /* Minimum balance interval ms */ - unsigned long max_interval; /* Maximum balance interval ms */ - unsigned int busy_factor; /* less balancing by factor if busy */ - unsigned int imbalance_pct; /* No balance until over watermark */ - unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ - unsigned int busy_idx; - unsigned int idle_idx; - unsigned int newidle_idx; - unsigned int wake_idx; - unsigned int forkexec_idx; - unsigned int smt_gain; - - int nohz_idle; /* NOHZ IDLE status */ - int flags; /* See SD_* */ - int level; - - /* Runtime fields. */ - unsigned long last_balance; /* init to jiffies. units in jiffies */ - unsigned int balance_interval; /* initialise to 1. units in ms. 
*/ - unsigned int nr_balance_failed; /* initialise to 0 */ - - /* idle_balance() stats */ - u64 max_newidle_lb_cost; - unsigned long next_decay_max_lb_cost; - - u64 avg_scan_cost; /* select_idle_sibling */ - -#ifdef CONFIG_SCHEDSTATS - /* load_balance() stats */ - unsigned int lb_count[CPU_MAX_IDLE_TYPES]; - unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; - unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; - unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; - unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; - - /* Active load balancing */ - unsigned int alb_count; - unsigned int alb_failed; - unsigned int alb_pushed; - - /* SD_BALANCE_EXEC stats */ - unsigned int sbe_count; - unsigned int sbe_balanced; - unsigned int sbe_pushed; - - /* SD_BALANCE_FORK stats */ - unsigned int sbf_count; - unsigned int sbf_balanced; - unsigned int sbf_pushed; - - /* try_to_wake_up() stats */ - unsigned int ttwu_wake_remote; - unsigned int ttwu_move_affine; - unsigned int ttwu_move_balance; -#endif -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif - union { - void *private; /* used during construction */ - struct rcu_head rcu; /* used during destruction */ - }; - struct sched_domain_shared *shared; - - unsigned int span_weight; - /* - * Span of all CPUs in this domain. - * - * NOTE: this field is variable length. (Allocated dynamically - * by attaching extra space to the end of the structure, - * depending on how many CPUs the kernel has booted up with) - */ - unsigned long span[0]; -}; - -static inline struct cpumask *sched_domain_span(struct sched_domain *sd) -{ - return to_cpumask(sd->span); -} - -extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new); - -/* Allocate an array of sched domains, for partition_sched_domains(). 
*/ -cpumask_var_t *alloc_sched_domains(unsigned int ndoms); -void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); - -bool cpus_share_cache(int this_cpu, int that_cpu); - -typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); -typedef int (*sched_domain_flags_f)(void); - -#define SDTL_OVERLAP 0x01 - -struct sd_data { - struct sched_domain **__percpu sd; - struct sched_domain_shared **__percpu sds; - struct sched_group **__percpu sg; - struct sched_group_capacity **__percpu sgc; -}; - -struct sched_domain_topology_level { - sched_domain_mask_f mask; - sched_domain_flags_f sd_flags; - int flags; - int numa_level; - struct sd_data data; -#ifdef CONFIG_SCHED_DEBUG - char *name; -#endif -}; - -extern void set_sched_topology(struct sched_domain_topology_level *tl); extern void wake_up_if_idle(int cpu); -#ifdef CONFIG_SCHED_DEBUG -# define SD_INIT_NAME(type) .name = #type -#else -# define SD_INIT_NAME(type) -#endif - -#else /* CONFIG_SMP */ - -struct sched_domain_attr; - -static inline void -partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new) -{ -} - -static inline bool cpus_share_cache(int this_cpu, int that_cpu) -{ - return true; -} - -#endif /* !CONFIG_SMP */ - - struct io_context; /* See blkdev.h */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 184b56b8aa64..b3550b36e65d 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -5,4 +5,217 @@ #include +/* + * sched-domains (multiprocessor balancing) declarations: + */ +#ifdef CONFIG_SMP + +#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */ +#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ +#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ +#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ +#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ +#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ +#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */ +#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */ +#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */ +#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ +#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ +#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */ +#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ +#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */ +#define SD_NUMA 0x4000 /* cross-node balancing */ + +/* + * Increase resolution of cpu_capacity calculations + */ +#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT +#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) + +#ifdef CONFIG_SCHED_SMT +static inline int cpu_smt_flags(void) +{ + return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_SCHED_MC +static inline int cpu_core_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} +#endif + +#ifdef CONFIG_NUMA +static inline int cpu_numa_flags(void) +{ + return SD_NUMA; +} +#endif + +extern int arch_asym_cpu_priority(int cpu); + +struct sched_domain_attr { + int relax_domain_level; +}; + +#define SD_ATTR_INIT (struct sched_domain_attr) { \ + .relax_domain_level = -1, \ +} + +extern int sched_domain_level_max; + +struct sched_group; + +struct sched_domain_shared { + atomic_t ref; + atomic_t nr_busy_cpus; + int has_idle_cores; +}; + +struct sched_domain { + /* 
These fields must be setup */ + struct sched_domain *parent; /* top domain must be null terminated */ + struct sched_domain *child; /* bottom domain must be null terminated */ + struct sched_group *groups; /* the balancing groups of the domain */ + unsigned long min_interval; /* Minimum balance interval ms */ + unsigned long max_interval; /* Maximum balance interval ms */ + unsigned int busy_factor; /* less balancing by factor if busy */ + unsigned int imbalance_pct; /* No balance until over watermark */ + unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ + unsigned int busy_idx; + unsigned int idle_idx; + unsigned int newidle_idx; + unsigned int wake_idx; + unsigned int forkexec_idx; + unsigned int smt_gain; + + int nohz_idle; /* NOHZ IDLE status */ + int flags; /* See SD_* */ + int level; + + /* Runtime fields. */ + unsigned long last_balance; /* init to jiffies. units in jiffies */ + unsigned int balance_interval; /* initialise to 1. units in ms. */ + unsigned int nr_balance_failed; /* initialise to 0 */ + + /* idle_balance() stats */ + u64 max_newidle_lb_cost; + unsigned long next_decay_max_lb_cost; + + u64 avg_scan_cost; /* select_idle_sibling */ + +#ifdef CONFIG_SCHEDSTATS + /* load_balance() stats */ + unsigned int lb_count[CPU_MAX_IDLE_TYPES]; + unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; + unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; + unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; + unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES]; + + /* Active load balancing */ + unsigned int alb_count; + unsigned int alb_failed; + unsigned int alb_pushed; + + /* SD_BALANCE_EXEC stats */ + unsigned int sbe_count; + unsigned int sbe_balanced; + unsigned int sbe_pushed; + + /* SD_BALANCE_FORK stats */ + unsigned int sbf_count; + unsigned int sbf_balanced; + unsigned int sbf_pushed; + + /* try_to_wake_up() stats */ + unsigned int ttwu_wake_remote; + unsigned int ttwu_move_affine; + unsigned int ttwu_move_balance; +#endif +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif + union { + void *private; /* used during construction */ + struct rcu_head rcu; /* used during destruction */ + }; + struct sched_domain_shared *shared; + + unsigned int span_weight; + /* + * Span of all CPUs in this domain. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + */ + unsigned long span[0]; +}; + +static inline struct cpumask *sched_domain_span(struct sched_domain *sd) +{ + return to_cpumask(sd->span); +} + +extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new); + +/* Allocate an array of sched domains, for partition_sched_domains(). 
*/ +cpumask_var_t *alloc_sched_domains(unsigned int ndoms); +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + +bool cpus_share_cache(int this_cpu, int that_cpu); + +typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); +typedef int (*sched_domain_flags_f)(void); + +#define SDTL_OVERLAP 0x01 + +struct sd_data { + struct sched_domain **__percpu sd; + struct sched_domain_shared **__percpu sds; + struct sched_group **__percpu sg; + struct sched_group_capacity **__percpu sgc; +}; + +struct sched_domain_topology_level { + sched_domain_mask_f mask; + sched_domain_flags_f sd_flags; + int flags; + int numa_level; + struct sd_data data; +#ifdef CONFIG_SCHED_DEBUG + char *name; +#endif +}; + +extern void set_sched_topology(struct sched_domain_topology_level *tl); + +#ifdef CONFIG_SCHED_DEBUG +# define SD_INIT_NAME(type) .name = #type +#else +# define SD_INIT_NAME(type) +#endif + +#else /* CONFIG_SMP */ + +struct sched_domain_attr; + +static inline void +partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new) +{ +} + +static inline bool cpus_share_cache(int this_cpu, int that_cpu) +{ + return true; +} + +#endif /* !CONFIG_SMP */ + #endif /* _LINUX_SCHED_TOPOLOGY_H */ -- GitLab From b768917d2c08edc4b5616c86a569e524d190434b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:51:00 +0100 Subject: [PATCH 0923/1084] sched/headers: Move the 'cpu_idle_type' enum from to Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ------- include/linux/sched/idle.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 55480782ce7f..8a8252e9e0c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -943,13 +943,6 @@ static inline int sched_info_on(void) void force_schedstat_enabled(void); #endif -enum cpu_idle_type { - CPU_IDLE, - CPU_NOT_IDLE, - CPU_NEWLY_IDLE, - CPU_MAX_IDLE_TYPES -}; - /* * Integer metrics need fixed point arithmetic, e.g., sched/fair * has a few: load, load_avg, util_avg, freq, and capacity. diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index a3cf79b86986..6aabc9968cba 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -3,4 +3,11 @@ #include +enum cpu_idle_type { + CPU_IDLE, + CPU_NOT_IDLE, + CPU_NEWLY_IDLE, + CPU_MAX_IDLE_TYPES +}; + #endif /* _LINUX_SCHED_IDLE_H */ -- GitLab From 4437722b0486c36dc90a1adf584fca4cea6cf1de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:58:52 +0100 Subject: [PATCH 0924/1084] sched/headers: Move the wake_up_if_idle() prototype to No need to clutter with this rarely used prototype. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/sched/idle.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8a8252e9e0c7..c30705c3e553 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1003,8 +1003,6 @@ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); -extern void wake_up_if_idle(int cpu); - struct io_context; /* See blkdev.h */ diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 6aabc9968cba..72b6b6ab6354 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -10,4 +10,6 @@ enum cpu_idle_type { CPU_MAX_IDLE_TYPES }; +extern void wake_up_if_idle(int cpu); + #endif /* _LINUX_SCHED_IDLE_H */ -- GitLab From 5dbe91de599423d243738a205367506444ebb4a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:47:28 +0100 Subject: [PATCH 0925/1084] sched/headers: Move idle polling methods to Further reduce the size of by moving these APIs: tsk_is_polling() __current_set_polling() current_set_polling_and_test() __current_clr_polling() current_clr_polling_and_test() current_clr_polling() Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 76 -------------------------------------- include/linux/sched/idle.h | 76 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index c30705c3e553..4f512e337584 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3176,82 +3176,6 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } -/* - * Idle thread specific functions to determine the need_resched - * polling state. - */ -#ifdef TIF_POLLING_NRFLAG -static inline int tsk_is_polling(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -} - -static inline void __current_set_polling(void) -{ - set_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_set_polling_and_test(void) -{ - __current_set_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -static inline void __current_clr_polling(void) -{ - clear_thread_flag(TIF_POLLING_NRFLAG); -} - -static inline bool __must_check current_clr_polling_and_test(void) -{ - __current_clr_polling(); - - /* - * Polling state must be visible before we test NEED_RESCHED, - * paired by resched_curr() - */ - smp_mb__after_atomic(); - - return unlikely(tif_need_resched()); -} - -#else -static inline int tsk_is_polling(struct task_struct *p) { return 0; } -static inline void __current_set_polling(void) { } -static inline void __current_clr_polling(void) { } - -static inline bool __must_check current_set_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -static inline bool __must_check current_clr_polling_and_test(void) -{ - return unlikely(tif_need_resched()); -} -#endif - -static inline void current_clr_polling(void) -{ - __current_clr_polling(); - - /* - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. 
- * Once the bit is cleared, we'll get IPIs with every new - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also - * fold. - */ - smp_mb(); /* paired with resched_curr() */ - - preempt_fold_need_resched(); -} - static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 72b6b6ab6354..66ee32f87f0b 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -12,4 +12,80 @@ enum cpu_idle_type { extern void wake_up_if_idle(int cpu); +/* + * Idle thread specific functions to determine the need_resched + * polling state. + */ +#ifdef TIF_POLLING_NRFLAG +static inline int tsk_is_polling(struct task_struct *p) +{ + return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); +} + +static inline void __current_set_polling(void) +{ + set_thread_flag(TIF_POLLING_NRFLAG); +} + +static inline bool __must_check current_set_polling_and_test(void) +{ + __current_set_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_curr() + */ + smp_mb__after_atomic(); + + return unlikely(tif_need_resched()); +} + +static inline void __current_clr_polling(void) +{ + clear_thread_flag(TIF_POLLING_NRFLAG); +} + +static inline bool __must_check current_clr_polling_and_test(void) +{ + __current_clr_polling(); + + /* + * Polling state must be visible before we test NEED_RESCHED, + * paired by resched_curr() + */ + smp_mb__after_atomic(); + + return unlikely(tif_need_resched()); +} + +#else +static inline int tsk_is_polling(struct task_struct *p) { return 0; } +static inline void __current_set_polling(void) { } +static inline void __current_clr_polling(void) { } + +static inline bool __must_check current_set_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +static inline bool __must_check current_clr_polling_and_test(void) +{ + return unlikely(tif_need_resched()); +} +#endif + +static inline void current_clr_polling(void) +{ + __current_clr_polling(); + + /* + * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. + * Once the bit is cleared, we'll get IPIs with every new + * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also + * fold. + */ + smp_mb(); /* paired with resched_curr() */ + + preempt_fold_need_resched(); +} + #endif /* _LINUX_SCHED_IDLE_H */ -- GitLab From eb61baf69871b9836783a81bc451189edb0d9de2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 17:09:06 +0100 Subject: [PATCH 0926/1084] sched/headers: Move the wake-queue types and interfaces from sched.h into Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 54 +++--------------------------------- include/linux/sched/wake_q.h | 47 +++++++++++++++++++++++++++++++ ipc/msg.c | 1 - 3 files changed, 51 insertions(+), 51 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f512e337584..5cda7cf09505 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -953,56 +953,6 @@ void force_schedstat_enabled(void); # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -/* - * Wake-queues are lists of tasks with a pending wakeup, whose - * callers have already marked the task as woken internally, - * and can thus carry on. A common use case is being able to - * do the wakeups once the corresponding user lock as been - * released. 
- * - * We hold reference to each task in the list across the wakeup, - * thus guaranteeing that the memory is still valid by the time - * the actual wakeups are performed in wake_up_q(). - * - * One per task suffices, because there's never a need for a task to be - * in two wake queues simultaneously; it is forbidden to abandon a task - * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is - * already in a wake queue, the wakeup will happen soon and the second - * waker can just skip it. - * - * The DEFINE_WAKE_Q macro declares and initializes the list head. - * wake_up_q() does NOT reinitialize the list; it's expected to be - * called near the end of a function. Otherwise, the list can be - * re-initialized for later re-use by wake_q_init(). - * - * Note that this can cause spurious wakeups. schedule() callers - * must ensure the call is done inside a loop, confirming that the - * wakeup condition has in fact occurred. - */ -struct wake_q_node { - struct wake_q_node *next; -}; - -struct wake_q_head { - struct wake_q_node *first; - struct wake_q_node **lastp; -}; - -#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) - -#define DEFINE_WAKE_Q(name) \ - struct wake_q_head name = { WAKE_Q_TAIL, &name.first } - -static inline void wake_q_init(struct wake_q_head *head) -{ - head->first = WAKE_Q_TAIL; - head->lastp = &head->first; -} - -extern void wake_q_add(struct wake_q_head *head, - struct task_struct *task); -extern void wake_up_q(struct wake_q_head *head); - struct io_context; /* See blkdev.h */ @@ -1234,6 +1184,10 @@ enum perf_event_task_context { perf_nr_task_contexts, }; +struct wake_q_node { + struct wake_q_node *next; +}; + /* Track pages that require TLB flushes */ struct tlbflush_unmap_batch { /* diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 6383b35e4eba..d03d8a9047dc 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -1,6 +1,53 @@ #ifndef _LINUX_SCHED_WAKE_Q_H #define _LINUX_SCHED_WAKE_Q_H +/* + * Wake-queues are lists of tasks with a pending wakeup, whose + * callers have already marked the task as woken internally, + * and can thus carry on. A common use case is being able to + * do the wakeups once the corresponding user lock as been + * released. + * + * We hold reference to each task in the list across the wakeup, + * thus guaranteeing that the memory is still valid by the time + * the actual wakeups are performed in wake_up_q(). + * + * One per task suffices, because there's never a need for a task to be + * in two wake queues simultaneously; it is forbidden to abandon a task + * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is + * already in a wake queue, the wakeup will happen soon and the second + * waker can just skip it. + * + * The DEFINE_WAKE_Q macro declares and initializes the list head. + * wake_up_q() does NOT reinitialize the list; it's expected to be + * called near the end of a function. Otherwise, the list can be + * re-initialized for later re-use by wake_q_init(). + * + * Note that this can cause spurious wakeups. schedule() callers + * must ensure the call is done inside a loop, confirming that the + * wakeup condition has in fact occurred. 
+ */ + #include +struct wake_q_head { + struct wake_q_node *first; + struct wake_q_node **lastp; +}; + +#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) + +#define DEFINE_WAKE_Q(name) \ + struct wake_q_head name = { WAKE_Q_TAIL, &name.first } + +static inline void wake_q_init(struct wake_q_head *head) +{ + head->first = WAKE_Q_TAIL; + head->lastp = &head->first; +} + +extern void wake_q_add(struct wake_q_head *head, + struct task_struct *task); +extern void wake_up_q(struct wake_q_head *head); + #endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/ipc/msg.c b/ipc/msg.c index ecc387e573f6..104926dc72be 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include -- GitLab From 5689810360c2e88ce1619e8bcfa859852f9a1d1a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 7 Feb 2017 12:07:18 +0100 Subject: [PATCH 0927/1084] sched/headers: Move scheduler clock interfaces to Move the sched_clock interfaces into a separate header file, to reduce the size of sched.h. Include in all files that made use of one of the Acked-by: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 100 ------------------------------------ include/linux/sched/clock.h | 98 +++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 100 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5cda7cf09505..b42d2f9d673a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2190,103 +2190,6 @@ static inline void calc_load_exit_idle(void) { } #define cpu_relax_yield() cpu_relax() #endif -/* - * Do not use outside of architecture code which knows its limitations. - * - * sched_clock() has no promise of monotonicity or bounded drift between - * CPUs, use (which you should not) requires disabling IRQs. - * - * Please use one of the three interfaces below. - */ -extern unsigned long long notrace sched_clock(void); -/* - * See the comment in kernel/sched/clock.c - */ -extern u64 running_clock(void); -extern u64 sched_clock_cpu(int cpu); - - -extern void sched_clock_init(void); - -#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -static inline void sched_clock_init_late(void) -{ -} - -static inline void sched_clock_tick(void) -{ -} - -static inline void clear_sched_clock_stable(void) -{ -} - -static inline void sched_clock_idle_sleep_event(void) -{ -} - -static inline void sched_clock_idle_wakeup_event(u64 delta_ns) -{ -} - -static inline u64 cpu_clock(int cpu) -{ - return sched_clock(); -} - -static inline u64 local_clock(void) -{ - return sched_clock(); -} -#else -extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ -extern int sched_clock_stable(void); -extern void clear_sched_clock_stable(void); - -extern void sched_clock_tick(void); -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - -/* - * As outlined in clock.c, provides a fast, high resolution, nanosecond - * time source that is monotonic per cpu argument and has bounded drift - * between cpus. - * - * ######################### BIG FAT WARNING ########################## - * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # - * # go backwards !! 
# - * #################################################################### - */ -static inline u64 cpu_clock(int cpu) -{ - return sched_clock_cpu(cpu); -} - -static inline u64 local_clock(void) -{ - return sched_clock_cpu(raw_smp_processor_id()); -} -#endif - -#ifdef CONFIG_IRQ_TIME_ACCOUNTING -/* - * An i/f to runtime opt-in for irq time accounting based off of sched_clock. - * The reason for this explicit opt-in is not to have perf penalty with - * slow sched_clocks. - */ -extern void enable_sched_clock_irqtime(void); -extern void disable_sched_clock_irqtime(void); -#else -static inline void enable_sched_clock_irqtime(void) {} -static inline void disable_sched_clock_irqtime(void) {} -#endif - extern unsigned long long task_sched_runtime(struct task_struct *task); @@ -2297,9 +2200,6 @@ extern void sched_exec(void); #define sched_exec() {} #endif -extern void sched_clock_idle_sleep_event(void); -extern void sched_clock_idle_wakeup_event(u64 delta_ns); - #ifdef CONFIG_HOTPLUG_CPU extern void idle_task_exit(void); #else diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 7cb7d79b2cf9..ac12f71d359c 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -3,4 +3,102 @@ #include +/* + * Do not use outside of architecture code which knows its limitations. + * + * sched_clock() has no promise of monotonicity or bounded drift between + * CPUs, use (which you should not) requires disabling IRQs. + * + * Please use one of the three interfaces below. + */ +extern unsigned long long notrace sched_clock(void); + +/* + * See the comment in kernel/sched/clock.c + */ +extern u64 running_clock(void); +extern u64 sched_clock_cpu(int cpu); + + +extern void sched_clock_init(void); + +#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline void sched_clock_init_late(void) +{ +} + +static inline void sched_clock_tick(void) +{ +} + +static inline void clear_sched_clock_stable(void) +{ +} + +static inline void sched_clock_idle_sleep_event(void) +{ +} + +static inline void sched_clock_idle_wakeup_event(u64 delta_ns) +{ +} + +static inline u64 cpu_clock(int cpu) +{ + return sched_clock(); +} + +static inline u64 local_clock(void) +{ + return sched_clock(); +} +#else +extern void sched_clock_init_late(void); +/* + * Architectures can set this to 1 if they have specified + * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, + * but then during bootup it turns out that sched_clock() + * is reliable after all: + */ +extern int sched_clock_stable(void); +extern void clear_sched_clock_stable(void); + +extern void sched_clock_tick(void); +extern void sched_clock_idle_sleep_event(void); +extern void sched_clock_idle_wakeup_event(u64 delta_ns); + +/* + * As outlined in clock.c, provides a fast, high resolution, nanosecond + * time source that is monotonic per cpu argument and has bounded drift + * between cpus. + * + * ######################### BIG FAT WARNING ########################## + * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can # + * # go backwards !! # + * #################################################################### + */ +static inline u64 cpu_clock(int cpu) +{ + return sched_clock_cpu(cpu); +} + +static inline u64 local_clock(void) +{ + return sched_clock_cpu(raw_smp_processor_id()); +} +#endif + +#ifdef CONFIG_IRQ_TIME_ACCOUNTING +/* + * An i/f to runtime opt-in for irq time accounting based off of sched_clock. + * The reason for this explicit opt-in is not to have perf penalty with + * slow sched_clocks. 
+ */ +extern void enable_sched_clock_irqtime(void); +extern void disable_sched_clock_irqtime(void); +#else +static inline void enable_sched_clock_irqtime(void) {} +static inline void disable_sched_clock_irqtime(void) {} +#endif + #endif /* _LINUX_SCHED_CLOCK_H */ -- GitLab From 47913d4ebd99c827c82c4f29eb282a119c3f2aeb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:00:26 +0100 Subject: [PATCH 0928/1084] sched/headers, delayacct: Move the 'struct task_delay_info' definition from to The 'struct task_delay_info' definition does not have to be in sched.h, because task_struct only has a pointer to it. So move it to to reduce the size of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/delayacct.h | 36 ++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 39 ++++----------------------------------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 00e60f79a9cc..6b769cbd1000 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -30,7 +30,43 @@ #define DELAYACCT_PF_BLKIO 0x00000002 /* I am waiting on IO */ #ifdef CONFIG_TASK_DELAY_ACCT +struct task_delay_info { + spinlock_t lock; + unsigned int flags; /* Private per-task flags */ + + /* For each stat XXX, add following, aligned appropriately + * + * struct timespec XXX_start, XXX_end; + * u64 XXX_delay; + * u32 XXX_count; + * + * Atomicity of updates to XXX_delay, XXX_count protected by + * single lock above (split into XXX_lock if contention is an issue). + */ + + /* + * XXX_count is incremented on every XXX operation, the delay + * associated with the operation is added to XXX_delay. + * XXX_delay contains the accumulated delay time in nanoseconds. + */ + u64 blkio_start; /* Shared by blkio, swapin */ + u64 blkio_delay; /* wait for sync block io completion */ + u64 swapin_delay; /* wait for swapin block io completion */ + u32 blkio_count; /* total count of the number of sync block */ + /* io operations performed */ + u32 swapin_count; /* total count of the number of swapin block */ + /* io operations performed */ + + u64 freepages_start; + u64 freepages_delay; /* wait for memory reclaim */ + u32 freepages_count; /* total count of memory reclaim */ +}; +#endif +#include +#include + +#ifdef CONFIG_TASK_DELAY_ACCT extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index b42d2f9d673a..7d6998858fa3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -893,39 +893,7 @@ struct sched_info { }; #endif /* CONFIG_SCHED_INFO */ -#ifdef CONFIG_TASK_DELAY_ACCT -struct task_delay_info { - spinlock_t lock; - unsigned int flags; /* Private per-task flags */ - - /* For each stat XXX, add following, aligned appropriately - * - * struct timespec XXX_start, XXX_end; - * u64 XXX_delay; - * u32 XXX_count; - * - * Atomicity of updates to XXX_delay, XXX_count protected by - * single lock above (split into XXX_lock if contention is an issue). - */ - - /* - * XXX_count is incremented on every XXX operation, the delay - * associated with the operation is added to XXX_delay. - * XXX_delay contains the accumulated delay time in nanoseconds. 
- */ - u64 blkio_start; /* Shared by blkio, swapin */ - u64 blkio_delay; /* wait for sync block io completion */ - u64 swapin_delay; /* wait for swapin block io completion */ - u32 blkio_count; /* total count of the number of sync block */ - /* io operations performed */ - u32 swapin_count; /* total count of the number of swapin block */ - /* io operations performed */ - - u64 freepages_start; - u64 freepages_delay; /* wait for memory reclaim */ - u32 freepages_count; /* total count of memory reclaim */ -}; -#endif /* CONFIG_TASK_DELAY_ACCT */ +struct task_delay_info; static inline int sched_info_on(void) { @@ -1612,9 +1580,10 @@ struct task_struct { struct page_frag task_frag; -#ifdef CONFIG_TASK_DELAY_ACCT - struct task_delay_info *delays; +#ifdef CONFIG_TASK_DELAY_ACCT + struct task_delay_info *delays; #endif + #ifdef CONFIG_FAULT_INJECTION int make_it_fail; #endif -- GitLab From e2d1e2aec572a2138dea74d53be54a1406d419c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:07:51 +0100 Subject: [PATCH 0929/1084] sched/headers: Move various ABI definitions to Move scheduler ABI types (struct sched_attr, struct sched_param, etc.) into the new UAPI header. This further reduces the size and complexity of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 70 +------------------------------- include/uapi/linux/sched/types.h | 68 +++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 68 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 7d6998858fa3..5c481906e835 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -5,11 +5,6 @@ #include - -struct sched_param { - int sched_priority; -}; - #include /* for HZ */ #include @@ -64,69 +59,8 @@ struct sched_param { #include -#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ - -/* - * Extended scheduling parameters data structure. - * - * This is needed because the original struct sched_param can not be - * altered without introducing ABI issues with legacy applications - * (e.g., in sched_getparam()). - * - * However, the possibility of specifying more than just a priority for - * the tasks may be useful for a wide variety of application fields, e.g., - * multimedia, streaming, automation and control, and many others. - * - * This variant (sched_attr) is meant at describing a so-called - * sporadic time-constrained task. In such model a task is specified by: - * - the activation period or minimum instance inter-arrival time; - * - the maximum (or average, depending on the actual scheduling - * discipline) computation time of all instances, a.k.a. runtime; - * - the deadline (relative to the actual activation time) of each - * instance. - * Very briefly, a periodic (sporadic) task asks for the execution of - * some specific computation --which is typically called an instance-- - * (at most) every period. Moreover, each instance typically lasts no more - * than the runtime and must be completed by time instant t equal to - * the instance activation time + the deadline. - * - * This is reflected by the actual fields of the sched_attr structure: - * - * @size size of the structure, for fwd/bwd compat. 
- * - * @sched_policy task's scheduling policy - * @sched_flags for customizing the scheduler behaviour - * @sched_nice task's nice value (SCHED_NORMAL/BATCH) - * @sched_priority task's static priority (SCHED_FIFO/RR) - * @sched_deadline representative of the task's deadline - * @sched_runtime representative of the task's runtime - * @sched_period representative of the task's period - * - * Given this task model, there are a multiplicity of scheduling algorithms - * and policies, that can be used to ensure all the tasks will make their - * timing constraints. - * - * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the - * only user of this new interface. More information about the algorithm - * available in the scheduling class file or in Documentation/. - */ -struct sched_attr { - u32 size; - - u32 sched_policy; - u64 sched_flags; - - /* SCHED_NORMAL, SCHED_BATCH */ - s32 sched_nice; - - /* SCHED_FIFO, SCHED_RR */ - u32 sched_priority; - - /* SCHED_DEADLINE */ - u64 sched_runtime; - u64 sched_deadline; - u64 sched_period; -}; +struct sched_attr; +struct sched_param; struct futex_pi_state; struct robust_list_head; diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h index d162d315f4b5..307acbc82d80 100644 --- a/include/uapi/linux/sched/types.h +++ b/include/uapi/linux/sched/types.h @@ -3,4 +3,72 @@ #include +struct sched_param { + int sched_priority; +}; + +#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */ + +/* + * Extended scheduling parameters data structure. + * + * This is needed because the original struct sched_param can not be + * altered without introducing ABI issues with legacy applications + * (e.g., in sched_getparam()). + * + * However, the possibility of specifying more than just a priority for + * the tasks may be useful for a wide variety of application fields, e.g., + * multimedia, streaming, automation and control, and many others. + * + * This variant (sched_attr) is meant at describing a so-called + * sporadic time-constrained task. In such model a task is specified by: + * - the activation period or minimum instance inter-arrival time; + * - the maximum (or average, depending on the actual scheduling + * discipline) computation time of all instances, a.k.a. runtime; + * - the deadline (relative to the actual activation time) of each + * instance. + * Very briefly, a periodic (sporadic) task asks for the execution of + * some specific computation --which is typically called an instance-- + * (at most) every period. Moreover, each instance typically lasts no more + * than the runtime and must be completed by time instant t equal to + * the instance activation time + the deadline. + * + * This is reflected by the actual fields of the sched_attr structure: + * + * @size size of the structure, for fwd/bwd compat. + * + * @sched_policy task's scheduling policy + * @sched_flags for customizing the scheduler behaviour + * @sched_nice task's nice value (SCHED_NORMAL/BATCH) + * @sched_priority task's static priority (SCHED_FIFO/RR) + * @sched_deadline representative of the task's deadline + * @sched_runtime representative of the task's runtime + * @sched_period representative of the task's period + * + * Given this task model, there are a multiplicity of scheduling algorithms + * and policies, that can be used to ensure all the tasks will make their + * timing constraints. + * + * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the + * only user of this new interface. 
More information about the algorithm + * available in the scheduling class file or in Documentation/. + */ +struct sched_attr { + u32 size; + + u32 sched_policy; + u64 sched_flags; + + /* SCHED_NORMAL, SCHED_BATCH */ + s32 sched_nice; + + /* SCHED_FIFO, SCHED_RR */ + u32 sched_priority; + + /* SCHED_DEADLINE */ + u64 sched_runtime; + u64 sched_deadline; + u64 sched_period; +}; + #endif /* _UAPI_LINUX_SCHED_TYPES_H */ -- GitLab From dea38c74cb9205341f52b8d8ae18f61247a43ea8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:25:37 +0100 Subject: [PATCH 0930/1084] sched/headers: Move loadavg related definitions from to Move these bits to , to reduce the size and complexity of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 27 --------------------------- include/linux/sched/loadavg.h | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c481906e835..78adf7a0cac1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,31 +71,6 @@ struct blk_plug; struct filename; struct nameidata; -/* - * These are the constant used to fake the fixed-point load-average - * counting. Some notes: - * - 11 bit fractions expand to 22 bits by the multiplies: this gives - * a load-average precision of 10 bits integer + 11 bits fractional - * - if you want to count load-averages more often, you need more - * precision, or rounding will get you. With 2-second counting freq, - * the EXP_n values would be 1981, 2034 and 2043 if still using only - * 11 bit fractions. - */ -extern unsigned long avenrun[]; /* Load averages */ -extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); - -#define FSHIFT 11 /* nr of bits of precision */ -#define FIXED_1 (1<>= FSHIFT; - extern unsigned long total_forks; extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); @@ -106,8 +81,6 @@ extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); -extern void calc_global_load(unsigned long ticks); - #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void cpu_load_update_nohz_start(void); extern void cpu_load_update_nohz_stop(void); diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h index 3ae74be327e8..c392e28ce0ac 100644 --- a/include/linux/sched/loadavg.h +++ b/include/linux/sched/loadavg.h @@ -3,4 +3,31 @@ #include +/* + * These are the constant used to fake the fixed-point load-average + * counting. Some notes: + * - 11 bit fractions expand to 22 bits by the multiplies: this gives + * a load-average precision of 10 bits integer + 11 bits fractional + * - if you want to count load-averages more often, you need more + * precision, or rounding will get you. With 2-second counting freq, + * the EXP_n values would be 1981, 2034 and 2043 if still using only + * 11 bit fractions. 
+ */ +extern unsigned long avenrun[]; /* Load averages */ +extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); + +#define FSHIFT 11 /* nr of bits of precision */ +#define FIXED_1 (1<>= FSHIFT; + +extern void calc_global_load(unsigned long ticks); + #endif /* _LINUX_SCHED_LOADAVG_H */ -- GitLab From de8f1c77313d8c908b2897f268d466c13df161d4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:41:24 +0100 Subject: [PATCH 0931/1084] sched/headers: Move autogroup APIs into Further reduce the size of sched.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 18 ------------------ include/linux/sched/autogroup.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 78adf7a0cac1..042620729230 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2092,24 +2092,6 @@ static inline void wake_up_nohz_cpu(int cpu) { } extern u64 scheduler_tick_max_deferment(void); #endif -#ifdef CONFIG_SCHED_AUTOGROUP -extern void sched_autogroup_create_attach(struct task_struct *p); -extern void sched_autogroup_detach(struct task_struct *p); -extern void sched_autogroup_fork(struct signal_struct *sig); -extern void sched_autogroup_exit(struct signal_struct *sig); -extern void sched_autogroup_exit_task(struct task_struct *p); -#ifdef CONFIG_PROC_FS -extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); -extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); -#endif -#else -static inline void sched_autogroup_create_attach(struct task_struct *p) { } -static inline void sched_autogroup_detach(struct task_struct *p) { } -static inline void sched_autogroup_fork(struct signal_struct *sig) { } -static inline void sched_autogroup_exit(struct signal_struct *sig) { } -static inline void sched_autogroup_exit_task(struct task_struct *p) { } -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h index d745f22e57dc..586bdf37330d 100644 --- a/include/linux/sched/autogroup.h +++ b/include/linux/sched/autogroup.h @@ -3,4 +3,25 @@ #include +struct signal_struct; +struct seq_file; + +#ifdef CONFIG_SCHED_AUTOGROUP +extern void sched_autogroup_create_attach(struct task_struct *p); +extern void sched_autogroup_detach(struct task_struct *p); +extern void sched_autogroup_fork(struct signal_struct *sig); +extern void sched_autogroup_exit(struct signal_struct *sig); +extern void sched_autogroup_exit_task(struct task_struct *p); +#ifdef CONFIG_PROC_FS +extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m); +extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice); +#endif +#else +static inline void sched_autogroup_create_attach(struct task_struct *p) { } +static inline void sched_autogroup_detach(struct task_struct *p) { } +static inline void sched_autogroup_fork(struct signal_struct *sig) { } +static inline void sched_autogroup_exit(struct signal_struct *sig) { } +static inline void sched_autogroup_exit_task(struct task_struct *p) { } +#endif + #endif /* _LINUX_SCHED_AUTOGROUP_H */ -- GitLab From 1657b8f84ed9fc1d2a100671f1d42d6286f20073 Mon Sep 17 00:00:00 2001 From: Waldemar 
Rymarkiewicz Date: Fri, 24 Feb 2017 23:30:03 +0100 Subject: [PATCH 0932/1084] ath10k: search SMBIOS for OEM board file extension Board Data File (BDF) is loaded upon driver boot-up procedure. The right board data file is identified, among others, by device and sybsystem ids. The problem, however, can occur when the (default) board data file cannot fulfill with the vendor requirements and it is necessary to use a different board data file. To solve the issue QCA uses SMBIOS type 0xF8 to store Board Data File Name Extension to specify the extension/variant name. The driver will take the extension suffix into consideration and will load the right (non-default) board data file if necessary. If it is unnecessary to use extension board data file, please leave the SMBIOS field blank and default configuration will be used. Example: If a default board data file for a specific board is identified by a string "bus=pci,vendor=168c,device=003e,subsystem-vendor=1028, subsystem-device=0310" then the OEM specific data file, if used, could be identified by variant suffix: "bus=pci,vendor=168c,device=003e,subsystem-vendor=1028, subsystem-device=0310,variant=DE_1AB" If board data file name extension is set but board-2.bin does not contain board data file for the variant, the driver will fallback to the default board data file not to break backward compatibility. This was first applied in commit f2593cb1b291 ("ath10k: Search SMBIOS for OEM board file extension") but later reverted in commit 005c3490e9db ("Revert "ath10k: Search SMBIOS for OEM board file extension"". This patch is now otherwise the same as commit f2593cb1b291 except the regression fixed. Signed-off-by: Waldemar Rymarkiewicz Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 101 ++++++++++++++++++++++++- drivers/net/wireless/ath/ath10k/core.h | 19 +++++ 2 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index dd902b43f8f7..0a8e29e9a0eb 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include "core.h" @@ -711,6 +713,72 @@ static int ath10k_core_get_board_id_from_otp(struct ath10k *ar) return 0; } +static void ath10k_core_check_bdfext(const struct dmi_header *hdr, void *data) +{ + struct ath10k *ar = data; + const char *bdf_ext; + const char *magic = ATH10K_SMBIOS_BDF_EXT_MAGIC; + u8 bdf_enabled; + int i; + + if (hdr->type != ATH10K_SMBIOS_BDF_EXT_TYPE) + return; + + if (hdr->length != ATH10K_SMBIOS_BDF_EXT_LENGTH) { + ath10k_dbg(ar, ATH10K_DBG_BOOT, + "wrong smbios bdf ext type length (%d).\n", + hdr->length); + return; + } + + bdf_enabled = *((u8 *)hdr + ATH10K_SMBIOS_BDF_EXT_OFFSET); + if (!bdf_enabled) { + ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant name not found.\n"); + return; + } + + /* Only one string exists (per spec) */ + bdf_ext = (char *)hdr + hdr->length; + + if (memcmp(bdf_ext, magic, strlen(magic)) != 0) { + ath10k_dbg(ar, ATH10K_DBG_BOOT, + "bdf variant magic does not match.\n"); + return; + } + + for (i = 0; i < strlen(bdf_ext); i++) { + if (!isascii(bdf_ext[i]) || !isprint(bdf_ext[i])) { + ath10k_dbg(ar, ATH10K_DBG_BOOT, + "bdf variant name contains non ascii chars.\n"); + return; + } + } + + /* Copy extension name without magic suffix */ + if (strscpy(ar->id.bdf_ext, bdf_ext + strlen(magic), + sizeof(ar->id.bdf_ext)) < 0) { + ath10k_dbg(ar, ATH10K_DBG_BOOT, + "bdf variant string is longer than 
the buffer can accommodate (variant: %s)\n", + bdf_ext); + return; + } + + ath10k_dbg(ar, ATH10K_DBG_BOOT, + "found and validated bdf variant smbios_type 0x%x bdf %s\n", + ATH10K_SMBIOS_BDF_EXT_TYPE, bdf_ext); +} + +static int ath10k_core_check_smbios(struct ath10k *ar) +{ + ar->id.bdf_ext[0] = '\0'; + dmi_walk(ath10k_core_check_bdfext, ar); + + if (ar->id.bdf_ext[0] == '\0') + return -ENODATA; + + return 0; +} + static int ath10k_download_and_run_otp(struct ath10k *ar) { u32 result, address = ar->hw_params.patch_load_addr; @@ -1020,6 +1088,23 @@ static int ath10k_core_fetch_board_data_api_n(struct ath10k *ar, case ATH10K_BD_IE_BOARD: ret = ath10k_core_parse_bd_ie_board(ar, data, ie_len, boardname); + if (ret == -ENOENT && ar->id.bdf_ext[0] != '\0') { + /* try default bdf if variant was not found */ + char *s, *v = ",variant="; + char boardname2[100]; + + strlcpy(boardname2, boardname, + sizeof(boardname2)); + + s = strstr(boardname2, v); + if (s) + *s = '\0'; /* strip ",variant=%s" */ + + ret = ath10k_core_parse_bd_ie_board(ar, data, + ie_len, + boardname2); + } + if (ret == -ENOENT) /* no match found, continue */ break; @@ -1057,6 +1142,9 @@ static int ath10k_core_fetch_board_data_api_n(struct ath10k *ar, static int ath10k_core_create_board_name(struct ath10k *ar, char *name, size_t name_len) { + /* strlen(',variant=') + strlen(ar->id.bdf_ext) */ + char variant[9 + ATH10K_SMBIOS_BDF_EXT_STR_LENGTH] = { 0 }; + if (ar->id.bmi_ids_valid) { scnprintf(name, name_len, "bus=%s,bmi-chip-id=%d,bmi-board-id=%d", @@ -1066,12 +1154,15 @@ static int ath10k_core_create_board_name(struct ath10k *ar, char *name, goto out; } + if (ar->id.bdf_ext[0] != '\0') + scnprintf(variant, sizeof(variant), ",variant=%s", + ar->id.bdf_ext); + scnprintf(name, name_len, - "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x", + "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x%s", ath10k_bus_str(ar->hif.bus), ar->id.vendor, ar->id.device, - ar->id.subsystem_vendor, ar->id.subsystem_device); - + ar->id.subsystem_vendor, ar->id.subsystem_device, variant); out: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using board name '%s'\n", name); @@ -2128,6 +2219,10 @@ static int ath10k_core_probe_fw(struct ath10k *ar) goto err_free_firmware_files; } + ret = ath10k_core_check_smbios(ar); + if (ret) + ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant name not set.\n"); + ret = ath10k_core_fetch_board_file(ar); if (ret) { ath10k_err(ar, "failed to fetch board file: %d\n", ret); diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 757242ef52ac..88d14be7fcce 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -69,6 +69,23 @@ #define ATH10K_NAPI_BUDGET 64 #define ATH10K_NAPI_QUOTA_LIMIT 60 +/* SMBIOS type containing Board Data File Name Extension */ +#define ATH10K_SMBIOS_BDF_EXT_TYPE 0xF8 + +/* SMBIOS type structure length (excluding strings-set) */ +#define ATH10K_SMBIOS_BDF_EXT_LENGTH 0x9 + +/* Offset pointing to Board Data File Name Extension */ +#define ATH10K_SMBIOS_BDF_EXT_OFFSET 0x8 + +/* Board Data File Name Extension string length. 
+ * String format: BDF__\0 + */ +#define ATH10K_SMBIOS_BDF_EXT_STR_LENGTH 0x20 + +/* The magic used by QCA spec */ +#define ATH10K_SMBIOS_BDF_EXT_MAGIC "BDF_" + struct ath10k; enum ath10k_bus { @@ -798,6 +815,8 @@ struct ath10k { bool bmi_ids_valid; u8 bmi_board_id; u8 bmi_chip_id; + + char bdf_ext[ATH10K_SMBIOS_BDF_EXT_STR_LENGTH]; } id; int fw_api; -- GitLab From c96d0a1c47abd5c4fa544dcedb5fac4d020ac58b Mon Sep 17 00:00:00 2001 From: Paulo Flabiano Smorigo Date: Wed, 1 Mar 2017 10:58:20 -0300 Subject: [PATCH 0933/1084] crypto: vmx - Use skcipher for cbc fallback Cc: stable@vger.kernel.org #4.10 Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aes_cbc.c | 47 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 94ad5c0adbcb..72a26eb4e954 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -27,11 +27,12 @@ #include #include #include +#include #include "aesp8-ppc.h" struct p8_aes_cbc_ctx { - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct aes_key enc_key; struct aes_key dec_key; }; @@ -39,7 +40,7 @@ struct p8_aes_cbc_ctx { static int p8_aes_cbc_init(struct crypto_tfm *tfm) { const char *alg; - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); if (!(alg = crypto_tfm_alg_name(tfm))) { @@ -47,8 +48,9 @@ static int p8_aes_cbc_init(struct crypto_tfm *tfm) return -ENOENT; } - fallback = - crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + fallback = crypto_alloc_skcipher(alg, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", @@ -56,11 +58,12 @@ static int p8_aes_cbc_init(struct crypto_tfm *tfm) return PTR_ERR(fallback); } printk(KERN_INFO "Using '%s' as fallback implementation.\n", - crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + crypto_skcipher_driver_name(fallback)); + - crypto_blkcipher_set_flags( + crypto_skcipher_set_flags( fallback, - crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + crypto_skcipher_get_flags((struct crypto_skcipher *)tfm)); ctx->fallback = fallback; return 0; @@ -71,7 +74,7 @@ static void p8_aes_cbc_exit(struct crypto_tfm *tfm) struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); if (ctx->fallback) { - crypto_free_blkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); ctx->fallback = NULL; } } @@ -91,7 +94,7 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_enable(); preempt_enable(); - ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + ret += crypto_skcipher_setkey(ctx->fallback, key, keylen); return ret; } @@ -103,15 +106,14 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; if (in_interrupt()) { - ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, - nbytes); + SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback); + skcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + ret = crypto_skcipher_encrypt(req); + skcipher_request_zero(req); } else { preempt_disable(); 
pagefault_disable(); @@ -144,15 +146,14 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; if (in_interrupt()) { - ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, - nbytes); + SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback); + skcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + ret = crypto_skcipher_decrypt(req); + skcipher_request_zero(req); } else { preempt_disable(); pagefault_disable(); -- GitLab From 5839f555fa576be57371686265206398d9ea1480 Mon Sep 17 00:00:00 2001 From: Paulo Flabiano Smorigo Date: Wed, 1 Mar 2017 11:00:00 -0300 Subject: [PATCH 0934/1084] crypto: vmx - Use skcipher for xts fallback Cc: stable@vger.kernel.org #4.10 Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aes_xts.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c index 24353ec336c5..6adc9290557a 100644 --- a/drivers/crypto/vmx/aes_xts.c +++ b/drivers/crypto/vmx/aes_xts.c @@ -28,11 +28,12 @@ #include #include #include +#include #include "aesp8-ppc.h" struct p8_aes_xts_ctx { - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct aes_key enc_key; struct aes_key dec_key; struct aes_key tweak_key; @@ -41,7 +42,7 @@ struct p8_aes_xts_ctx { static int p8_aes_xts_init(struct crypto_tfm *tfm) { const char *alg; - struct crypto_blkcipher *fallback; + struct crypto_skcipher *fallback; struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); if (!(alg = crypto_tfm_alg_name(tfm))) { @@ -49,8 +50,8 @@ static int p8_aes_xts_init(struct crypto_tfm *tfm) return -ENOENT; } - fallback = - crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + fallback = crypto_alloc_skcipher(alg, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback)) { printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", @@ -58,11 +59,11 @@ static int p8_aes_xts_init(struct crypto_tfm *tfm) return PTR_ERR(fallback); } printk(KERN_INFO "Using '%s' as fallback implementation.\n", - crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + crypto_skcipher_driver_name(fallback)); - crypto_blkcipher_set_flags( + crypto_skcipher_set_flags( fallback, - crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + crypto_skcipher_get_flags((struct crypto_skcipher *)tfm)); ctx->fallback = fallback; return 0; @@ -73,7 +74,7 @@ static void p8_aes_xts_exit(struct crypto_tfm *tfm) struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); if (ctx->fallback) { - crypto_free_blkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); ctx->fallback = NULL; } } @@ -98,7 +99,7 @@ static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_enable(); preempt_enable(); - ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + ret += crypto_skcipher_setkey(ctx->fallback, key, keylen); return ret; } @@ -113,15 +114,14 @@ static int p8_aes_xts_crypt(struct blkcipher_desc *desc, struct blkcipher_walk walk; struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; if 
(in_interrupt()) { - ret = enc ? crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes) : - crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); + SKCIPHER_REQUEST_ON_STACK(req, ctx->fallback); + skcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_callback(req, desc->flags, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info); + ret = enc? crypto_skcipher_encrypt(req) : crypto_skcipher_decrypt(req); + skcipher_request_zero(req); } else { preempt_disable(); pagefault_disable(); -- GitLab From 0695d7dc1d9f19b82ec2cae24856bddce278cfe6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Feb 2017 16:43:36 +0100 Subject: [PATCH 0935/1084] orangefs: Use RCU for destroy_inode freeing of inodes must be RCU-delayed on all filesystems Cc: stable@vger.kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Al Viro --- fs/orangefs/super.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index c48859f16e7b..67c24351a67f 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -115,6 +115,13 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb) return &orangefs_inode->vfs_inode; } +static void orangefs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); + kmem_cache_free(orangefs_inode_cache, orangefs_inode); +} + static void orangefs_destroy_inode(struct inode *inode) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); @@ -123,7 +130,7 @@ static void orangefs_destroy_inode(struct inode *inode) "%s: deallocated %p destroying inode %pU\n", __func__, orangefs_inode, get_khandle_from_ino(inode)); - kmem_cache_free(orangefs_inode_cache, orangefs_inode); + call_rcu(&inode->i_rcu, orangefs_i_callback); } /* -- GitLab From e53aff45c490ea04aa5d9e3cffa65b6b54397455 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 1 Mar 2017 15:15:07 -0700 Subject: [PATCH 0936/1084] selftests: lib.mk Fix individual test builds In commit a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT"), added support to generate compile targets in a user specified directory. OUTPUT variable controls the location which is undefined when tests are built in the test directory or with "make -C tools/testing/selftests/x86". make -C tools/testing/selftests/x86/ make: Entering directory '/lkml/linux_4.11/tools/testing/selftests/x86' Makefile:44: warning: overriding recipe for target 'clean' ../lib.mk:51: warning: ignoring old recipe for target 'clean' gcc -m64 -o /single_step_syscall_64 -O2 -g -std=gnu99 -pthread -Wall single_step_syscall.c -lrt -ldl /usr/bin/ld: cannot open output file /single_step_syscall_64: Permission denied collect2: error: ld returned 1 exit status Makefile:50: recipe for target '/single_step_syscall_64' failed make: *** [/single_step_syscall_64] Error 1 make: Leaving directory '/lkml/linux_4.11/tools/testing/selftests/x86' Same failure with "cd tools/testing/selftests/x86/;make" run. Fix this with a change to lib.mk to define OUTPUT to be the pwd when MAKELEVEL is 0. This covers both cases mentioned above. 
Reported-by: Ingo Molnar Signed-off-by: Shuah Khan --- tools/testing/selftests/lib.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index ce96d80ad64f..775c589ac3c0 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -2,6 +2,10 @@ # Makefile can operate with or without the kbuild infrastructure. CC := $(CROSS_COMPILE)gcc +ifeq (0,$(MAKELEVEL)) +OUTPUT := $(shell pwd) +endif + TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) -- GitLab From 59f082e464ae0533813d5a1c3149b22563da93d7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 1 Feb 2017 09:53:14 -0800 Subject: [PATCH 0937/1084] blk-mq: allocate blk_mq_tags and requests in correct node blk_mq_tags/requests of specific hardware queue are mostly used in specific cpus, which might not be in the same numa node as disk. For example, a nvme card is in node 0. half hardware queue will be used by node 0, the other node 1. Signed-off-by: Shaohua Li Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 9e6b064e5339..d84c66fb37b7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1713,16 +1713,20 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, unsigned int reserved_tags) { struct blk_mq_tags *tags; + int node; + + node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx); + if (node == NUMA_NO_NODE) + node = set->numa_node; - tags = blk_mq_init_tags(nr_tags, reserved_tags, - set->numa_node, + tags = blk_mq_init_tags(nr_tags, reserved_tags, node, BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); if (!tags) return NULL; tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - set->numa_node); + node); if (!tags->rqs) { blk_mq_free_tags(tags); return NULL; @@ -1730,7 +1734,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, - set->numa_node); + node); if (!tags->static_rqs) { kfree(tags->rqs); blk_mq_free_tags(tags); @@ -1750,6 +1754,11 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, { unsigned int i, j, entries_per_page, max_order = 4; size_t rq_size, left; + int node; + + node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx); + if (node == NUMA_NO_NODE) + node = set->numa_node; INIT_LIST_HEAD(&tags->page_list); @@ -1771,7 +1780,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, this_order--; do { - page = alloc_pages_node(set->numa_node, + page = alloc_pages_node(node, GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, this_order); if (page) @@ -1804,7 +1813,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, if (set->ops->init_request) { if (set->ops->init_request(set->driver_data, rq, hctx_idx, i, - set->numa_node)) { + node)) { tags->static_rqs[i] = NULL; goto fail; } -- GitLab From 27ddb689909cd0bab30524a5f720ae3a3e55acac Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 1 Feb 2017 09:53:15 -0800 Subject: [PATCH 0938/1084] PCI: add an API to get node from vector Next patch will use the API to get the node from vector for nvme device Signed-off-by: Shaohua Li Reviewed-by: Christoph Hellwig Acked-by: Bjorn Helgaas Signed-off-by: Jens Axboe --- 
drivers/pci/msi.c | 16 ++++++++++++++++ include/linux/pci.h | 6 ++++++ 2 files changed, 22 insertions(+) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 980eaf588281..d571bc330686 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1298,6 +1298,22 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) } EXPORT_SYMBOL(pci_irq_get_affinity); +/** + * pci_irq_get_node - return the numa node of a particular msi vector + * @pdev: PCI device to operate on + * @vec: device-relative interrupt vector index (0-based). + */ +int pci_irq_get_node(struct pci_dev *pdev, int vec) +{ + const struct cpumask *mask; + + mask = pci_irq_get_affinity(pdev, vec); + if (mask) + return local_memory_node(cpu_to_node(cpumask_first(mask))); + return dev_to_node(&pdev->dev); +} +EXPORT_SYMBOL(pci_irq_get_node); + struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) { return to_pci_dev(desc->dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 282ed32244ce..eb3da1a04e6c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1323,6 +1323,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); +int pci_irq_get_node(struct pci_dev *pdev, int vec); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1370,6 +1371,11 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, { return cpu_possible_mask; } + +static inline int pci_irq_get_node(struct pci_dev *pdev, int vec) +{ + return first_online_node; +} #endif static inline int -- GitLab From d3af3ecdc62c46fa67ce7a681f173acb1d750e33 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 1 Feb 2017 09:53:16 -0800 Subject: [PATCH 0939/1084] nvme: allocate nvme_queue in correct node nvme_queue is per-cpu queue (mostly). Allocating it in node where blk-mq will use it. 
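A minimal sketch of how the API added in the previous patch combines with node-aware allocation here, assuming a PCI device whose interrupt vectors have already been spread across CPUs. pci_irq_get_node() and kzalloc_node() are the real interfaces; the structure and function names are hypothetical.

#include <linux/pci.h>
#include <linux/slab.h>

/* hypothetical per-queue structure, reduced to a single field */
struct example_queue {
	int qid;
};

static struct example_queue *example_alloc_queue(struct pci_dev *pdev, int qid)
{
	/* vector == qid - 1 for I/O queues, matching nvme_create_io_queues() */
	int node = pci_irq_get_node(pdev, qid - 1);
	struct example_queue *q;

	/* place the per-queue state on the node that will service it */
	q = kzalloc_node(sizeof(*q), GFP_KERNEL, node);
	if (!q)
		return NULL;
	q->qid = qid;
	return q;
}

The point of the node argument is that completions run on the CPUs the vector is affine to, so keeping the queue state on their local node avoids remote memory accesses.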
Signed-off-by: Shaohua Li Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 57a1af52b06e..eee8f8426ff2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1038,9 +1038,10 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, } static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, - int depth) + int depth, int node) { - struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL); + struct nvme_queue *nvmeq = kzalloc_node(sizeof(*nvmeq), GFP_KERNEL, + node); if (!nvmeq) return NULL; @@ -1217,7 +1218,8 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) nvmeq = dev->queues[0]; if (!nvmeq) { - nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); + nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, + dev_to_node(dev->dev)); if (!nvmeq) return -ENOMEM; } @@ -1309,7 +1311,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev) int ret = 0; for (i = dev->queue_count; i <= dev->max_qid; i++) { - if (!nvme_alloc_queue(dev, i, dev->q_depth)) { + /* vector == qid - 1, match nvme_create_queue */ + if (!nvme_alloc_queue(dev, i, dev->q_depth, + pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) { ret = -ENOMEM; break; } -- GitLab From 415b806de5576b656f3ff94366589af9a161d0c8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 27 Feb 2017 10:04:39 -0700 Subject: [PATCH 0940/1084] blk-mq-sched: Allocate sched reserved tags as specified in the original queue tagset Signed-off-by: Sagi Grimberg Modified by me to also check at driver tag allocation time if the original request was reserved, so we can be sure to allocate a properly reserved tag at that point in time, too. 
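The convention relied on here, though not spelled out, is that reserved tags occupy the lowest indices of a tag map, so a tag's origin can be recovered from its index alone. A reduced sketch with a hypothetical structure keeping only the two relevant fields; it mirrors the blk_mq_tag_is_reserved() helper added by this patch.

#include <linux/types.h>

/* hypothetical, trimmed-down view of a tag map */
struct example_tags {
	unsigned int nr_tags;		/* reserved + normal tags */
	unsigned int nr_reserved_tags;	/* indices [0, nr_reserved_tags) */
};

/* a tag came from the reserved pool iff its index is below the boundary */
static inline bool example_tag_is_reserved(const struct example_tags *tags,
					   unsigned int tag)
{
	return tag < tags->nr_reserved_tags;
}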
Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 3 ++- block/blk-mq-tag.c | 2 +- block/blk-mq-tag.h | 6 ++++++ block/blk-mq.c | 3 +++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 98c7b061781e..46ca965fff5c 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -454,7 +454,8 @@ int blk_mq_sched_setup(struct request_queue *q) */ ret = 0; queue_for_each_hw_ctx(q, hctx, i) { - hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0); + hctx->sched_tags = blk_mq_alloc_rq_map(set, i, + q->nr_requests, set->reserved_tags); if (!hctx->sched_tags) { ret = -ENOMEM; break; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 54c84363c1b2..e48bc2c72615 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -181,7 +181,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, unsigned int tag) { - if (tag >= tags->nr_reserved_tags) { + if (!blk_mq_tag_is_reserved(tags, tag)) { const int real_tag = tag - tags->nr_reserved_tags; BUG_ON(real_tag >= tags->nr_tags); diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 63497423c5cd..5cb51e53cc03 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -85,4 +85,10 @@ static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx, hctx->tags->rqs[tag] = rq; } +static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags, + unsigned int tag) +{ + return tag < tags->nr_reserved_tags; +} + #endif diff --git a/block/blk-mq.c b/block/blk-mq.c index d84c66fb37b7..4df5fb42c74f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -852,6 +852,9 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, return true; } + if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) + data.flags |= BLK_MQ_REQ_RESERVED; + rq->tag = blk_mq_get_tag(&data); if (rq->tag >= 0) { if (blk_mq_tag_busy(data.hctx)) { -- GitLab From 6d2809d51a5079f01a416d91dd63b0766cb685d0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 27 Feb 2017 10:28:27 -0800 Subject: [PATCH 0941/1084] blk-mq: make blk_mq_alloc_request_hctx() allocate a scheduler request blk_mq_alloc_request_hctx() allocates a driver request directly, unlike its blk_mq_alloc_request() counterpart. It also crashes because it doesn't update the tags->rqs map. Fix it by making it allocate a scheduler request. 
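The crash mentioned above comes from the tag-to-request lookup table: whenever a driver tag is assigned, tags->rqs[tag] must be pointed at the request, or later tag iteration and timeout handling dereference stale entries. A sketch of that bookkeeping with hypothetical names; the real update is consolidated into __blk_mq_alloc_request() by a later patch in this series.

#include <linux/blkdev.h>

/* hypothetical container for the per-hardware-queue lookup table */
struct example_tag_map {
	struct request **rqs;	/* indexed by driver tag */
};

static void example_assign_tag(struct example_tag_map *map,
			       struct request *rq, int tag)
{
	rq->tag = tag;
	map->rqs[tag] = rq;	/* keep the tag -> request lookup consistent */
}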
Reported-by: Sagi Grimberg Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Tested-by: Sagi Grimberg --- block/blk-mq-sched.c | 11 +++++------ block/blk-mq.c | 33 +++++++++++++++------------------ 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 46ca965fff5c..5697b23412a1 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -110,15 +110,14 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, struct blk_mq_alloc_data *data) { struct elevator_queue *e = q->elevator; - struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; struct request *rq; blk_queue_enter_live(q); - ctx = blk_mq_get_ctx(q); - hctx = blk_mq_map_queue(q, ctx->cpu); - - blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx); + data->q = q; + if (likely(!data->ctx)) + data->ctx = blk_mq_get_ctx(q); + if (likely(!data->hctx)) + data->hctx = blk_mq_map_queue(q, data->ctx->cpu); if (e) { data->flags |= BLK_MQ_REQ_INTERNAL; diff --git a/block/blk-mq.c b/block/blk-mq.c index 4df5fb42c74f..85a7047d8b03 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -273,10 +273,9 @@ EXPORT_SYMBOL(blk_mq_alloc_request); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, unsigned int flags, unsigned int hctx_idx) { - struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx; + struct blk_mq_alloc_data alloc_data = { .flags = flags }; struct request *rq; - struct blk_mq_alloc_data alloc_data; + unsigned int cpu; int ret; /* @@ -299,25 +298,23 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, * Check if the hardware context is actually mapped to anything. * If not tell the caller that it should skip this queue. */ - hctx = q->queue_hw_ctx[hctx_idx]; - if (!blk_mq_hw_queue_mapped(hctx)) { - ret = -EXDEV; - goto out_queue_exit; + alloc_data.hctx = q->queue_hw_ctx[hctx_idx]; + if (!blk_mq_hw_queue_mapped(alloc_data.hctx)) { + blk_queue_exit(q); + return ERR_PTR(-EXDEV); } - ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); + cpu = cpumask_first(alloc_data.hctx->cpumask); + alloc_data.ctx = __blk_mq_get_ctx(q, cpu); - blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw); - if (!rq) { - ret = -EWOULDBLOCK; - goto out_queue_exit; - } - - return rq; + rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); -out_queue_exit: + blk_mq_put_ctx(alloc_data.ctx); blk_queue_exit(q); - return ERR_PTR(ret); + + if (!rq) + return ERR_PTR(-EWOULDBLOCK); + + return rq; } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); -- GitLab From 59748398992c9c3e9d600e56cb2a5c0c546fe129 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 27 Feb 2017 09:47:54 -0800 Subject: [PATCH 0942/1084] blk-mq: kill blk_mq_set_alloc_data() Nothing is using it anymore. 
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Tested-by: Sagi Grimberg --- block/blk-mq.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/block/blk-mq.h b/block/blk-mq.h index 24b2256186f3..088ced003c13 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -146,16 +146,6 @@ struct blk_mq_alloc_data { struct blk_mq_hw_ctx *hctx; }; -static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, - struct request_queue *q, unsigned int flags, - struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx) -{ - data->q = q; - data->flags = flags; - data->ctx = ctx; - data->hctx = hctx; -} - static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) { if (data->flags & BLK_MQ_REQ_INTERNAL) -- GitLab From 562bef4259776c19cb2423d43af1f99183910a4d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 27 Feb 2017 09:47:55 -0800 Subject: [PATCH 0943/1084] blk-mq: move update of tags->rqs to __blk_mq_alloc_request() No functional difference, it just makes a little more sense to update the tag map where we actually allocate the tag. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe Tested-by: Sagi Grimberg --- block/blk-mq-sched.c | 2 -- block/blk-mq.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 5697b23412a1..09af8ff18719 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -134,8 +134,6 @@ struct request *blk_mq_sched_get_request(struct request_queue *q, rq = __blk_mq_alloc_request(data, op); } else { rq = __blk_mq_alloc_request(data, op); - if (rq) - data->hctx->tags->rqs[rq->tag] = rq; } if (rq) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 85a7047d8b03..94593c6282d8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -234,6 +234,7 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, } rq->tag = tag; rq->internal_tag = -1; + data->hctx->tags->rqs[rq->tag] = rq; } blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); -- GitLab From 6a8a21546507a3ec88e81c2ec927a3fb63efa8ff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 1 Mar 2017 11:47:22 -0500 Subject: [PATCH 0944/1084] nbd: stop leaking sockets This was introduced in the multi-connection patch, we've been leaking socket's ever since. Fixes: 9561a7a ("nbd: add multi-connection support") cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 0bf2b21a62cb..c7e93f62366f 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -689,8 +689,10 @@ static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) nbd->num_connections) { int i; - for (i = 0; i < nbd->num_connections; i++) + for (i = 0; i < nbd->num_connections; i++) { + sockfd_put(nbd->socks[i]->sock); kfree(nbd->socks[i]); + } kfree(nbd->socks); nbd->socks = NULL; nbd->num_connections = 0; -- GitLab From 6bae363ee3057a14eec93440826813603559273a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 Mar 2017 14:22:10 -0500 Subject: [PATCH 0945/1084] blk-mq: Export blk_mq_freeze_queue_wait Drivers can start a freeze, so this provides a way to wait for frozen. 
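A short sketch of how the newly exported wait is meant to be used, assuming a driver that owns several request queues (the array and function names are made up). Starting the freeze on every queue first and only then waiting lets the drains overlap instead of serializing.

#include <linux/blk-mq.h>

static void example_freeze_all(struct request_queue **queues, int nr)
{
	int i;

	/* phase 1: stop new requests from entering any of the queues */
	for (i = 0; i < nr; i++)
		blk_mq_freeze_queue_start(queues[i]);

	/* phase 2: wait until every queue has drained its in-flight requests */
	for (i = 0; i < nr; i++)
		blk_mq_freeze_queue_wait(queues[i]);
}

The nvme patch later in this series applies exactly this split per namespace, as nvme_start_freeze() followed by nvme_wait_freeze().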
Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- include/linux/blk-mq.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 94593c6282d8..8da2c04bb88f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -75,10 +75,11 @@ void blk_mq_freeze_queue_start(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); -static void blk_mq_freeze_queue_wait(struct request_queue *q) +void blk_mq_freeze_queue_wait(struct request_queue *q) { wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); } +EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait); /* * Guarantee no request is in use, so we can change any data structure of diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 001d30d727c5..8dacf680c851 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -245,6 +245,7 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); +void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); -- GitLab From f91328c40a559362b6e7b7bfee01ca17fda87592 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 Mar 2017 14:22:11 -0500 Subject: [PATCH 0946/1084] blk-mq: Provide freeze queue timeout A driver may wish to take corrective action if queued requests do not complete within a set time. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 +++++++++ include/linux/blk-mq.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 8da2c04bb88f..a5e66a7a3506 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -81,6 +81,15 @@ void blk_mq_freeze_queue_wait(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait); +int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, + unsigned long timeout) +{ + return wait_event_timeout(q->mq_freeze_wq, + percpu_ref_is_zero(&q->q_usage_counter), + timeout); +} +EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout); + /* * Guarantee no request is in use, so we can change any data structure of * the queue afterward. diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8dacf680c851..b296a9006117 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -246,6 +246,8 @@ void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); +int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, + unsigned long timeout); int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); int blk_mq_map_queues(struct blk_mq_tag_set *set); -- GitLab From 302ad8cc09339ea261eef58a8d5f4a116a8ffda5 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 1 Mar 2017 14:22:12 -0500 Subject: [PATCH 0947/1084] nvme: Complete all stuck requests If the nvme driver is shutting down its controller, the drievr will not start the queues up again, preventing blk-mq's hot CPU notifier from making forward progress. To fix that, this patch starts a request_queue freeze when the driver resets a controller so no new requests may enter. 
The driver will wait for frozen after IO queues are restarted to ensure the queue reference can be reinitialized when nvme requests to unfreeze the queues. If the driver is doing a safe shutdown, the driver will wait for the controller to successfully complete all inflight requests so that we don't unnecessarily fail them. Once the controller has been disabled, the queues will be restarted to force remaining entered requests to end in failure so that blk-mq's hot cpu notifier may progress. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 47 ++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 4 ++++ drivers/nvme/host/pci.c | 33 +++++++++++++++++++++++----- 3 files changed, 79 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 25ec4e585220..9b3b57fef446 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2344,6 +2344,53 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_kill_queues); +void nvme_unfreeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_unfreeze_queue(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_unfreeze); + +void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) { + timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout); + if (timeout <= 0) + break; + } + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout); + +void nvme_wait_freeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_freeze_queue_wait(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_wait_freeze); + +void nvme_start_freeze(struct nvme_ctrl *ctrl) +{ + struct nvme_ns *ns; + + mutex_lock(&ctrl->namespaces_mutex); + list_for_each_entry(ns, &ctrl->namespaces, list) + blk_mq_freeze_queue_start(ns->queue); + mutex_unlock(&ctrl->namespaces_mutex); +} +EXPORT_SYMBOL_GPL(nvme_start_freeze); + void nvme_stop_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a3da1e90b99d..2aa20e3e5675 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -294,6 +294,10 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl); void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); void nvme_kill_queues(struct nvme_ctrl *ctrl); +void nvme_unfreeze(struct nvme_ctrl *ctrl); +void nvme_wait_freeze(struct nvme_ctrl *ctrl); +void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); +void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index eee8f8426ff2..26a5fd05fe88 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1675,21 +1675,34 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { int i, queues; - u32 csts = -1; + bool dead = true; + struct pci_dev *pdev = to_pci_dev(dev->dev); del_timer_sync(&dev->watchdog_timer); mutex_lock(&dev->shutdown_lock); - if 
(pci_is_enabled(to_pci_dev(dev->dev))) { - nvme_stop_queues(&dev->ctrl); - csts = readl(dev->bar + NVME_REG_CSTS); + if (pci_is_enabled(pdev)) { + u32 csts = readl(dev->bar + NVME_REG_CSTS); + + if (dev->ctrl.state == NVME_CTRL_LIVE) + nvme_start_freeze(&dev->ctrl); + dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) || + pdev->error_state != pci_channel_io_normal); } + /* + * Give the controller a chance to complete all entered requests if + * doing a safe shutdown. + */ + if (!dead && shutdown) + nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); + nvme_stop_queues(&dev->ctrl); + queues = dev->online_queues - 1; for (i = dev->queue_count - 1; i > 0; i--) nvme_suspend_queue(dev->queues[i]); - if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { + if (dead) { /* A device might become IO incapable very soon during * probe, before the admin queue is configured. Thus, * queue_count can be 0 here. @@ -1704,6 +1717,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl); + + /* + * The driver will not be starting up queues again if shutting down so + * must flush all entered requests to their failed completion to avoid + * deadlocking blk-mq hot-cpu notifier. + */ + if (shutdown) + nvme_start_queues(&dev->ctrl); mutex_unlock(&dev->shutdown_lock); } @@ -1826,7 +1847,9 @@ static void nvme_reset_work(struct work_struct *work) nvme_remove_namespaces(&dev->ctrl); } else { nvme_start_queues(&dev->ctrl); + nvme_wait_freeze(&dev->ctrl); nvme_dev_add(dev); + nvme_unfreeze(&dev->ctrl); } if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) { -- GitLab From e02898b423802b1f3a3aaa7f16e896da069ba8f7 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 1 Mar 2017 10:42:38 -0800 Subject: [PATCH 0948/1084] loop: fix LO_FLAGS_PARTSCAN hang loop_reread_partitions() needs to do I/O, but we just froze the queue, so we end up waiting forever. This can easily be reproduced with losetup -P. Fix it by moving the reread to after we unfreeze the queue. 
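The rule the fix follows, as a reduced sketch: work that itself submits I/O to the queue, such as re-reading the partition table, has to run after blk_mq_unfreeze_queue(), otherwise it waits on the very freeze it is nested inside. The helper names are stand-ins, defined only to keep the sketch self-contained.

#include <linux/blk-mq.h>

/* stand-ins for the real configuration update and partition rescan */
static int example_update_config(void)
{
	return 0;
}

static void example_rescan_partitions(struct request_queue *q)
{
}

static int example_reconfigure(struct request_queue *q)
{
	int err;

	blk_mq_freeze_queue(q);		/* block new I/O while state changes */
	err = example_update_config();	/* must not submit I/O itself */
	blk_mq_unfreeze_queue(q);

	/* anything that issues I/O may only run once the queue is unfrozen */
	if (!err)
		example_rescan_partitions(q);

	return err;
}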
Fixes: ecdd09597a57 ("block/loop: fix race between I/O and set_status") Reported-by: Tejun Heo Cc: stable@vger.kernel.org Signed-off-by: Omar Sandoval Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/loop.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 4b52a1690329..132c9f371dce 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1142,13 +1142,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) (info->lo_flags & LO_FLAGS_AUTOCLEAR)) lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; - if ((info->lo_flags & LO_FLAGS_PARTSCAN) && - !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { - lo->lo_flags |= LO_FLAGS_PARTSCAN; - lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; - loop_reread_partitions(lo, lo->lo_device); - } - lo->lo_encrypt_key_size = info->lo_encrypt_key_size; lo->lo_init[0] = info->lo_init[0]; lo->lo_init[1] = info->lo_init[1]; @@ -1163,6 +1156,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) exit: blk_mq_unfreeze_queue(lo->lo_queue); + + if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && + !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { + lo->lo_flags |= LO_FLAGS_PARTSCAN; + lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; + loop_reread_partitions(lo, lo->lo_device); + } + return err; } -- GitLab From a5a79d00017c9eee68a9bcb40d5dfd6f45f17461 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Mar 2017 16:50:13 +0100 Subject: [PATCH 0949/1084] block: Initialize bd_bdi on inode initialization So far we initialized bd_bdi only in bdget(). That is fine for normal bdev inodes however for the special case of the root inode of blockdev_superblock that function is never called and thus bd_bdi is left uninitialized. As a result bdev_evict_inode() may oops doing bdi_put(root->bd_bdi) on that inode as can be seen when doing: mount -t bdev none /mnt Fix the problem by initializing bd_bdi when first allocating the inode and then reinitializing bd_bdi in bdev_evict_inode(). Thanks to syzkaller team for finding the problem. Reported-by: Dmitry Vyukov Fixes: b1d2dc5659b4 ("block: Make blk_get_backing_dev_info() safe without open bdev") Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/block_dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 77c30f15a02c..2eca00ec4370 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -870,6 +870,7 @@ static void init_once(void *foo) #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); #endif + bdev->bd_bdi = &noop_backing_dev_info; inode_init_once(&ei->vfs_inode); /* Initialize mutex for freeze. 
*/ mutex_init(&bdev->bd_fsfreeze_mutex); @@ -884,8 +885,10 @@ static void bdev_evict_inode(struct inode *inode) spin_lock(&bdev_lock); list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); - if (bdev->bd_bdi != &noop_backing_dev_info) + if (bdev->bd_bdi != &noop_backing_dev_info) { bdi_put(bdev->bd_bdi); + bdev->bd_bdi = &noop_backing_dev_info; + } } static const struct super_operations bdev_sops = { @@ -988,7 +991,6 @@ struct block_device *bdget(dev_t dev) bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; - bdev->bd_bdi = &noop_backing_dev_info; bdev->bd_block_size = i_blocksize(inode); bdev->bd_part_count = 0; bdev->bd_invalidated = 0; -- GitLab From 3131d970f0d86bea41811766d55fc638e382daf3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Feb 2017 10:13:38 +0100 Subject: [PATCH 0950/1084] ARM: ux500: resume the second core properly The pen hold/release scheme was copied over to Ux500 from the ARM reference designs like most of these at the time. It is not needed at all, and was mostly removed in commit c00def71efd9 "ARM: ux500: simplify secondary CPU boot". However on the suspend/resume path and hot plug/unplug of CPUs, the .cpu_die() callback was still waiting for the pen to be released which made it spin forever and the second core never come back online after suspend/resume. Fix this by simply replacing the strange custom .cpu_die() with a oneline wfi() just like e.g. the qcom platform does. This fixes the issue and makes the second core come up properly after suspend/resume. As a side effect, this rids us of the completely surplus local setup.h and hotplug.c files, and we just compile this into platsmp.c with everything else SMP. Cc: stable@vger.kernel.org Fixes: c00def71efd9 ("ARM: ux500: simplify secondary CPU boot") Cc: Ulf Hansson Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/mach-ux500/Makefile | 3 --- arch/arm/mach-ux500/cpu-db8500.c | 2 -- arch/arm/mach-ux500/hotplug.c | 37 -------------------------------- arch/arm/mach-ux500/platsmp.c | 9 ++++++-- arch/arm/mach-ux500/setup.h | 16 -------------- 5 files changed, 7 insertions(+), 60 deletions(-) delete mode 100644 arch/arm/mach-ux500/hotplug.c delete mode 100644 arch/arm/mach-ux500/setup.h diff --git a/arch/arm/mach-ux500/Makefile b/arch/arm/mach-ux500/Makefile index c2499bff4986..a9a3453548f4 100644 --- a/arch/arm/mach-ux500/Makefile +++ b/arch/arm/mach-ux500/Makefile @@ -5,7 +5,4 @@ obj-y := pm.o obj-$(CONFIG_UX500_SOC_DB8500) += cpu-db8500.o obj-$(CONFIG_SMP) += platsmp.o -obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o obj-$(CONFIG_PM_GENERIC_DOMAINS) += pm_domains.o - -CFLAGS_hotplug.o += -march=armv7-a diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 24529cf58df6..28083ef72819 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -31,8 +31,6 @@ #include #include -#include "setup.h" - #include "db8500-regs.h" static int __init ux500_l2x0_unlock(void) diff --git a/arch/arm/mach-ux500/hotplug.c b/arch/arm/mach-ux500/hotplug.c deleted file mode 100644 index 1cbed0331fd3..000000000000 --- a/arch/arm/mach-ux500/hotplug.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) STMicroelectronics 2009 - * Copyright (C) ST-Ericsson SA 2010 - * - * License Terms: GNU General Public License v2 - * Based on ARM realview platform - * - * Author: Sundar Iyer - * - */ -#include -#include -#include - -#include - -#include "setup.h" - -/* - * platform-specific code to shutdown a CPU - * - * Called with IRQs disabled - */ 
-void ux500_cpu_die(unsigned int cpu) -{ - /* directly enter low power state, skipping secure registers */ - for (;;) { - __asm__ __volatile__("dsb\n\t" "wfi\n\t" - : : : "memory"); - if (pen_release == cpu_logical_map(cpu)) { - /* - * OK, proper wakeup, we're done - */ - break; - } - } -} diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index e0ee139fdebf..19c165aeecd2 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -23,8 +23,6 @@ #include #include -#include "setup.h" - #include "db8500-regs.h" /* Magic triggers in backup RAM */ @@ -90,6 +88,13 @@ static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle) return 0; } +#ifdef CONFIG_HOTPLUG_CPU +void ux500_cpu_die(unsigned int cpu) +{ + wfi(); +} +#endif + static const struct smp_operations ux500_smp_ops __initconst = { .smp_prepare_cpus = ux500_smp_prepare_cpus, .smp_boot_secondary = ux500_boot_secondary, diff --git a/arch/arm/mach-ux500/setup.h b/arch/arm/mach-ux500/setup.h deleted file mode 100644 index 988e7c77068d..000000000000 --- a/arch/arm/mach-ux500/setup.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (C) 2009 ST-Ericsson. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * These symbols are needed for board-specific files to call their - * own cpu-specific files - */ -#ifndef __ASM_ARCH_SETUP_H -#define __ASM_ARCH_SETUP_H - -extern void ux500_cpu_die(unsigned int cpu); - -#endif /* __ASM_ARCH_SETUP_H */ -- GitLab From 474c90156c8dcc2fa815e6716cc9394d7930cb9c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Mar 2017 12:17:22 -0800 Subject: [PATCH 0951/1084] give up on gcc ilog2() constant optimizations gcc-7 has an "optimization" pass that completely screws up, and generates the code expansion for the (impossible) case of calling ilog2() with a zero constant, even when the code gcc compiles does not actually have a zero constant. And we try to generate a compile-time error for anybody doing ilog2() on a constant where that doesn't make sense (be it zero or negative). So now gcc7 will fail the build due to our sanity checking, because it created that constant-zero case that didn't actually exist in the source code. There's a whole long discussion on the kernel mailing about how to work around this gcc bug. The gcc people themselevs have discussed their "feature" in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72785 but it's all water under the bridge, because while it looked at one point like it would be solved by the time gcc7 was released, that was not to be. So now we have to deal with this compiler braindamage. And the only simple approach seems to be to just delete the code that tries to warn about bad uses of ilog2(). So now "ilog2()" will just return 0 not just for the value 1, but for any non-positive value too. It's not like I can recall anybody having ever actually tried to use this function on any invalid value, but maybe the sanity check just meant that such code never made it out in public. 
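A small illustration of the resulting constant-folding behaviour; this snippet
is not part of the patch, and BUILD_BUG_ON() is used here only to show what
now compiles:

	BUILD_BUG_ON(ilog2(8) != 3);	/* ordinary constants are unchanged */
	BUILD_BUG_ON(ilog2(1) != 0);	/* unchanged */
	BUILD_BUG_ON(ilog2(0) != 0);	/* used to expand to ____ilog2_NaN() and break the build, now folds to 0 */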
Reported-by: Laura Abbott Cc: John Stultz , Cc: Thomas Gleixner Cc: Ard Biesheuvel Signed-off-by: Linus Torvalds --- include/linux/log2.h | 13 ++----------- tools/include/linux/log2.h | 13 ++----------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/include/linux/log2.h b/include/linux/log2.h index ef3d4f67118c..c373295f359f 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -15,12 +15,6 @@ #include #include -/* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented @@ -85,7 +79,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -148,10 +142,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h index 41446668ccce..d5677d39c1e4 100644 --- a/tools/include/linux/log2.h +++ b/tools/include/linux/log2.h @@ -12,12 +12,6 @@ #ifndef _TOOLS_LINUX_LOG2_H #define _TOOLS_LINUX_LOG2_H -/* - * deal with unrepresentable constant logarithms - */ -extern __attribute__((const, noreturn)) -int ____ilog2_NaN(void); - /* * non-constant log of base 2 calculators * - the arch may override these in asm/bitops.h if they can be implemented @@ -78,7 +72,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n) < 1 ? ____ilog2_NaN() : \ + (n) < 2 ? 0 : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ @@ -141,10 +135,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - (n) & (1ULL << 1) ? 1 : \ - (n) & (1ULL << 0) ? 0 : \ - ____ilog2_NaN() \ - ) : \ + 1 ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ -- GitLab From e3330039ea28dc199e3b2da993895ff742a91adf Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 27 Feb 2017 16:07:43 -0800 Subject: [PATCH 0952/1084] ipv6: check for ip6_null_entry in __ip6_del_rt_siblings() Andrey reported a NULL pointer deref bug in ipv6_route_ioctl() -> ip6_route_del() -> __ip6_del_rt_siblings() code path. This is because ip6_null_entry is returned in this path since ip6_null_entry is kinda default for a ipv6 route table root node. Quote from David Ahern: ip6_null_entry is the root of all ipv6 fib tables making it integrated into the table ... We should ignore any attempt of trying to delete it, like we do in __ip6_del_rt() path and several others. Reported-by: Andrey Konovalov Fixes: 0ae8133586ad ("net: ipv6: Allow shorthand delete of all nexthops in multipath route") Cc: David Ahern Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d94f1dfa54c8..43ca90d50ae9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2169,10 +2169,13 @@ int ip6_del_rt(struct rt6_info *rt) static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) { struct nl_info *info = &cfg->fc_nlinfo; + struct net *net = info->nl_net; struct sk_buff *skb = NULL; struct fib6_table *table; - int err; + int err = -ENOENT; + if (rt == net->ipv6.ip6_null_entry) + goto out_put; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); @@ -2184,7 +2187,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) if (skb) { u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; - if (rt6_fill_node(info->nl_net, skb, rt, + if (rt6_fill_node(net, skb, rt, NULL, NULL, 0, RTM_DELROUTE, info->portid, seq, 0) < 0) { kfree_skb(skb); @@ -2198,17 +2201,18 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) rt6i_siblings) { err = fib6_del(sibling, info); if (err) - goto out; + goto out_unlock; } } err = fib6_del(rt, info); -out: +out_unlock: write_unlock_bh(&table->tb6_lock); +out_put: ip6_rt_put(rt); if (skb) { - rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV6_ROUTE, + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); } return err; -- GitLab From 9aea7779b764a11e357d3c74af6aee3cf90f2045 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Tue, 28 Feb 2017 18:08:55 +0100 Subject: [PATCH 0953/1084] drivers: net: xgene: Fix crash on DT systems On DT systems the driver require a clock, but the probe just print a warning and continue, leading to a crash when resetting the device. To fix this crash and properly handle probe deferals only ignore the missing clock if DT isn't used or if the clock doesn't exist. Signed-off-by: Alban Bedel Acked-by: Iyappan Subramanian Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index e536301acfde..b3568c453b14 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1749,6 +1749,12 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) pdata->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) { + /* Abort if the clock is defined but couldn't be retrived. + * Always abort if the clock is missing on DT system as + * the driver can't cope with this case. + */ + if (PTR_ERR(pdata->clk) != -ENOENT || dev->of_node) + return PTR_ERR(pdata->clk); /* Firmware may have set up the clock already. */ dev_info(dev, "clocks have been setup already\n"); } -- GitLab From 402168b4c2dc0734b8fbd282eff77da0275c5129 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 28 Feb 2017 15:02:51 -0600 Subject: [PATCH 0954/1084] amd-xgbe: Stop the PHY before releasing interrupts Some configurations require the use of the hardware's MDIO support to communicate with external PHYs. The MDIO commands indicate completion through the device interrupt. When bringing down the device the interrupts were released before stopping the external PHY, resulting in MDIO command timeouts. Move the stopping of the PHY to before the releasing of the interrupts. Signed-off-by: Tom Lendacky Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 3aa457c8ca21..248f60d171a5 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1131,12 +1131,12 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) hw_if->disable_tx(pdata); hw_if->disable_rx(pdata); + phy_if->phy_stop(pdata); + xgbe_free_irqs(pdata); xgbe_napi_disable(pdata, 1); - phy_if->phy_stop(pdata); - hw_if->exit(pdata); channel = pdata->channel; -- GitLab From b42c6761fd1651f564491b53016046c9ebf0b2a9 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 28 Feb 2017 15:03:01 -0600 Subject: [PATCH 0955/1084] amd-xgbe: Be sure to set MDIO modes on device (re)start The MDIO register mode is set when the device is probed. But when the device is brought down and then back up, the MDIO register mode has been reset. Be sure to reset the mode during device startup and only change the mode of the address specified. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 22 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index a7d16db5c4b2..937f37a5dcb2 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1323,7 +1323,7 @@ static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr, static int xgbe_set_ext_mii_mode(struct xgbe_prv_data *pdata, unsigned int port, enum xgbe_mdio_mode mode) { - unsigned int reg_val = 0; + unsigned int reg_val = XGMAC_IOREAD(pdata, MAC_MDIOCL22R); switch (mode) { case XGBE_MDIO_MODE_CL22: diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 9d8c953083b4..04804cbb7dc1 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -875,6 +875,16 @@ static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata) !phy_data->sfp_phy_avail) return 0; + /* Set the proper MDIO mode for the PHY */ + ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->mdio_addr, + phy_data->phydev_mode); + if (ret) { + netdev_err(pdata->netdev, + "mdio port/clause not compatible (%u/%u)\n", + phy_data->mdio_addr, phy_data->phydev_mode); + return ret; + } + /* Create and connect to the PHY device */ phydev = get_phy_device(phy_data->mii, phy_data->mdio_addr, (phy_data->phydev_mode == XGBE_MDIO_MODE_CL45)); @@ -2722,6 +2732,18 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata) if (ret) return ret; + /* Set the proper MDIO mode for the re-driver */ + if (phy_data->redrv && !phy_data->redrv_if) { + ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->redrv_addr, + XGBE_MDIO_MODE_CL22); + if (ret) { + netdev_err(pdata->netdev, + "redriver mdio port not compatible (%u)\n", + phy_data->redrv_addr); + return ret; + } + } + /* Start in highest supported mode */ xgbe_phy_set_mode(pdata, phy_data->start_mode); -- GitLab From 2697ea5a859b83ca49511dcfd98daf42584eb3cf Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Tue, 28 Feb 2017 15:03:10 -0600 Subject: [PATCH 0956/1084] amd-xgbe: Don't overwrite SFP PHY mod_absent settings If an SFP module is not present, xgbe_phy_sfp_phy_settings() should return after applying the default settings. 
Currently there is no return statement and the default settings are overwritten. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 04804cbb7dc1..e707c49cc55a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -716,6 +716,8 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata) pdata->phy.duplex = DUPLEX_UNKNOWN; pdata->phy.autoneg = AUTONEG_ENABLE; pdata->phy.advertising = pdata->phy.supported; + + return; } pdata->phy.advertising &= ~ADVERTISED_Autoneg; -- GitLab From 7b36a7189fc320f0b783dd51bd1f541db56cfbdd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 2 Mar 2017 13:59:08 -0700 Subject: [PATCH 0957/1084] block: don't call ioc_exit_icq() with the queue lock held for blk-mq For legacy scheduling, we always call ioc_exit_icq() with both the ioc and queue lock held. This poses a problem for blk-mq with scheduling, since the queue lock isn't what we use in the scheduler. And since we don't need the queue lock held for ioc exit there, don't grab it and leave any extra locking up to the blk-mq scheduler. Reported-by: Paolo Valente Tested-by: Paolo Valente Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-ioc.c | 44 +++++++++++++++++++++++++++++++------------- block/blk-sysfs.c | 2 -- block/elevator.c | 2 -- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/block/blk-ioc.c b/block/blk-ioc.c index b12f9c87b4c3..0d7c96c0ba71 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -36,8 +36,8 @@ static void icq_free_icq_rcu(struct rcu_head *head) } /* - * Exit an icq. Called with both ioc and q locked for sq, only ioc locked for - * mq. + * Exit an icq. Called with ioc locked for blk-mq, and with both ioc + * and queue locked for legacy. */ static void ioc_exit_icq(struct io_cq *icq) { @@ -54,7 +54,10 @@ static void ioc_exit_icq(struct io_cq *icq) icq->flags |= ICQ_EXITED; } -/* Release an icq. Called with both ioc and q locked. */ +/* + * Release an icq. Called with ioc locked for blk-mq, and with both ioc + * and queue locked for legacy. + */ static void ioc_destroy_icq(struct io_cq *icq) { struct io_context *ioc = icq->ioc; @@ -62,7 +65,6 @@ static void ioc_destroy_icq(struct io_cq *icq) struct elevator_type *et = q->elevator->type; lockdep_assert_held(&ioc->lock); - lockdep_assert_held(q->queue_lock); radix_tree_delete(&ioc->icq_tree, icq->q->id); hlist_del_init(&icq->ioc_node); @@ -222,24 +224,40 @@ void exit_io_context(struct task_struct *task) put_io_context_active(ioc); } +static void __ioc_clear_queue(struct list_head *icq_list) +{ + unsigned long flags; + + while (!list_empty(icq_list)) { + struct io_cq *icq = list_entry(icq_list->next, + struct io_cq, q_node); + struct io_context *ioc = icq->ioc; + + spin_lock_irqsave(&ioc->lock, flags); + ioc_destroy_icq(icq); + spin_unlock_irqrestore(&ioc->lock, flags); + } +} + /** * ioc_clear_queue - break any ioc association with the specified queue * @q: request_queue being cleared * - * Walk @q->icq_list and exit all io_cq's. Must be called with @q locked. + * Walk @q->icq_list and exit all io_cq's. 
*/ void ioc_clear_queue(struct request_queue *q) { - lockdep_assert_held(q->queue_lock); + LIST_HEAD(icq_list); - while (!list_empty(&q->icq_list)) { - struct io_cq *icq = list_entry(q->icq_list.next, - struct io_cq, q_node); - struct io_context *ioc = icq->ioc; + spin_lock_irq(q->queue_lock); + list_splice_init(&q->icq_list, &icq_list); - spin_lock(&ioc->lock); - ioc_destroy_icq(icq); - spin_unlock(&ioc->lock); + if (q->mq_ops) { + spin_unlock_irq(q->queue_lock); + __ioc_clear_queue(&icq_list); + } else { + __ioc_clear_queue(&icq_list); + spin_unlock_irq(q->queue_lock); } } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 002af836aa87..c44b321335f3 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -815,9 +815,7 @@ static void blk_release_queue(struct kobject *kobj) blkcg_exit_queue(q); if (q->elevator) { - spin_lock_irq(q->queue_lock); ioc_clear_queue(q); - spin_unlock_irq(q->queue_lock); elevator_exit(q->elevator); } diff --git a/block/elevator.c b/block/elevator.c index ac1c9f481a98..01139f549b5b 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -983,9 +983,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) if (old_registered) elv_unregister_queue(q); - spin_lock_irq(q->queue_lock); ioc_clear_queue(q); - spin_unlock_irq(q->queue_lock); } /* allocate, init and register new elevator */ -- GitLab From 11bd44f62d86115796109b0349e6e191bc99b45a Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 1 Mar 2017 11:18:53 +0530 Subject: [PATCH 0958/1084] cxgb4: update latest firmware version supported Change t4fw_version.h to update latest firmware version number to 1.16.33.0. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index 5fdaa16426c5..fa376444e57c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -37,7 +37,7 @@ #define T4FW_VERSION_MAJOR 0x01 #define T4FW_VERSION_MINOR 0x10 -#define T4FW_VERSION_MICRO 0x1A +#define T4FW_VERSION_MICRO 0x21 #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -46,7 +46,7 @@ #define T5FW_VERSION_MAJOR 0x01 #define T5FW_VERSION_MINOR 0x10 -#define T5FW_VERSION_MICRO 0x1A +#define T5FW_VERSION_MICRO 0x21 #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -55,7 +55,7 @@ #define T6FW_VERSION_MAJOR 0x01 #define T6FW_VERSION_MINOR 0x10 -#define T6FW_VERSION_MICRO 0x1A +#define T6FW_VERSION_MICRO 0x21 #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 -- GitLab From f1304f7ba3981e71dcf2ac7db92949eeab49b1bf Mon Sep 17 00:00:00 2001 From: Peter Downs Date: Wed, 1 Mar 2017 01:01:17 -0800 Subject: [PATCH 0959/1084] openvswitch: actions: fixed a brace coding style warning Fixed a brace coding style warning reported by checkpatch.pl Signed-off-by: Peter Downs Signed-off-by: David S. 
Miller --- net/openvswitch/actions.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b1beb2b94ec7..c82301ce3fff 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -796,9 +796,8 @@ static void ovs_fragment(struct net *net, struct vport *vport, unsigned long orig_dst; struct rt6_info ovs_rt; - if (!v6ops) { + if (!v6ops) goto err; - } prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); -- GitLab From d5afb6f9b6bb2c57bd0c05e76e12489dc0d037d9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 Mar 2017 16:35:07 -0300 Subject: [PATCH 0960/1084] dccp: Unlock sock before calling sk_free() The code where sk_clone() came from created a new socket and locked it, but then, on the error path didn't unlock it. This problem stayed there for a long while, till b0691c8ee7c2 ("net: Unlock sock before calling sk_free()") fixed it, but unfortunately the callers of sk_clone() (now sk_clone_locked()) were not audited and the one in dccp_create_openreq_child() remained. Now in the age of the syskaller fuzzer, this was finally uncovered, as reported by Dmitry: ---- 8< ---- I've got the following report while running syzkaller fuzzer on 86292b33d4b7 ("Merge branch 'akpm' (patches from Andrew)") [ BUG: held lock freed! ] 4.10.0+ #234 Not tainted ------------------------- syz-executor6/6898 is freeing memory ffff88006286cac0-ffff88006286d3b7, with a lock still held there! (slock-AF_INET6){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (slock-AF_INET6){+.-...}, at: [] sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 5 locks held by syz-executor6/6898: #0: (sk_lock-AF_INET6){+.+.+.}, at: [] lock_sock include/net/sock.h:1460 [inline] #0: (sk_lock-AF_INET6){+.+.+.}, at: [] inet_stream_connect+0x44/0xa0 net/ipv4/af_inet.c:681 #1: (rcu_read_lock){......}, at: [] inet6_csk_xmit+0x12a/0x5d0 net/ipv6/inet6_connection_sock.c:126 #2: (rcu_read_lock){......}, at: [] __skb_unlink include/linux/skbuff.h:1767 [inline] #2: (rcu_read_lock){......}, at: [] __skb_dequeue include/linux/skbuff.h:1783 [inline] #2: (rcu_read_lock){......}, at: [] process_backlog+0x264/0x730 net/core/dev.c:4835 #3: (rcu_read_lock){......}, at: [] ip6_input_finish+0x0/0x1700 net/ipv6/ip6_input.c:59 #4: (slock-AF_INET6){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] #4: (slock-AF_INET6){+.-...}, at: [] sk_clone_lock+0x3d9/0x12c0 net/core/sock.c:1504 Fix it just like was done by b0691c8ee7c2 ("net: Unlock sock before calling sk_free()"). Reported-by: Dmitry Vyukov Cc: Cong Wang Cc: Eric Dumazet Cc: Gerrit Renker Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170301153510.GE15145@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- net/dccp/minisocks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 53eddf99e4f6..d20d948a98ed 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, /* It is still raw copy of parent, so invalidate * destructor and make plain sk_free() */ newsk->sk_destruct = NULL; + bh_unlock_sock(newsk); sk_free(newsk); return NULL; } -- GitLab From 94352d45092c23874532221b4d1e4721df9d63df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 Mar 2017 16:35:08 -0300 Subject: [PATCH 0961/1084] net: Introduce sk_clone_lock() error path routine When handling problems in cloning a socket with the sk_clone_locked() function we need to perform several steps that were open coded in it and its callers, so introduce a routine to avoid this duplication: sk_free_unlock_clone(). Cc: Cong Wang Cc: Dmitry Vyukov Cc: Eric Dumazet Cc: Gerrit Renker Cc: Thomas Gleixner Link: http://lkml.kernel.org/n/net-ui6laqkotycunhtmqryl9bfx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/sock.h | 1 + net/core/sock.c | 16 +++++++++++----- net/dccp/minisocks.c | 6 +----- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 9ccefa5c5487..5e5997654db6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1526,6 +1526,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, void sk_free(struct sock *sk); void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); +void sk_free_unlock_clone(struct sock *sk); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority); diff --git a/net/core/sock.c b/net/core/sock.c index e7d74940e863..f6fd79f33097 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1539,11 +1539,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) is_charged = sk_filter_charge(newsk, filter); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { - /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - newsk->sk_destruct = NULL; - bh_unlock_sock(newsk); - sk_free(newsk); + sk_free_unlock_clone(newsk); newsk = NULL; goto out; } @@ -1592,6 +1588,16 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) } EXPORT_SYMBOL_GPL(sk_clone_lock); +void sk_free_unlock_clone(struct sock *sk) +{ + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + sk->sk_destruct = NULL; + bh_unlock_sock(sk); + sk_free(sk); +} +EXPORT_SYMBOL_GPL(sk_free_unlock_clone); + void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index d20d948a98ed..e267e6f4c9a5 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -119,11 +119,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, * Activate features: initialise CCIDs, sequence windows etc. 
*/ if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { - /* It is still raw copy of parent, so invalidate - * destructor and make plain sk_free() */ - newsk->sk_destruct = NULL; - bh_unlock_sock(newsk); - sk_free(newsk); + sk_free_unlock_clone(newsk); return NULL; } dccp_init_xmit_timers(newsk); -- GitLab From 113285b473824922498d07d7f82459507b9792eb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 2 Mar 2017 13:26:04 -0700 Subject: [PATCH 0962/1084] blk-mq: ensure that bd->last is always set correctly When drivers are called with a request in blk-mq, blk-mq flags the state such that the driver knows if this is the last request in this call chain or not. The driver can then use that information to defer kicking off IO until bd->last is true. However, with blk-mq and scheduling, we need to allocate a driver tag for a request before it can be issued. If we fail to allocate such a tag, we could end up in the situation where the last request issued did not have bd->last == true set. This can then cause a driver hang. This fixes a hang with virtio-blk, which uses bd->last as a hint on whether to kick the queue or not. Reported-by: Chris Mason Tested-by: Chris Mason Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a5e66a7a3506..e797607dab89 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -876,12 +876,9 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, return false; } -static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, - struct request *rq) +static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, + struct request *rq) { - if (rq->tag == -1 || rq->internal_tag == -1) - return; - blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); rq->tag = -1; @@ -891,6 +888,26 @@ static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, } } +static void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + if (rq->tag == -1 || rq->internal_tag == -1) + return; + + __blk_mq_put_driver_tag(hctx, rq); +} + +static void blk_mq_put_driver_tag(struct request *rq) +{ + struct blk_mq_hw_ctx *hctx; + + if (rq->tag == -1 || rq->internal_tag == -1) + return; + + hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu); + __blk_mq_put_driver_tag(hctx, rq); +} + /* * If we fail getting a driver tag because all the driver tags are already * assigned and on the dispatch list, BUT the first entry does not have a @@ -1000,7 +1017,19 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) bd.rq = rq; bd.list = dptr; - bd.last = list_empty(list); + + /* + * Flag last if we have no more requests, or if we have more + * but can't assign a driver tag to it. + */ + if (list_empty(list)) + bd.last = true; + else { + struct request *nxt; + + nxt = list_first_entry(list, struct request, queuelist); + bd.last = !blk_mq_get_driver_tag(nxt, NULL, false); + } ret = q->mq_ops->queue_rq(hctx, &bd); switch (ret) { @@ -1008,7 +1037,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) queued++; break; case BLK_MQ_RQ_QUEUE_BUSY: - blk_mq_put_driver_tag(hctx, rq); + blk_mq_put_driver_tag_hctx(hctx, rq); list_add(&rq->queuelist, list); __blk_mq_requeue_request(rq); break; @@ -1038,6 +1067,13 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) * that is where we will continue on next queue run. 
*/ if (!list_empty(list)) { + /* + * If we got a driver tag for the next request already, + * free it again. + */ + rq = list_first_entry(list, struct request, queuelist); + blk_mq_put_driver_tag(rq); + spin_lock(&hctx->lock); list_splice_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); -- GitLab From 7db92362d2fee5887f6b0c41653b8c9f8f5d6020 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Wed, 1 Mar 2017 13:29:48 -0800 Subject: [PATCH 0963/1084] tcp: fix potential double free issue for fastopen_req tp->fastopen_req could potentially be double freed if a malicious user does the following: 1. Enable TCP_FASTOPEN_CONNECT sockopt and do a connect() on the socket. 2. Call connect() with AF_UNSPEC to disconnect the socket. 3. Make this socket a listening socket by calling listen(). 4. Accept incoming connections and generate child sockets. All child sockets will get a copy of the pointer of fastopen_req. 5. Call close() on all sockets. fastopen_req will get freed multiple times. Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support") Reported-by: Andrey Konovalov Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index da385ae997a3..cf4555581282 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1110,9 +1110,14 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, msg->msg_namelen, flags, 1); - inet->defer_connect = 0; - *copied = tp->fastopen_req->copied; - tcp_free_fastopen_req(tp); + /* fastopen_req could already be freed in __inet_stream_connect + * if the connection times out or gets rst + */ + if (tp->fastopen_req) { + *copied = tp->fastopen_req->copied; + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; + } return err; } @@ -2318,6 +2323,10 @@ int tcp_disconnect(struct sock *sk, int flags) memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); + /* Clean up fastopen related fields */ + tcp_free_fastopen_req(tp); + inet->defer_connect = 0; + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); -- GitLab From 332524eaf739296bfc86c26d1b86a8c0040d7818 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 21 Feb 2017 21:42:30 +0100 Subject: [PATCH 0964/1084] ARM: deconfig: fix the moxart defconfig The moxart defconfig wasn't even building a kernel for Moxart, it was building a kernel for V4T on the nothing platform. Switch to MULTI_V4 and keep the right drivers, update a few selections. Now it (presumably) builds a minimalist Moxart kernel again. 
Cc: Jonas Jensen Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/configs/moxart_defconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig index a3cb76cfb828..b2ddd534867f 100644 --- a/arch/arm/configs/moxart_defconfig +++ b/arch/arm/configs/moxart_defconfig @@ -18,9 +18,8 @@ CONFIG_EMBEDDED=y # CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_ARCH_MULTI_V4T=y +CONFIG_ARCH_MULTI_V4=y # CONFIG_ARCH_MULTI_V7 is not set -CONFIG_KEYBOARD_GPIO_POLLED=y CONFIG_ARCH_MOXART=y CONFIG_MACH_UC7112LX=y CONFIG_PREEMPT=y @@ -94,12 +93,10 @@ CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set -CONFIG_DEBUG_GPIO=y -CONFIG_GPIO_SYSFS=y CONFIG_GPIO_MOXART=y -CONFIG_POWER_SUPPLY=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_GPIO=y +CONFIG_POWER_SUPPLY=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_WATCHDOG_CORE=y @@ -107,10 +104,13 @@ CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_MOXART_WDT=y # CONFIG_USB_SUPPORT is not set CONFIG_MMC=y -CONFIG_MMC_SDHCI_MOXART=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_MOXART=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_ONESHOT=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y -- GitLab From be12502e2e64854dbe0a2ddff6d26ec1143d6890 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 1 Mar 2017 17:24:47 -0800 Subject: [PATCH 0965/1084] drivers: net: ethernet: remove incorrect __exit markups Even if bus is not hot-pluggable, devices can be unbound from the driver via sysfs, so we should not be using __exit annotations on remove() methods. The only exception is drivers registered with platform_driver_probe() which specifically disables sysfs bind/unbind attributes. Signed-off-by: Dmitry Torokhov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/declance.c | 30 +++++++++++----------- drivers/net/ethernet/broadcom/sb1250-mac.c | 4 +-- drivers/net/ethernet/faraday/ftgmac100.c | 4 +-- drivers/net/ethernet/faraday/ftmac100.c | 4 +-- drivers/net/ethernet/seeq/sgiseeq.c | 4 +-- drivers/net/ethernet/sgi/meth.c | 4 +-- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 76e5fc7adff5..6c98901f1b89 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1276,18 +1276,6 @@ static int dec_lance_probe(struct device *bdev, const int type) return ret; } -static void __exit dec_lance_remove(struct device *bdev) -{ - struct net_device *dev = dev_get_drvdata(bdev); - resource_size_t start, len; - - unregister_netdev(dev); - start = to_tc_dev(bdev)->resource.start; - len = to_tc_dev(bdev)->resource.end - start + 1; - release_mem_region(start, len); - free_netdev(dev); -} - /* Find all the lance cards on the system and initialize them */ static int __init dec_lance_platform_probe(void) { @@ -1320,7 +1308,7 @@ static void __exit dec_lance_platform_remove(void) #ifdef CONFIG_TC static int dec_lance_tc_probe(struct device *dev); -static int __exit dec_lance_tc_remove(struct device *dev); +static int dec_lance_tc_remove(struct device *dev); static const struct tc_device_id dec_lance_tc_table[] = { { "DEC ", "PMAD-AA " }, @@ -1334,7 +1322,7 @@ static struct tc_driver dec_lance_tc_driver = { .name = "declance", .bus = &tc_bus_type, .probe = dec_lance_tc_probe, - .remove = __exit_p(dec_lance_tc_remove), + .remove = dec_lance_tc_remove, }, }; @@ -1346,7 +1334,19 @@ static int dec_lance_tc_probe(struct device *dev) return status; } -static int __exit dec_lance_tc_remove(struct device *dev) +static void dec_lance_remove(struct device *bdev) +{ + struct net_device *dev = dev_get_drvdata(bdev); + resource_size_t start, len; + + unregister_netdev(dev); + start = to_tc_dev(bdev)->resource.start; + len = to_tc_dev(bdev)->resource.end - start + 1; + release_mem_region(start, len); + free_netdev(dev); +} + +static int dec_lance_tc_remove(struct device *dev) { put_device(dev); dec_lance_remove(dev); diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index 89d4feba1a9a..55c8e25b43d9 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2617,7 +2617,7 @@ static int sbmac_probe(struct platform_device *pldev) return err; } -static int __exit sbmac_remove(struct platform_device *pldev) +static int sbmac_remove(struct platform_device *pldev) { struct net_device *dev = platform_get_drvdata(pldev); struct sbmac_softc *sc = netdev_priv(dev); @@ -2634,7 +2634,7 @@ static int __exit sbmac_remove(struct platform_device *pldev) static struct platform_driver sbmac_driver = { .probe = sbmac_probe, - .remove = __exit_p(sbmac_remove), + .remove = sbmac_remove, .driver = { .name = sbmac_string, }, diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 262587240c86..928b0df2b8e0 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1456,7 +1456,7 @@ static int ftgmac100_probe(struct platform_device *pdev) return err; } -static int __exit ftgmac100_remove(struct platform_device *pdev) +static int ftgmac100_remove(struct platform_device *pdev) { struct net_device *netdev; struct ftgmac100 *priv; @@ -1483,7 +1483,7 @@ 
MODULE_DEVICE_TABLE(of, ftgmac100_of_match); static struct platform_driver ftgmac100_driver = { .probe = ftgmac100_probe, - .remove = __exit_p(ftgmac100_remove), + .remove = ftgmac100_remove, .driver = { .name = DRV_NAME, .of_match_table = ftgmac100_of_match, diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index c0ddbbe6c226..6ac336b546e6 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1156,7 +1156,7 @@ static int ftmac100_probe(struct platform_device *pdev) return err; } -static int __exit ftmac100_remove(struct platform_device *pdev) +static int ftmac100_remove(struct platform_device *pdev) { struct net_device *netdev; struct ftmac100 *priv; @@ -1176,7 +1176,7 @@ static int __exit ftmac100_remove(struct platform_device *pdev) static struct platform_driver ftmac100_driver = { .probe = ftmac100_probe, - .remove = __exit_p(ftmac100_remove), + .remove = ftmac100_remove, .driver = { .name = DRV_NAME, }, diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index ed34196028b8..70347720fdf9 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -807,7 +807,7 @@ static int sgiseeq_probe(struct platform_device *pdev) return err; } -static int __exit sgiseeq_remove(struct platform_device *pdev) +static int sgiseeq_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct sgiseeq_private *sp = netdev_priv(dev); @@ -822,7 +822,7 @@ static int __exit sgiseeq_remove(struct platform_device *pdev) static struct platform_driver sgiseeq_driver = { .probe = sgiseeq_probe, - .remove = __exit_p(sgiseeq_remove), + .remove = sgiseeq_remove, .driver = { .name = "sgiseeq", } diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c index 69d2d30e5ef1..ea55abd62ec7 100644 --- a/drivers/net/ethernet/sgi/meth.c +++ b/drivers/net/ethernet/sgi/meth.c @@ -854,7 +854,7 @@ static int meth_probe(struct platform_device *pdev) return 0; } -static int __exit meth_remove(struct platform_device *pdev) +static int meth_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); @@ -866,7 +866,7 @@ static int __exit meth_remove(struct platform_device *pdev) static struct platform_driver meth_driver = { .probe = meth_probe, - .remove = __exit_p(meth_remove), + .remove = meth_remove, .driver = { .name = "meth", } -- GitLab From 9d6acb3bc9058d1fd7a5297d71f14213679bb4bd Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Mar 2017 20:48:39 -0800 Subject: [PATCH 0966/1084] ipv6: ignore null_entry in inet6_rtm_getroute() too Like commit 1f17e2f2c8a8 ("net: ipv6: ignore null_entry on route dumps"), we need to ignore null entry in inet6_rtm_getroute() too. Return -ENETUNREACH here to sync with IPv4 behavior, as suggested by David. Fixes: a1a22c1206 ("net: ipv6: Keep nexthop of multipath route on admin down") Reported-by: Dmitry Vyukov Cc: David Ahern Signed-off-by: Cong Wang Acked-by: David Ahern Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 43ca90d50ae9..229bfcc451ef 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3632,6 +3632,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } + if (rt == net->ipv6.ip6_null_entry) { + err = rt->dst.error; + ip6_rt_put(rt); + goto errout; + } + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); -- GitLab From 152669bd3cd2407d6f556009b95ee249c0c1a462 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 2 Mar 2017 13:00:53 +0000 Subject: [PATCH 0967/1084] netvsc: fix use-after-free in netvsc_change_mtu() 'nvdev' is freed in rndis_filter_device_remove -> netvsc_device_remove -> free_netvsc_device, so we mustn't access it, before it's re-created in rndis_filter_device_add -> netvsc_device_add. Signed-off-by: Dexuan Cui Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Reviewed-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2d3cdb026a99..bc05c895d958 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -859,15 +859,22 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) if (ret) goto out; + memset(&device_info, 0, sizeof(device_info)); + device_info.ring_size = ring_size; + device_info.num_chn = nvdev->num_chn; + device_info.max_num_vrss_chns = nvdev->num_chn; + ndevctx->start_remove = true; rndis_filter_device_remove(hdev, nvdev); + /* 'nvdev' has been freed in rndis_filter_device_remove() -> + * netvsc_device_remove () -> free_netvsc_device(). + * We mustn't access it before it's re-created in + * rndis_filter_device_add() -> netvsc_device_add(). + */ + ndev->mtu = mtu; - memset(&device_info, 0, sizeof(device_info)); - device_info.ring_size = ring_size; - device_info.num_chn = nvdev->num_chn; - device_info.max_num_vrss_chns = nvdev->num_chn; rndis_filter_device_add(hdev, &device_info); out: -- GitLab From 31c05415f5b471fd333fe42629788364faea8e0d Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 2 Mar 2017 12:24:36 -0800 Subject: [PATCH 0968/1084] bonding: use ETH_MAX_MTU as max mtu This restores the ability of setting bond device's mtu to 9000. Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra") Reported-by: daznis@gmail.com Reported-by: Brad Campbell Cc: Jarod Wilson Signed-off-by: Cong Wang Signed-off-by: Jay Vosburgh Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6321f12630c8..8a4ba8b88e52 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4179,6 +4179,7 @@ void bond_setup(struct net_device *bond_dev) /* Initialize the device entry points */ ether_setup(bond_dev); + bond_dev->max_mtu = ETH_MAX_MTU; bond_dev->netdev_ops = &bond_netdev_ops; bond_dev->ethtool_ops = &bond_ethtool_ops; -- GitLab From 9f674e48c13dcbc31ac903433727837795b81efe Mon Sep 17 00:00:00 2001 From: Anoob Soman Date: Thu, 2 Mar 2017 10:50:20 +0000 Subject: [PATCH 0969/1084] xen-netback: Use GFP_ATOMIC to allocate hash Allocation of new_hash, inside xenvif_new_hash(), always happen in softirq context, so use GFP_ATOMIC instead of GFP_KERNEL for new hash allocation. Signed-off-by: Anoob Soman Signed-off-by: David S. Miller --- drivers/net/xen-netback/hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index e8c5dddc54ba..3c4c58b9fe76 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -39,7 +39,7 @@ static void xenvif_add_hash(struct xenvif *vif, const u8 *tag, unsigned long flags; bool found; - new = kmalloc(sizeof(*entry), GFP_KERNEL); + new = kmalloc(sizeof(*entry), GFP_ATOMIC); if (!new) return; -- GitLab From f0712928be1a66c99c35f871b4df7fa23ec1574a Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Wed, 22 Feb 2017 14:47:17 +0100 Subject: [PATCH 0970/1084] CIFS: use DFS pathnames in SMB2+ Create requests When connected to a DFS capable share, the client must set the SMB2_FLAGS_DFS_OPERATIONS flag in the SMB2 header and use DFS path names: "\\" *without* leading \\. Sources: [MS-SMB2] 3.2.5.5 Receiving an SMB2 TREE_CONNECT Response > TreeConnect.IsDfsShare MUST be set to TRUE, if the SMB2_SHARE_CAP_DFS > bit is set in the Capabilities field of the response. [MS-SMB2] 3.2.4.3 Application Requests Opening a File > If TreeConnect.IsDfsShare is TRUE, the SMB2_FLAGS_DFS_OPERATIONS flag > is set in the Flags field. [MS-SMB2] 2.2.13 SMB2 CREATE Request, NameOffset: > If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of the SMB2 > header, the file name includes a prefix that will be processed during > DFS name normalization as specified in section 3.3.5.9. Otherwise, the > file name is relative to the share that is identified by the TreeId in > the SMB2 header. 
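As a hedged illustration of the resulting name construction (the server, share
and file names below are invented, not taken from the patch):

	tree name:  \\srv1\data          -> leading "\\" dropped, leaving "srv1\data"
	open path:  subdir\file.txt
	wire name:  "srv1\data\subdir\file.txt"  (UTF-16, null-terminated, buffer size rounded up to a multiple of 8)

The create request additionally carries SMB2_FLAGS_DFS_OPERATIONS in the SMB2
header; the path assembly itself is what alloc_path_with_tree_prefix() in the
hunk below implements.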
Signed-off-by: Aurelien Aptel Acked-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 96 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 80 insertions(+), 16 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2fd93eeed15a..2069431b32e3 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1528,6 +1528,51 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec, return 0; } +static int +alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, + const char *treename, const __le16 *path) +{ + int treename_len, path_len; + struct nls_table *cp; + const __le16 sep[] = {cpu_to_le16('\\'), cpu_to_le16(0x0000)}; + + /* + * skip leading "\\" + */ + treename_len = strlen(treename); + if (treename_len < 2 || !(treename[0] == '\\' && treename[1] == '\\')) + return -EINVAL; + + treename += 2; + treename_len -= 2; + + path_len = UniStrnlen((wchar_t *)path, PATH_MAX); + + /* + * make room for one path separator between the treename and + * path + */ + *out_len = treename_len + 1 + path_len; + + /* + * final path needs to be null-terminated UTF16 with a + * size aligned to 8 + */ + + *out_size = roundup((*out_len+1)*2, 8); + *out_path = kzalloc(*out_size, GFP_KERNEL); + if (!*out_path) + return -ENOMEM; + + cp = load_nls_default(); + cifs_strtoUTF16(*out_path, treename, treename_len, cp); + UniStrcat(*out_path, sep); + UniStrcat(*out_path, path); + unload_nls(cp); + + return 0; +} + int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, __u8 *oplock, struct smb2_file_all_info *buf, @@ -1576,30 +1621,49 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, req->ShareAccess = FILE_SHARE_ALL_LE; req->CreateDisposition = cpu_to_le32(oparms->disposition); req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK); - uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; - /* do not count rfc1001 len field */ - req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4); iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ iov[0].iov_len = get_rfc1002_length(req) + 4; - - /* MUST set path len (NameLength) to 0 opening root of share */ - req->NameLength = cpu_to_le16(uni_path_len - 2); /* -1 since last byte is buf[0] which is sent below (path) */ iov[0].iov_len--; - if (uni_path_len % 8 != 0) { - copy_size = uni_path_len / 8 * 8; - if (copy_size < uni_path_len) - copy_size += 8; - - copy_path = kzalloc(copy_size, GFP_KERNEL); - if (!copy_path) - return -ENOMEM; - memcpy((char *)copy_path, (const char *)path, - uni_path_len); + + req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4); + + /* [MS-SMB2] 2.2.13 NameOffset: + * If SMB2_FLAGS_DFS_OPERATIONS is set in the Flags field of + * the SMB2 header, the file name includes a prefix that will + * be processed during DFS name normalization as specified in + * section 3.3.5.9. Otherwise, the file name is relative to + * the share that is identified by the TreeId in the SMB2 + * header. 
+ */ + if (tcon->share_flags & SHI1005_FLAGS_DFS) { + int name_len; + + req->hdr.sync_hdr.Flags |= SMB2_FLAGS_DFS_OPERATIONS; + rc = alloc_path_with_tree_prefix(©_path, ©_size, + &name_len, + tcon->treeName, path); + if (rc) + return rc; + req->NameLength = cpu_to_le16(name_len * 2); uni_path_len = copy_size; path = copy_path; + } else { + uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; + /* MUST set path len (NameLength) to 0 opening root of share */ + req->NameLength = cpu_to_le16(uni_path_len - 2); + if (uni_path_len % 8 != 0) { + copy_size = roundup(uni_path_len, 8); + copy_path = kzalloc(copy_size, GFP_KERNEL); + if (!copy_path) + return -ENOMEM; + memcpy((char *)copy_path, (const char *)path, + uni_path_len); + uni_path_len = copy_size; + path = copy_path; + } } iov[1].iov_len = uni_path_len; -- GitLab From 9d49640a21bffd730a6ebf2a0032e022f7caf84a Mon Sep 17 00:00:00 2001 From: Aurelien Aptel Date: Mon, 13 Feb 2017 16:16:49 +0100 Subject: [PATCH 0971/1084] CIFS: implement get_dfs_refer for SMB2+ in SMB2+ the get_dfs_refer operation uses a FSCTL. The request can be made on any Tree Connection according to the specs. Since Samba only accepted it on an IPC connection until recently, try that first. https://lists.samba.org/archive/samba-technical/2017-February/118859.html 3.2.4.20.3 Application Requests DFS Referral Information: > The client MUST search for an existing Session and TreeConnect to any > share on the server identified by ServerName for the user identified by > UserCredentials. If no Session and TreeConnect are found, the client > MUST establish a new Session and TreeConnect to IPC$ on the target > server as described in section 3.2.4.2 using the supplied ServerName and > UserCredentials. Signed-off-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2pdu.h | 8 ++++ 2 files changed, 109 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index eba00cd3bd16..b360c381b00e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1104,6 +1104,103 @@ smb2_new_lease_key(struct cifs_fid *fid) generate_random_uuid(fid->lease_key); } +static int +smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, + const char *search_name, + struct dfs_info3_param **target_nodes, + unsigned int *num_of_nodes, + const struct nls_table *nls_codepage, int remap) +{ + int rc; + __le16 *utf16_path = NULL; + int utf16_path_len = 0; + struct cifs_tcon *tcon; + struct fsctl_get_dfs_referral_req *dfs_req = NULL; + struct get_dfs_referral_rsp *dfs_rsp = NULL; + u32 dfs_req_size = 0, dfs_rsp_size = 0; + + cifs_dbg(FYI, "smb2_get_dfs_refer path <%s>\n", search_name); + + /* + * Use any tcon from the current session. Here, the first one. 
+ */ + spin_lock(&cifs_tcp_ses_lock); + tcon = list_first_entry_or_null(&ses->tcon_list, struct cifs_tcon, + tcon_list); + if (tcon) + tcon->tc_count++; + spin_unlock(&cifs_tcp_ses_lock); + + if (!tcon) { + cifs_dbg(VFS, "session %p has no tcon available for a dfs referral request\n", + ses); + rc = -ENOTCONN; + goto out; + } + + utf16_path = cifs_strndup_to_utf16(search_name, PATH_MAX, + &utf16_path_len, + nls_codepage, remap); + if (!utf16_path) { + rc = -ENOMEM; + goto out; + } + + dfs_req_size = sizeof(*dfs_req) + utf16_path_len; + dfs_req = kzalloc(dfs_req_size, GFP_KERNEL); + if (!dfs_req) { + rc = -ENOMEM; + goto out; + } + + /* Highest DFS referral version understood */ + dfs_req->MaxReferralLevel = DFS_VERSION; + + /* Path to resolve in an UTF-16 null-terminated string */ + memcpy(dfs_req->RequestFileName, utf16_path, utf16_path_len); + + do { + /* try first with IPC */ + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, + FSCTL_DFS_GET_REFERRALS, + true /* is_fsctl */, true /* use_ipc */, + (char *)dfs_req, dfs_req_size, + (char **)&dfs_rsp, &dfs_rsp_size); + if (rc == -ENOTCONN) { + /* try with normal tcon */ + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, + FSCTL_DFS_GET_REFERRALS, + true /* is_fsctl */, false /*use_ipc*/, + (char *)dfs_req, dfs_req_size, + (char **)&dfs_rsp, &dfs_rsp_size); + } + } while (rc == -EAGAIN); + + if (rc) { + cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc); + goto out; + } + + rc = parse_dfs_referrals(dfs_rsp, dfs_rsp_size, + num_of_nodes, target_nodes, + nls_codepage, remap, search_name, + true /* is_unicode */); + if (rc) { + cifs_dbg(VFS, "parse error in smb2_get_dfs_refer rc=%d\n", rc); + goto out; + } + + out: + if (tcon) { + spin_lock(&cifs_tcp_ses_lock); + tcon->tc_count--; + spin_unlock(&cifs_tcp_ses_lock); + } + kfree(utf16_path); + kfree(dfs_req); + kfree(dfs_rsp); + return rc; +} #define SMB2_SYMLINK_STRUCT_SIZE \ (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp)) @@ -2283,6 +2380,7 @@ struct smb_version_operations smb20_operations = { .clone_range = smb2_clone_range, .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, + .get_dfs_refer = smb2_get_dfs_refer, }; struct smb_version_operations smb21_operations = { @@ -2364,6 +2462,7 @@ struct smb_version_operations smb21_operations = { .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, .enum_snapshots = smb3_enum_snapshots, + .get_dfs_refer = smb2_get_dfs_refer, }; struct smb_version_operations smb30_operations = { @@ -2455,6 +2554,7 @@ struct smb_version_operations smb30_operations = { .free_transform_rq = smb3_free_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, + .get_dfs_refer = smb2_get_dfs_refer, }; #ifdef CONFIG_CIFS_SMB311 @@ -2547,6 +2647,7 @@ struct smb_version_operations smb311_operations = { .free_transform_rq = smb3_free_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, + .get_dfs_refer = smb2_get_dfs_refer, }; #endif /* CIFS_SMB311 */ diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index c03b252501a1..18700fd25a0b 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -695,6 +695,14 @@ struct fsctl_get_integrity_information_rsp { /* Integrity flags for above */ #define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001 +/* See MS-DFSC 2.2.2 */ +struct fsctl_get_dfs_referral_req { + __le16 MaxReferralLevel; + __u8 RequestFileName[]; +} __packed; + +/* DFS response is 
struct get_dfs_refer_rsp */ + /* See MS-SMB2 2.2.31.3 */ struct network_resiliency_req { __le32 Timeout; -- GitLab From 165a5e22fafb127ecb5914e12e8c32a1f0d3f820 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 8 Feb 2017 08:05:56 +0100 Subject: [PATCH 0972/1084] block: Move bdi_unregister() to del_gendisk() Commit 6cd18e711dd8 "block: destroy bdi before blockdev is unregistered." moved bdi unregistration (at that time through bdi_destroy()) from blk_release_queue() to blk_cleanup_queue() because it needs to happen before blk_unregister_region() call in del_gendisk() for MD. SCSI though will free up the device number from sd_remove() called through a maze of callbacks from device_del() in __scsi_remove_device() before blk_cleanup_queue() and thus similar races as described in 6cd18e711dd8 can happen for SCSI as well as reported by Omar [1]. Moving bdi_unregister() to del_gendisk() works for MD and fixes the problem for SCSI since del_gendisk() gets called from sd_remove() before freeing the device number. This also makes device_add_disk() (calling bdi_register_owner()) more symmetric with del_gendisk(). [1] http://marc.info/?l=linux-block&m=148554717109098&w=2 Tested-by: Lekshmi Pillai Acked-by: Tejun Heo Signed-off-by: Jan Kara Tested-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/genhd.c | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index b9e857f4afe8..1086dac8724c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -578,7 +578,6 @@ void blk_cleanup_queue(struct request_queue *q) q->queue_lock = &q->__queue_lock; spin_unlock_irq(lock); - bdi_unregister(q->backing_dev_info); put_disk_devt(q->disk_devt); /* @q is and will stay empty, shutdown and put */ diff --git a/block/genhd.c b/block/genhd.c index 2f444b87a5f2..b26a5ea115d0 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -681,6 +681,11 @@ void del_gendisk(struct gendisk *disk) disk->flags &= ~GENHD_FL_UP; sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); + /* + * Unregister bdi before releasing device numbers (as they can get + * reused and we'd get clashes in sysfs). + */ + bdi_unregister(disk->queue->backing_dev_info); blk_unregister_queue(disk); blk_unregister_region(disk_devt(disk), disk->minors); -- GitLab From 68e21be2916b359fd8afb536c1911dc014cfd03e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 19:08:20 +0100 Subject: [PATCH 0973/1084] sched/headers: Move task->mm handling methods to Move the following task->mm helper APIs into a new header file, , to further reduce the size and complexity of . 
Here are how the APIs are used in various kernel files: # mm_alloc(): arch/arm/mach-rpc/ecard.c fs/exec.c include/linux/sched/mm.h kernel/fork.c # __mmdrop(): arch/arc/include/asm/mmu_context.h include/linux/sched/mm.h kernel/fork.c # mmdrop(): arch/arm/mach-rpc/ecard.c arch/m68k/sun3/mmu_emu.c arch/x86/mm/tlb.c drivers/gpu/drm/amd/amdkfd/kfd_process.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/infiniband/hw/hfi1/file_ops.c drivers/vfio/vfio_iommu_spapr_tce.c fs/exec.c fs/proc/base.c fs/proc/task_mmu.c fs/proc/task_nommu.c fs/userfaultfd.c include/linux/mmu_notifier.h include/linux/sched/mm.h kernel/fork.c kernel/futex.c kernel/sched/core.c mm/khugepaged.c mm/ksm.c mm/mmu_context.c mm/mmu_notifier.c mm/oom_kill.c virt/kvm/kvm_main.c # mmdrop_async_fn(): include/linux/sched/mm.h # mmdrop_async(): include/linux/sched/mm.h kernel/fork.c # mmget_not_zero(): fs/userfaultfd.c include/linux/sched/mm.h mm/oom_kill.c # mmput(): arch/arc/include/asm/mmu_context.h arch/arc/kernel/troubleshoot.c arch/frv/mm/mmu-context.c arch/powerpc/platforms/cell/spufs/context.c arch/sparc/include/asm/mmu_context_32.h drivers/android/binder.c drivers/gpu/drm/etnaviv/etnaviv_gem.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/core/uverbs_main.c drivers/infiniband/hw/mlx4/main.c drivers/infiniband/hw/mlx5/main.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/iommu/amd_iommu_v2.c drivers/iommu/intel-svm.c drivers/lguest/lguest_user.c drivers/misc/cxl/fault.c drivers/misc/mic/scif/scif_rma.c drivers/oprofile/buffer_sync.c drivers/vfio/vfio_iommu_type1.c drivers/vhost/vhost.c drivers/xen/gntdev.c fs/exec.c fs/proc/array.c fs/proc/base.c fs/proc/task_mmu.c fs/proc/task_nommu.c fs/userfaultfd.c include/linux/sched/mm.h kernel/cpuset.c kernel/events/core.c kernel/events/uprobes.c kernel/exit.c kernel/fork.c kernel/ptrace.c kernel/sys.c kernel/trace/trace_output.c kernel/tsacct.c mm/memcontrol.c mm/memory.c mm/mempolicy.c mm/migrate.c mm/mmu_notifier.c mm/nommu.c mm/oom_kill.c mm/process_vm_access.c mm/rmap.c mm/swapfile.c mm/util.c virt/kvm/async_pf.c # mmput_async(): include/linux/sched/mm.h kernel/fork.c mm/oom_kill.c # get_task_mm(): arch/arc/kernel/troubleshoot.c arch/powerpc/platforms/cell/spufs/context.c drivers/android/binder.c drivers/gpu/drm/etnaviv/etnaviv_gem.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/mlx4/main.c drivers/infiniband/hw/mlx5/main.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/iommu/amd_iommu_v2.c drivers/iommu/intel-svm.c drivers/lguest/lguest_user.c drivers/misc/cxl/fault.c drivers/misc/mic/scif/scif_rma.c drivers/oprofile/buffer_sync.c drivers/vfio/vfio_iommu_type1.c drivers/vhost/vhost.c drivers/xen/gntdev.c fs/proc/array.c fs/proc/base.c fs/proc/task_mmu.c include/linux/sched/mm.h kernel/cpuset.c kernel/events/core.c kernel/exit.c kernel/fork.c kernel/ptrace.c kernel/sys.c kernel/trace/trace_output.c kernel/tsacct.c mm/memcontrol.c mm/memory.c mm/mempolicy.c mm/migrate.c mm/mmu_notifier.c mm/nommu.c mm/util.c # mm_access(): fs/proc/base.c include/linux/sched/mm.h kernel/fork.c mm/process_vm_access.c # mm_release(): arch/arc/include/asm/mmu_context.h fs/exec.c include/linux/sched/mm.h include/uapi/linux/sched.h kernel/exit.c kernel/fork.c Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/smp.c | 2 +- arch/arc/kernel/smp.c | 2 +- 
arch/arm/kernel/smp.c | 2 +- arch/arm64/kernel/smp.c | 2 +- arch/blackfin/mach-common/smp.c | 2 +- arch/hexagon/kernel/smp.c | 2 +- arch/ia64/kernel/setup.c | 2 +- arch/m32r/kernel/setup.c | 2 +- arch/metag/kernel/smp.c | 2 +- arch/mips/kernel/traps.c | 2 +- arch/parisc/kernel/smp.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- arch/s390/kernel/processor.c | 2 + arch/score/kernel/traps.c | 1 + arch/sh/kernel/smp.c | 2 +- arch/sparc/kernel/leon_smp.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/sparc/kernel/sun4d_smp.c | 2 +- arch/sparc/kernel/sun4m_smp.c | 2 +- arch/sparc/kernel/traps_32.c | 2 +- arch/sparc/kernel/traps_64.c | 2 +- arch/tile/kernel/smpboot.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/xtensa/kernel/smp.c | 1 + include/linux/sched.h | 95 --------------------------------- include/linux/sched/mm.h | 95 +++++++++++++++++++++++++++++++++ 26 files changed, 120 insertions(+), 116 deletions(-) diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index acb4b146a607..9fc560459ebd 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index b8e8d3944481..f46267153ec2 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -13,7 +13,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index b724cff7ad60..572a8df1b766 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 83c0a839a6ad..ef1caae02110 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 3ac98252d299..b32ddab7966c 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index 1f63e91a353b..5dbc15549e01 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 63bef6fc0f3e..23e3fd61e335 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index b18bc0bd6544..1a9e977287e6 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index cab2aa64ca82..232a12bf3f99 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 1a9366a157cb..c7d17cfb32f6 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c 
index 67b452b41ff6..63365106ea19 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index fce17789c675..46f89e66a273 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index bf7854523831..928b929a6261 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -8,11 +8,13 @@ #include #include +#include #include #include #include #include #include + #include #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index fe68de6c746c..e359ec675869 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,6 +24,7 @@ */ #include +#include #include #include #include diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 4abf119c129c..c483422ea4d0 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index b99d33797e1d..db7acf27bea2 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 15052d364e04..b3bc0ac757cc 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 7b55c50eabe5..af93b50e3ce4 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 633c4cf6fdb0..5547fcb1d72d 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 2de72a49308a..466d4aed06c7 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -9,7 +9,7 @@ * I hate traps on the sparc, grrr... 
*/ -#include /* for jiffies */ +#include #include #include #include diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 4ff4c35f76b2..196ee5eb4d48 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index f3fdd0c39b12..869c22e57561 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f2fd8fefc589..b11b38c3b0bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index fd894eaa63f3..932d64689bac 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 042620729230..d29bbe0ee41f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2379,101 +2379,6 @@ static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) return sp; } -/* - * Routines for handling mm_structs - */ -extern struct mm_struct * mm_alloc(void); - -/** - * mmgrab() - Pin a &struct mm_struct. - * @mm: The &struct mm_struct to pin. - * - * Make sure that @mm will not get freed even after the owning task - * exits. This doesn't guarantee that the associated address space - * will still exist later on and mmget_not_zero() has to be used before - * accessing it. - * - * This is a preferred way to to pin @mm for a longer/unbounded amount - * of time. - * - * Use mmdrop() to release the reference acquired by mmgrab(). - * - * See also for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmgrab(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_count); -} - -/* mmdrop drops the mm and the page tables */ -extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); -} - -static inline void mmdrop_async_fn(struct work_struct *work) -{ - struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); - __mmdrop(mm); -} - -static inline void mmdrop_async(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) { - INIT_WORK(&mm->async_put_work, mmdrop_async_fn); - schedule_work(&mm->async_put_work); - } -} - -/** - * mmget() - Pin the address space associated with a &struct mm_struct. - * @mm: The address space to pin. - * - * Make sure that the address space of the given &struct mm_struct doesn't - * go away. This does not protect against parts of the address space being - * modified or freed, however. - * - * Never use this function to pin this address space for an - * unbounded/indefinite amount of time. - * - * Use mmput() to release the reference acquired by mmget(). - * - * See also for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. 
- */ -static inline void mmget(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_users); -} - -static inline bool mmget_not_zero(struct mm_struct *mm) -{ - return atomic_inc_not_zero(&mm->mm_users); -} - -/* mmput gets rid of the mappings and all user-space */ -extern void mmput(struct mm_struct *); -#ifdef CONFIG_MMU -/* same as above but performs the slow path from the async context. Can - * be called from the atomic context as well - */ -extern void mmput_async(struct mm_struct *); -#endif - -/* Grab a reference to a task's mm, if it is not already going away */ -extern struct mm_struct *get_task_mm(struct task_struct *task); -/* - * Grab a reference to a task's mm, if it is not already going away - * and ptrace_may_access with the mode parameter passed to it - * succeeds. - */ -extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); - #ifdef CONFIG_HAVE_COPY_THREAD_TLS extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, struct task_struct *, unsigned long); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index d32e3932b2e3..be1ae55f5ab9 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -5,4 +5,99 @@ #include #include +/* + * Routines for handling mm_structs + */ +extern struct mm_struct * mm_alloc(void); + +/** + * mmgrab() - Pin a &struct mm_struct. + * @mm: The &struct mm_struct to pin. + * + * Make sure that @mm will not get freed even after the owning task + * exits. This doesn't guarantee that the associated address space + * will still exist later on and mmget_not_zero() has to be used before + * accessing it. + * + * This is a preferred way to to pin @mm for a longer/unbounded amount + * of time. + * + * Use mmdrop() to release the reference acquired by mmgrab(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmgrab(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_count); +} + +/* mmdrop drops the mm and the page tables */ +extern void __mmdrop(struct mm_struct *); +static inline void mmdrop(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) + __mmdrop(mm); +} + +static inline void mmdrop_async_fn(struct work_struct *work) +{ + struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); + __mmdrop(mm); +} + +static inline void mmdrop_async(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) { + INIT_WORK(&mm->async_put_work, mmdrop_async_fn); + schedule_work(&mm->async_put_work); + } +} + +/** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin. + * + * Make sure that the address space of the given &struct mm_struct doesn't + * go away. This does not protect against parts of the address space being + * modified or freed, however. + * + * Never use this function to pin this address space for an + * unbounded/indefinite amount of time. + * + * Use mmput() to release the reference acquired by mmget(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. 
+ */ +static inline void mmget(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_users); +} + +static inline bool mmget_not_zero(struct mm_struct *mm) +{ + return atomic_inc_not_zero(&mm->mm_users); +} + +/* mmput gets rid of the mappings and all user-space */ +extern void mmput(struct mm_struct *); +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can + * be called from the atomic context as well + */ +extern void mmput_async(struct mm_struct *); +#endif + +/* Grab a reference to a task's mm, if it is not already going away */ +extern struct mm_struct *get_task_mm(struct task_struct *task); +/* + * Grab a reference to a task's mm, if it is not already going away + * and ptrace_may_access with the mode parameter passed to it + * succeeds. + */ +extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); +/* Remove the current tasks stale references to the old mm_struct */ +extern void mm_release(struct task_struct *, struct mm_struct *); + #endif /* _LINUX_SCHED_MM_H */ -- GitLab From 11701c6768367294c5086738d49196192aaf3d60 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 19:21:47 +0100 Subject: [PATCH 0974/1084] sched/headers: Move task->mm coredumping related defines and methods from to This further reduces the size and complexity of . These are the definitions and APIs that are moved: # MMF_*: fs/binfmt_elf.c fs/binfmt_elf_fdpic.c fs/exec.c fs/proc/base.c include/linux/khugepaged.h include/linux/ksm.h include/linux/sched/coredump.h kernel/events/uprobes.c kernel/fork.c mm/huge_memory.c mm/khugepaged.c mm/ksm.c mm/memory.c mm/oom_kill.c # SUID_DUMP_*: arch/ia64/include/asm/processor.h fs/coredump.c fs/exec.c fs/proc/internal.h include/linux/sched/coredump.h kernel/ptrace.c kernel/sys.c kernel/sysctl.c # get_dumpable(): arch/ia64/include/asm/processor.h fs/coredump.c fs/exec.c fs/proc/internal.h include/linux/sched/coredump.h kernel/ptrace.c kernel/sys.c # set_dumpable(): fs/exec.c include/linux/sched/coredump.h kernel/cred.c kernel/sys.c Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 68 --------------------------------- include/linux/sched/coredump.h | 69 ++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 68 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d29bbe0ee41f..7934cd0acbc7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -361,74 +361,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ -#define SUID_DUMP_USER 1 /* Dump as user of process */ -#define SUID_DUMP_ROOT 2 /* Dump as root */ - -/* mm flags */ - -/* for SUID_DUMP_* above */ -#define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) - -extern void set_dumpable(struct mm_struct *mm, int value); -/* - * This returns the actual value of the suid_dumpable flag. For things - * that are using this for checking for privilege transitions, it must - * test against SUID_DUMP_USER rather than treating it as a boolean - * value. 
- */ -static inline int __get_dumpable(unsigned long mm_flags) -{ - return mm_flags & MMF_DUMPABLE_MASK; -} - -static inline int get_dumpable(struct mm_struct *mm) -{ - return __get_dumpable(mm->flags); -} - -/* coredump filter bits */ -#define MMF_DUMP_ANON_PRIVATE 2 -#define MMF_DUMP_ANON_SHARED 3 -#define MMF_DUMP_MAPPED_PRIVATE 4 -#define MMF_DUMP_MAPPED_SHARED 5 -#define MMF_DUMP_ELF_HEADERS 6 -#define MMF_DUMP_HUGETLB_PRIVATE 7 -#define MMF_DUMP_HUGETLB_SHARED 8 -#define MMF_DUMP_DAX_PRIVATE 9 -#define MMF_DUMP_DAX_SHARED 10 - -#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 9 -#define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) -#define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) - -#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) -#else -# define MMF_DUMP_MASK_DEFAULT_ELF 0 -#endif - /* leave room for more dump flags */ -#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ -#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ - -#define MMF_HAS_UPROBES 19 /* has uprobes */ -#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ -#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ -#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ - -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) - struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ab81e564e076..f46912aa4f4c 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -2,5 +2,74 @@ #define _LINUX_SCHED_COREDUMP_H #include +#include + +#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ +#define SUID_DUMP_USER 1 /* Dump as user of process */ +#define SUID_DUMP_ROOT 2 /* Dump as root */ + +/* mm flags */ + +/* for SUID_DUMP_* above */ +#define MMF_DUMPABLE_BITS 2 +#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) + +extern void set_dumpable(struct mm_struct *mm, int value); +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. 
+ */ +static inline int __get_dumpable(unsigned long mm_flags) +{ + return mm_flags & MMF_DUMPABLE_MASK; +} + +static inline int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + +/* coredump filter bits */ +#define MMF_DUMP_ANON_PRIVATE 2 +#define MMF_DUMP_ANON_SHARED 3 +#define MMF_DUMP_MAPPED_PRIVATE 4 +#define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_ELF_HEADERS 6 +#define MMF_DUMP_HUGETLB_PRIVATE 7 +#define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 + +#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS +#define MMF_DUMP_FILTER_BITS 9 +#define MMF_DUMP_FILTER_MASK \ + (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) +#define MMF_DUMP_FILTER_DEFAULT \ + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ + (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + +#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS +# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +#else +# define MMF_DUMP_MASK_DEFAULT_ELF 0 +#endif + /* leave room for more dump flags */ +#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ +#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ +/* + * This one-shot flag is dropped due to necessity of changing exe once again + * on NFS restore + */ +//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ + +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ +#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ +#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ +#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ + +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) #endif /* _LINUX_SCHED_COREDUMP_H */ -- GitLab From c3edc4010e9d102eb7b8f17d15c2ebc425fed63c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 08:35:14 +0100 Subject: [PATCH 0975/1084] sched/headers: Move task_struct::signal and task_struct::sighand types and accessors into MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit task_struct::signal and task_struct::sighand are pointers, which would normally make it straightforward to not define those types in sched.h. That is not so, because the types are accompanied by a myriad of APIs (macros and inline functions) that dereference them. Split the types and the APIs out of sched.h and move them into a new header, . With this change sched.h does not know about 'struct signal' and 'struct sighand' anymore, trying to put accessors into sched.h as a test fails the following way: ./include/linux/sched.h: In function ‘test_signal_types’: ./include/linux/sched.h:2461:18: error: dereferencing pointer to incomplete type ‘struct signal_struct’ ^ This reduces the size and complexity of sched.h significantly. Update all headers and .c code that relied on getting the signal handling functionality from to include . The list of affected files in the preparatory patch was partly generated by grepping for the APIs, and partly by doing coverage build testing, both all[yes|mod|def|no]config builds on 64-bit and 32-bit x86, and an array of cross-architecture builds. Nevertheless some (trivial) build breakage is still expected related to rare Kconfig combinations and in-flight patches to various kernel code, but most of it should be handled by this patch. 
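For illustration, a minimal, hypothetical caller after this split could look like the sketch below. The function and its logic are made up for this example; only the helpers shown (lock_task_sighand(), get_nr_threads(), thread_group_leader(), send_sig()) and the new include/linux/sched/signal.h header come from the patch itself. The point is that such a file must now include the new header explicitly instead of getting these definitions indirectly from linux/sched.h.

#include <linux/sched.h>
#include <linux/sched/signal.h>	/* now provides the signal_struct/sighand_struct helpers */

/* Hypothetical example: count the threads of a task and poke its group leader. */
static void example_signal_user(struct task_struct *tsk)
{
	unsigned long flags;
	int nr_threads = 0;

	/* lock_task_sighand()/unlock_task_sighand() now live in the new header */
	if (lock_task_sighand(tsk, &flags)) {
		nr_threads = get_nr_threads(tsk);	/* dereferences tsk->signal */
		unlock_task_sighand(tsk, &flags);
	}

	if (nr_threads > 1 && thread_group_leader(tsk))
		send_sig(SIGUSR1, tsk, 0);		/* declaration also moved */
}
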
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/nwfpe/fpmodule.c | 2 +- arch/sh/kernel/cpu/sh4/fpu.c | 3 +- drivers/net/tap.c | 2 +- include/linux/sched.h | 499 +--------------------------------- include/linux/sched/signal.h | 502 +++++++++++++++++++++++++++++++++++ kernel/cgroup/cgroup-v1.c | 1 + mm/vmalloc.c | 2 +- net/smc/af_smc.c | 2 + net/smc/smc_clc.c | 2 + net/smc/smc_close.c | 2 + net/smc/smc_rx.c | 2 + net/smc/smc_tx.c | 2 + 12 files changed, 520 insertions(+), 501 deletions(-) diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c index ec717c190e2c..1365e8650843 100644 --- a/arch/arm/nwfpe/fpmodule.c +++ b/arch/arm/nwfpe/fpmodule.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index 69ab4d3c8d41..95fd2dcb83da 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -10,8 +10,7 @@ * * FIXME! These routines have not been tested for big endian case. */ -#include -#include +#include #include #include #include diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 35b55a2fa1a1..4d4173d25dd0 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 7934cd0acbc7..c1586104d4c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,6 +71,9 @@ struct blk_plug; struct filename; struct nameidata; +struct signal_struct; +struct sighand_struct; + extern unsigned long total_forks; extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); @@ -361,13 +364,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -struct sighand_struct { - atomic_t count; - struct k_sigaction action[_NSIG]; - spinlock_t siglock; - wait_queue_head_t signalfd_wqh; -}; - struct pacct_struct { int ac_flag; long ac_exitcode; @@ -485,195 +481,6 @@ struct thread_group_cputimer { #include struct autogroup; -/* - * NOTE! "signal_struct" does not have its own - * locking, because a shared signal_struct always - * implies a shared sighand_struct, so locking - * sighand_struct is always a proper superset of - * the locking of signal_struct. - */ -struct signal_struct { - atomic_t sigcnt; - atomic_t live; - int nr_threads; - struct list_head thread_head; - - wait_queue_head_t wait_chldexit; /* for wait4() */ - - /* current thread group signal load-balancing target: */ - struct task_struct *curr_target; - - /* shared signal handling: */ - struct sigpending shared_pending; - - /* thread group exit support */ - int group_exit_code; - /* overloaded: - * - notify group_exit_task when ->count is equal to notify_count - * - everyone except group_exit_task is stopped during signal delivery - * of fatal signals, group_exit_task processes the signal. - */ - int notify_count; - struct task_struct *group_exit_task; - - /* thread group stop support, overloads group_exit_code too */ - int group_stop_count; - unsigned int flags; /* see SIGNAL_* flags below */ - - /* - * PR_SET_CHILD_SUBREAPER marks a process, like a service - * manager, to re-parent orphan (double-forking) child processes - * to this process instead of 'init'. 
The service manager is - * able to receive SIGCHLD signals and is able to investigate - * the process until it calls wait(). All children of this - * process will inherit a flag if they should look for a - * child_subreaper process at exit. - */ - unsigned int is_child_subreaper:1; - unsigned int has_child_subreaper:1; - -#ifdef CONFIG_POSIX_TIMERS - - /* POSIX.1b Interval Timers */ - int posix_timer_id; - struct list_head posix_timers; - - /* ITIMER_REAL timer for the process */ - struct hrtimer real_timer; - ktime_t it_real_incr; - - /* - * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use - * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these - * values are defined to 0 and 1 respectively - */ - struct cpu_itimer it[2]; - - /* - * Thread group totals for process CPU timers. - * See thread_group_cputimer(), et al, for details. - */ - struct thread_group_cputimer cputimer; - - /* Earliest-expiration cache. */ - struct task_cputime cputime_expires; - - struct list_head cpu_timers[3]; - -#endif - - struct pid *leader_pid; - -#ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; -#endif - - struct pid *tty_old_pgrp; - - /* boolean value for session group leader */ - int leader; - - struct tty_struct *tty; /* NULL if no tty */ - -#ifdef CONFIG_SCHED_AUTOGROUP - struct autogroup *autogroup; -#endif - /* - * Cumulative resource counters for dead threads in the group, - * and for reaped dead child processes forked by this group. - * Live threads maintain their own counters and add to these - * in __exit_signal, except for the group leader. - */ - seqlock_t stats_lock; - u64 utime, stime, cutime, cstime; - u64 gtime; - u64 cgtime; - struct prev_cputime prev_cputime; - unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; - unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; - unsigned long inblock, oublock, cinblock, coublock; - unsigned long maxrss, cmaxrss; - struct task_io_accounting ioac; - - /* - * Cumulative ns of schedule CPU time fo dead threads in the - * group, not including a zombie group leader, (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) - */ - unsigned long long sum_sched_runtime; - - /* - * We don't bother to synchronize most readers of this at all, - * because there is no reader checking a limit that actually needs - * to get both rlim_cur and rlim_max atomically, and either one - * alone is a single word that can safely be read normally. - * getrlimit/setrlimit use task_lock(current->group_leader) to - * protect this instead of the siglock, because they really - * have no need to disable irqs. - */ - struct rlimit rlim[RLIM_NLIMITS]; - -#ifdef CONFIG_BSD_PROCESS_ACCT - struct pacct_struct pacct; /* per-process accounting information */ -#endif -#ifdef CONFIG_TASKSTATS - struct taskstats *stats; -#endif -#ifdef CONFIG_AUDIT - unsigned audit_tty; - struct tty_audit_buf *tty_audit_buf; -#endif - - /* - * Thread is the potential origin of an oom condition; kill first on - * oom - */ - bool oom_flag_origin; - short oom_score_adj; /* OOM kill score adjustment */ - short oom_score_adj_min; /* OOM kill score adjustment min value. - * Only settable by CAP_SYS_RESOURCE. */ - struct mm_struct *oom_mm; /* recorded mm when the thread group got - * killed by the oom killer */ - - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ -}; - -/* - * Bits in flags field of signal_struct. 
- */ -#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ -#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ -/* - * Pending notifications to parent. - */ -#define SIGNAL_CLD_STOPPED 0x00000010 -#define SIGNAL_CLD_CONTINUED 0x00000020 -#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) - -#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ - -#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ - SIGNAL_STOP_CONTINUED) - -static inline void signal_set_stop_flags(struct signal_struct *sig, - unsigned int flags) -{ - WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); - sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; -} - -/* If true, all threads except ->group_exit_task have pending SIGKILL */ -static inline int signal_group_exit(const struct signal_struct *sig) -{ - return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exit_task != NULL); -} - /* * Some day this will be a full-fledged user tracking system.. */ @@ -2126,190 +1933,8 @@ extern int sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void ignore_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); - -static inline int kernel_dequeue_signal(siginfo_t *info) -{ - struct task_struct *tsk = current; - siginfo_t __info; - int ret; - - spin_lock_irq(&tsk->sighand->siglock); - ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); - spin_unlock_irq(&tsk->sighand->siglock); - - return ret; -} - -static inline void kernel_signal_stop(void) -{ - spin_lock_irq(¤t->sighand->siglock); - if (current->jobctl & JOBCTL_STOP_DEQUEUED) - __set_current_state(TASK_STOPPED); - spin_unlock_irq(¤t->sighand->siglock); - - schedule(); -} extern void release_task(struct task_struct * p); -extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sigsegv(int, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); -extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); -extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); -extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, - const struct cred *, u32); -extern int kill_pgrp(struct pid *pid, int sig, int priv); -extern int kill_pid(struct pid *pid, int sig, int priv); -extern int kill_proc_info(int, struct siginfo *, pid_t); -extern __must_check bool do_notify_parent(struct task_struct *, int); -extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); -extern void force_sig(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); -extern int zap_other_threads(struct task_struct *p); -extern struct sigqueue *sigqueue_alloc(void); -extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); -extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); - -#ifdef TIF_RESTORE_SIGMASK -/* - * Legacy restore_sigmask accessors. These are inefficient on - * SMP architectures because they require atomic operations. 
- */ - -/** - * set_restore_sigmask() - make sure saved_sigmask processing gets done - * - * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code - * will run before returning to user mode, to process the flag. For - * all callers, TIF_SIGPENDING is already set or it's no harm to set - * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the - * arch code will notice on return to user mode, in case those bits - * are scarce. We set TIF_SIGPENDING here to ensure that the arch - * signal code always gets run when TIF_RESTORE_SIGMASK is set. - */ -static inline void set_restore_sigmask(void) -{ - set_thread_flag(TIF_RESTORE_SIGMASK); - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - clear_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_restore_sigmask(void) -{ - return test_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_and_clear_restore_sigmask(void) -{ - return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); -} - -#else /* TIF_RESTORE_SIGMASK */ - -/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ -static inline void set_restore_sigmask(void) -{ - current->restore_sigmask = true; - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - current->restore_sigmask = false; -} -static inline bool test_restore_sigmask(void) -{ - return current->restore_sigmask; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - if (!current->restore_sigmask) - return false; - current->restore_sigmask = false; - return true; -} -#endif - -static inline void restore_saved_sigmask(void) -{ - if (test_and_clear_restore_sigmask()) - __set_current_blocked(¤t->saved_sigmask); -} - -static inline sigset_t *sigmask_to_save(void) -{ - sigset_t *res = ¤t->blocked; - if (unlikely(test_restore_sigmask())) - res = ¤t->saved_sigmask; - return res; -} - -static inline int kill_cad_pid(int sig, int priv) -{ - return kill_pid(cad_pid, sig, priv); -} - -/* These can be the second arg to send_sig_info/send_group_sig_info. */ -#define SEND_SIG_NOINFO ((struct siginfo *) 0) -#define SEND_SIG_PRIV ((struct siginfo *) 1) -#define SEND_SIG_FORCED ((struct siginfo *) 2) - -/* - * True if we are on the alternate signal stack. - */ -static inline int on_sig_stack(unsigned long sp) -{ - /* - * If the signal stack is SS_AUTODISARM then, by construction, we - * can't be on the signal stack unless user code deliberately set - * SS_AUTODISARM when we were already on it. - * - * This improves reliability: if user state gets corrupted such that - * the stack pointer points very close to the end of the signal stack, - * then this check will enable the signal to be handled anyway. - */ - if (current->sas_ss_flags & SS_AUTODISARM) - return 0; - -#ifdef CONFIG_STACK_GROWSUP - return sp >= current->sas_ss_sp && - sp - current->sas_ss_sp < current->sas_ss_size; -#else - return sp > current->sas_ss_sp && - sp - current->sas_ss_sp <= current->sas_ss_size; -#endif -} - -static inline int sas_ss_flags(unsigned long sp) -{ - if (!current->sas_ss_size) - return SS_DISABLE; - - return on_sig_stack(sp) ? SS_ONSTACK : 0; -} - -static inline void sas_ss_reset(struct task_struct *p) -{ - p->sas_ss_sp = 0; - p->sas_ss_size = 0; - p->sas_ss_flags = SS_DISABLE; -} - -static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) -{ - if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! 
sas_ss_flags(sp)) -#ifdef CONFIG_STACK_GROWSUP - return current->sas_ss_sp; -#else - return current->sas_ss_sp + current->sas_ss_size; -#endif - return sp; -} #ifdef CONFIG_HAVE_COPY_THREAD_TLS extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, @@ -2338,10 +1963,8 @@ static inline void exit_thread(struct task_struct *tsk) #endif extern void exit_files(struct task_struct *); -extern void __cleanup_sighand(struct sighand_struct *); extern void exit_itimers(struct signal_struct *); -extern void flush_itimer_signals(void); extern void do_group_exit(int); @@ -2376,81 +1999,6 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -#define tasklist_empty() \ - list_empty(&init_task.tasks) - -#define next_task(p) \ - list_entry_rcu((p)->tasks.next, struct task_struct, tasks) - -#define for_each_process(p) \ - for (p = &init_task ; (p = next_task(p)) != &init_task ; ) - -extern bool current_is_single_threaded(void); - -/* - * Careful: do_each_thread/while_each_thread is a double loop so - * 'break' will not work as expected - use goto instead. - */ -#define do_each_thread(g, t) \ - for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do - -#define while_each_thread(g, t) \ - while ((t = next_thread(t)) != g) - -#define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) - -#define for_each_thread(p, t) \ - __for_each_thread((p)->signal, t) - -/* Careful: this is a double loop, 'break' won't work as expected. */ -#define for_each_process_thread(p, t) \ - for_each_process(p) for_each_thread(p, t) - -typedef int (*proc_visitor)(struct task_struct *p, void *data); -void walk_process_tree(struct task_struct *top, proc_visitor, void *); - -static inline int get_nr_threads(struct task_struct *tsk) -{ - return tsk->signal->nr_threads; -} - -static inline bool thread_group_leader(struct task_struct *p) -{ - return p->exit_signal >= 0; -} - -/* Do to the insanities of de_thread it is possible for a process - * to have the pid of the thread group leader without actually being - * the thread group leader. For iteration through the pids in proc - * all we care about is that we have a task with the appropriate - * pid, we don't actually care if we have the right task. - */ -static inline bool has_group_leader_pid(struct task_struct *p) -{ - return task_pid(p) == p->signal->leader_pid; -} - -static inline -bool same_thread_group(struct task_struct *p1, struct task_struct *p2) -{ - return p1->signal == p2->signal; -} - -static inline struct task_struct *next_thread(const struct task_struct *p) -{ - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); -} - -static inline int thread_group_empty(struct task_struct *p) -{ - return list_empty(&p->thread_group); -} - -#define delay_group_leader(p) \ - (thread_group_leader(p) && !thread_group_empty(p)) - /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. 
Also @@ -2471,25 +2019,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, - unsigned long *flags); - -static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - struct sighand_struct *ret; - - ret = __lock_task_sighand(tsk, flags); - (void)__cond_lock(&tsk->sighand->siglock, ret); - return ret; -} - -static inline void unlock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); -} - #ifdef CONFIG_THREAD_INFO_IN_TASK static inline struct thread_info *task_thread_info(struct task_struct *task) @@ -2862,28 +2391,6 @@ static inline void mm_update_next_owner(struct mm_struct *mm) } #endif /* CONFIG_MEMCG */ -static inline unsigned long task_rlimit(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); -} - -static inline unsigned long task_rlimit_max(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_max); -} - -static inline unsigned long rlimit(unsigned int limit) -{ - return task_rlimit(current, limit); -} - -static inline unsigned long rlimit_max(unsigned int limit) -{ - return task_rlimit_max(current, limit); -} - #define SCHED_CPUFREQ_RT (1U << 0) #define SCHED_CPUFREQ_DL (1U << 1) #define SCHED_CPUFREQ_IOWAIT (1U << 2) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index c6958a53fef3..53fe5450f431 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -8,4 +8,506 @@ #include #include +/* + * Types defining task->signal and task->sighand and APIs using them: + */ + +struct sighand_struct { + atomic_t count; + struct k_sigaction action[_NSIG]; + spinlock_t siglock; + wait_queue_head_t signalfd_wqh; +}; + +/* + * NOTE! "signal_struct" does not have its own + * locking, because a shared signal_struct always + * implies a shared sighand_struct, so locking + * sighand_struct is always a proper superset of + * the locking of signal_struct. + */ +struct signal_struct { + atomic_t sigcnt; + atomic_t live; + int nr_threads; + struct list_head thread_head; + + wait_queue_head_t wait_chldexit; /* for wait4() */ + + /* current thread group signal load-balancing target: */ + struct task_struct *curr_target; + + /* shared signal handling: */ + struct sigpending shared_pending; + + /* thread group exit support */ + int group_exit_code; + /* overloaded: + * - notify group_exit_task when ->count is equal to notify_count + * - everyone except group_exit_task is stopped during signal delivery + * of fatal signals, group_exit_task processes the signal. + */ + int notify_count; + struct task_struct *group_exit_task; + + /* thread group stop support, overloads group_exit_code too */ + int group_stop_count; + unsigned int flags; /* see SIGNAL_* flags below */ + + /* + * PR_SET_CHILD_SUBREAPER marks a process, like a service + * manager, to re-parent orphan (double-forking) child processes + * to this process instead of 'init'. The service manager is + * able to receive SIGCHLD signals and is able to investigate + * the process until it calls wait(). All children of this + * process will inherit a flag if they should look for a + * child_subreaper process at exit. 
+ */ + unsigned int is_child_subreaper:1; + unsigned int has_child_subreaper:1; + +#ifdef CONFIG_POSIX_TIMERS + + /* POSIX.1b Interval Timers */ + int posix_timer_id; + struct list_head posix_timers; + + /* ITIMER_REAL timer for the process */ + struct hrtimer real_timer; + ktime_t it_real_incr; + + /* + * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use + * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these + * values are defined to 0 and 1 respectively + */ + struct cpu_itimer it[2]; + + /* + * Thread group totals for process CPU timers. + * See thread_group_cputimer(), et al, for details. + */ + struct thread_group_cputimer cputimer; + + /* Earliest-expiration cache. */ + struct task_cputime cputime_expires; + + struct list_head cpu_timers[3]; + +#endif + + struct pid *leader_pid; + +#ifdef CONFIG_NO_HZ_FULL + atomic_t tick_dep_mask; +#endif + + struct pid *tty_old_pgrp; + + /* boolean value for session group leader */ + int leader; + + struct tty_struct *tty; /* NULL if no tty */ + +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif + /* + * Cumulative resource counters for dead threads in the group, + * and for reaped dead child processes forked by this group. + * Live threads maintain their own counters and add to these + * in __exit_signal, except for the group leader. + */ + seqlock_t stats_lock; + u64 utime, stime, cutime, cstime; + u64 gtime; + u64 cgtime; + struct prev_cputime prev_cputime; + unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; + unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; + unsigned long inblock, oublock, cinblock, coublock; + unsigned long maxrss, cmaxrss; + struct task_io_accounting ioac; + + /* + * Cumulative ns of schedule CPU time fo dead threads in the + * group, not including a zombie group leader, (This only differs + * from jiffies_to_ns(utime + stime) if sched_clock uses something + * other than jiffies.) + */ + unsigned long long sum_sched_runtime; + + /* + * We don't bother to synchronize most readers of this at all, + * because there is no reader checking a limit that actually needs + * to get both rlim_cur and rlim_max atomically, and either one + * alone is a single word that can safely be read normally. + * getrlimit/setrlimit use task_lock(current->group_leader) to + * protect this instead of the siglock, because they really + * have no need to disable irqs. + */ + struct rlimit rlim[RLIM_NLIMITS]; + +#ifdef CONFIG_BSD_PROCESS_ACCT + struct pacct_struct pacct; /* per-process accounting information */ +#endif +#ifdef CONFIG_TASKSTATS + struct taskstats *stats; +#endif +#ifdef CONFIG_AUDIT + unsigned audit_tty; + struct tty_audit_buf *tty_audit_buf; +#endif + + /* + * Thread is the potential origin of an oom condition; kill first on + * oom + */ + bool oom_flag_origin; + short oom_score_adj; /* OOM kill score adjustment */ + short oom_score_adj_min; /* OOM kill score adjustment min value. + * Only settable by CAP_SYS_RESOURCE. */ + struct mm_struct *oom_mm; /* recorded mm when the thread group got + * killed by the oom killer */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ +}; + +/* + * Bits in flags field of signal_struct. 
+ */ +#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ +#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ +#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ +/* + * Pending notifications to parent. + */ +#define SIGNAL_CLD_STOPPED 0x00000010 +#define SIGNAL_CLD_CONTINUED 0x00000020 +#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) + +#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ + +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ + SIGNAL_STOP_CONTINUED) + +static inline void signal_set_stop_flags(struct signal_struct *sig, + unsigned int flags) +{ + WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; +} + +/* If true, all threads except ->group_exit_task have pending SIGKILL */ +static inline int signal_group_exit(const struct signal_struct *sig) +{ + return (sig->flags & SIGNAL_GROUP_EXIT) || + (sig->group_exit_task != NULL); +} + +extern void flush_signals(struct task_struct *); +extern void ignore_signals(struct task_struct *); +extern void flush_signal_handlers(struct task_struct *, int force_default); +extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); + +static inline int kernel_dequeue_signal(siginfo_t *info) +{ + struct task_struct *tsk = current; + siginfo_t __info; + int ret; + + spin_lock_irq(&tsk->sighand->siglock); + ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); + spin_unlock_irq(&tsk->sighand->siglock); + + return ret; +} + +static inline void kernel_signal_stop(void) +{ + spin_lock_irq(¤t->sighand->siglock); + if (current->jobctl & JOBCTL_STOP_DEQUEUED) + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); + + schedule(); +} +extern int send_sig_info(int, struct siginfo *, struct task_struct *); +extern int force_sigsegv(int, struct task_struct *); +extern int force_sig_info(int, struct siginfo *, struct task_struct *); +extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); +extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); +extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, + const struct cred *, u32); +extern int kill_pgrp(struct pid *pid, int sig, int priv); +extern int kill_pid(struct pid *pid, int sig, int priv); +extern int kill_proc_info(int, struct siginfo *, pid_t); +extern __must_check bool do_notify_parent(struct task_struct *, int); +extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); +extern void force_sig(int, struct task_struct *); +extern int send_sig(int, struct task_struct *, int); +extern int zap_other_threads(struct task_struct *p); +extern struct sigqueue *sigqueue_alloc(void); +extern void sigqueue_free(struct sigqueue *); +extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); +extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); + +#ifdef TIF_RESTORE_SIGMASK +/* + * Legacy restore_sigmask accessors. These are inefficient on + * SMP architectures because they require atomic operations. + */ + +/** + * set_restore_sigmask() - make sure saved_sigmask processing gets done + * + * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code + * will run before returning to user mode, to process the flag. 
For + * all callers, TIF_SIGPENDING is already set or it's no harm to set + * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the + * arch code will notice on return to user mode, in case those bits + * are scarce. We set TIF_SIGPENDING here to ensure that the arch + * signal code always gets run when TIF_RESTORE_SIGMASK is set. + */ +static inline void set_restore_sigmask(void) +{ + set_thread_flag(TIF_RESTORE_SIGMASK); + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + clear_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_restore_sigmask(void) +{ + return test_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_and_clear_restore_sigmask(void) +{ + return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); +} + +#else /* TIF_RESTORE_SIGMASK */ + +/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ +static inline void set_restore_sigmask(void) +{ + current->restore_sigmask = true; + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + current->restore_sigmask = false; +} +static inline bool test_restore_sigmask(void) +{ + return current->restore_sigmask; +} +static inline bool test_and_clear_restore_sigmask(void) +{ + if (!current->restore_sigmask) + return false; + current->restore_sigmask = false; + return true; +} +#endif + +static inline void restore_saved_sigmask(void) +{ + if (test_and_clear_restore_sigmask()) + __set_current_blocked(¤t->saved_sigmask); +} + +static inline sigset_t *sigmask_to_save(void) +{ + sigset_t *res = ¤t->blocked; + if (unlikely(test_restore_sigmask())) + res = ¤t->saved_sigmask; + return res; +} + +static inline int kill_cad_pid(int sig, int priv) +{ + return kill_pid(cad_pid, sig, priv); +} + +/* These can be the second arg to send_sig_info/send_group_sig_info. */ +#define SEND_SIG_NOINFO ((struct siginfo *) 0) +#define SEND_SIG_PRIV ((struct siginfo *) 1) +#define SEND_SIG_FORCED ((struct siginfo *) 2) + +/* + * True if we are on the alternate signal stack. + */ +static inline int on_sig_stack(unsigned long sp) +{ + /* + * If the signal stack is SS_AUTODISARM then, by construction, we + * can't be on the signal stack unless user code deliberately set + * SS_AUTODISARM when we were already on it. + * + * This improves reliability: if user state gets corrupted such that + * the stack pointer points very close to the end of the signal stack, + * then this check will enable the signal to be handled anyway. + */ + if (current->sas_ss_flags & SS_AUTODISARM) + return 0; + +#ifdef CONFIG_STACK_GROWSUP + return sp >= current->sas_ss_sp && + sp - current->sas_ss_sp < current->sas_ss_size; +#else + return sp > current->sas_ss_sp && + sp - current->sas_ss_sp <= current->sas_ss_size; +#endif +} + +static inline int sas_ss_flags(unsigned long sp) +{ + if (!current->sas_ss_size) + return SS_DISABLE; + + return on_sig_stack(sp) ? SS_ONSTACK : 0; +} + +static inline void sas_ss_reset(struct task_struct *p) +{ + p->sas_ss_sp = 0; + p->sas_ss_size = 0; + p->sas_ss_flags = SS_DISABLE; +} + +static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) +{ + if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! 
sas_ss_flags(sp)) +#ifdef CONFIG_STACK_GROWSUP + return current->sas_ss_sp; +#else + return current->sas_ss_sp + current->sas_ss_size; +#endif + return sp; +} + +extern void __cleanup_sighand(struct sighand_struct *); +extern void flush_itimer_signals(void); + +#define tasklist_empty() \ + list_empty(&init_task.tasks) + +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) + +#define for_each_process(p) \ + for (p = &init_task ; (p = next_task(p)) != &init_task ; ) + +extern bool current_is_single_threaded(void); + +/* + * Careful: do_each_thread/while_each_thread is a double loop so + * 'break' will not work as expected - use goto instead. + */ +#define do_each_thread(g, t) \ + for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do + +#define while_each_thread(g, t) \ + while ((t = next_thread(t)) != g) + +#define __for_each_thread(signal, t) \ + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + +#define for_each_thread(p, t) \ + __for_each_thread((p)->signal, t) + +/* Careful: this is a double loop, 'break' won't work as expected. */ +#define for_each_process_thread(p, t) \ + for_each_process(p) for_each_thread(p, t) + +typedef int (*proc_visitor)(struct task_struct *p, void *data); +void walk_process_tree(struct task_struct *top, proc_visitor, void *); + +static inline int get_nr_threads(struct task_struct *tsk) +{ + return tsk->signal->nr_threads; +} + +static inline bool thread_group_leader(struct task_struct *p) +{ + return p->exit_signal >= 0; +} + +/* Do to the insanities of de_thread it is possible for a process + * to have the pid of the thread group leader without actually being + * the thread group leader. For iteration through the pids in proc + * all we care about is that we have a task with the appropriate + * pid, we don't actually care if we have the right task. 
+ */ +static inline bool has_group_leader_pid(struct task_struct *p) +{ + return task_pid(p) == p->signal->leader_pid; +} + +static inline +bool same_thread_group(struct task_struct *p1, struct task_struct *p2) +{ + return p1->signal == p2->signal; +} + +static inline struct task_struct *next_thread(const struct task_struct *p) +{ + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); +} + +static inline int thread_group_empty(struct task_struct *p) +{ + return list_empty(&p->thread_group); +} + +#define delay_group_leader(p) \ + (thread_group_leader(p) && !thread_group_empty(p)) + +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags); + +static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + struct sighand_struct *ret; + + ret = __lock_task_sighand(tsk, flags); + (void)__cond_lock(&tsk->sighand->siglock, ret); + return ret; +} + +static inline void unlock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); +} + +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + #endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index fc34bcf2329f..08d2cb605101 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/vmalloc.c b/mm/vmalloc.c index be93949b4885..b4024d688f38 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5d4208ad029e..85837ab90e89 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index cc6b6f8651eb..e41f594a1e1d 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -11,6 +11,8 @@ #include #include +#include + #include #include diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 03dfcc6b7661..67a71d170bed 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -9,6 +9,8 @@ */ #include +#include + #include #include "smc.h" diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 5d1878732f46..c4ef9a4ec569 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -11,6 +11,8 @@ #include #include +#include + #include #include "smc.h" diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 6e73b28915ea..69a0013dd25c 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #include "smc.h" -- GitLab From bcbb6a5bf7df6e37ba652d1f426eab042ec4f56b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 10:22:42 +0100 Subject: [PATCH 0976/1084] sched/headers: Move 'struct user_struct' definition and APIs to the new header 'struct user_struct' was added to sched.h historically, but it's actually entirely independent 
of task_struct and of scheduler details, so move it to its own header. Fix up .c files using those facilities. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 53 ------------------------------------- include/linux/sched/user.h | 54 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 53 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index c1586104d4c0..71efbcafaa31 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -349,7 +349,6 @@ extern void io_schedule(void); void __noreturn do_task_dead(void); struct nsproxy; -struct user_namespace; #ifdef CONFIG_MMU extern void arch_pick_mmap_layout(struct mm_struct *mm); @@ -481,49 +480,6 @@ struct thread_group_cputimer { #include struct autogroup; -/* - * Some day this will be a full-fledged user tracking system.. - */ -struct user_struct { - atomic_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ - atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_FANOTIFY - atomic_t fanotify_listeners; -#endif -#ifdef CONFIG_EPOLL - atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ -#endif -#ifdef CONFIG_POSIX_MQUEUE - /* protected by mq_lock */ - unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ -#endif - unsigned long locked_shm; /* How many pages of mlocked shm ? */ - unsigned long unix_inflight; /* How many files in flight in unix sockets */ - atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ - -#ifdef CONFIG_KEYS - struct key *uid_keyring; /* UID specific keyring */ - struct key *session_keyring; /* UID's default session keyring */ -#endif - - /* Hash table maintenance information */ - struct hlist_node uidhash_node; - kuid_t uid; - -#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) - atomic_long_t locked_vm; -#endif -}; - -extern int uids_sysfs_init(void); - -extern struct user_struct *find_user(kuid_t); - -extern struct user_struct root_user; -#define INIT_USER (&root_user) - - struct backing_dev_info; struct reclaim_state; @@ -1908,15 +1864,6 @@ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); -/* per-UID process charging. */ -extern struct user_struct * alloc_uid(kuid_t); -static inline struct user_struct *get_uid(struct user_struct *u) -{ - atomic_inc(&u->__count); - return u; -} -extern void free_uid(struct user_struct *); - #include extern void xtime_update(unsigned long ticks); diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 83238e5ddadd..40c6363da5ef 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -3,4 +3,58 @@ #include +struct key; + +/* + * Some day this will be a full-fledged user tracking system.. + */ +struct user_struct { + atomic_t __count; /* reference count */ + atomic_t processes; /* How many processes does this user have? */ + atomic_t sigpending; /* How many pending signals does this user have? */ +#ifdef CONFIG_FANOTIFY + atomic_t fanotify_listeners; +#endif +#ifdef CONFIG_EPOLL + atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ +#endif +#ifdef CONFIG_POSIX_MQUEUE + /* protected by mq_lock */ + unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? 
*/ +#endif + unsigned long locked_shm; /* How many pages of mlocked shm ? */ + unsigned long unix_inflight; /* How many files in flight in unix sockets */ + atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ + +#ifdef CONFIG_KEYS + struct key *uid_keyring; /* UID specific keyring */ + struct key *session_keyring; /* UID's default session keyring */ +#endif + + /* Hash table maintenance information */ + struct hlist_node uidhash_node; + kuid_t uid; + +#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) + atomic_long_t locked_vm; +#endif +}; + +extern int uids_sysfs_init(void); + +extern struct user_struct *find_user(kuid_t); + +extern struct user_struct root_user; +#define INIT_USER (&root_user) + + +/* per-UID process charging. */ +extern struct user_struct * alloc_uid(kuid_t); +static inline struct user_struct *get_uid(struct user_struct *u) +{ + atomic_inc(&u->__count); + return u; +} +extern void free_uid(struct user_struct *); + #endif /* _LINUX_SCHED_USER_H */ -- GitLab From d151b27d3f86589308cd8c891fdfd2db5f8e80d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:17:23 +0100 Subject: [PATCH 0977/1084] sched/headers: Move softlockup detector watchdog methods to These methods don't belong into , they are neither directly related to task_struct or are scheduler functionality. Put them next to the other watchdog methods in . ( Arguably that header's name is a misnomer, and this patch makes it more so - but it should be renamed in another patch. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 37 +++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 37 ------------------------------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0a3fadc32693..aa3cd0878270 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -7,6 +7,43 @@ #include #include +#ifdef CONFIG_LOCKUP_DETECTOR +extern void touch_softlockup_watchdog_sched(void); +extern void touch_softlockup_watchdog(void); +extern void touch_softlockup_watchdog_sync(void); +extern void touch_all_softlockup_watchdogs(void); +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +extern unsigned int softlockup_panic; +extern unsigned int hardlockup_panic; +void lockup_detector_init(void); +#else +static inline void touch_softlockup_watchdog_sched(void) +{ +} +static inline void touch_softlockup_watchdog(void) +{ +} +static inline void touch_softlockup_watchdog_sync(void) +{ +} +static inline void touch_all_softlockup_watchdogs(void) +{ +} +static inline void lockup_detector_init(void) +{ +} +#endif + +#ifdef CONFIG_DETECT_HUNG_TASK +void reset_hung_task_detector(void); +#else +static inline void reset_hung_task_detector(void) +{ +} +#endif + /* * The run state of the lockup detectors is controlled by the content of the * 'watchdog_enabled' variable. 
Each lockup detector has its dedicated bit - diff --git a/include/linux/sched.h b/include/linux/sched.h index 71efbcafaa31..8a1d296c53a0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -286,43 +286,6 @@ extern int sched_cpu_dying(unsigned int cpu); extern void sched_show_task(struct task_struct *p); -#ifdef CONFIG_LOCKUP_DETECTOR -extern void touch_softlockup_watchdog_sched(void); -extern void touch_softlockup_watchdog(void); -extern void touch_softlockup_watchdog_sync(void); -extern void touch_all_softlockup_watchdogs(void); -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -extern unsigned int softlockup_panic; -extern unsigned int hardlockup_panic; -void lockup_detector_init(void); -#else -static inline void touch_softlockup_watchdog_sched(void) -{ -} -static inline void touch_softlockup_watchdog(void) -{ -} -static inline void touch_softlockup_watchdog_sync(void) -{ -} -static inline void touch_all_softlockup_watchdogs(void) -{ -} -static inline void lockup_detector_init(void) -{ -} -#endif - -#ifdef CONFIG_DETECT_HUNG_TASK -void reset_hung_task_detector(void); -#else -static inline void reset_hung_task_detector(void) -{ -} -#endif - /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) -- GitLab From 8d88460edc05224b8d7ae3372173153876a02825 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:06:10 +0100 Subject: [PATCH 0978/1084] sched/headers: Move 'struct pacct_struct' and 'struct cpu_itimer' form to These structures are actually part of 'struct signal', so move them to where they belong. This further decreases the size and complexity of . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 13 ------------- include/linux/sched/signal.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8a1d296c53a0..8bf111efe98e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -326,19 +326,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -struct pacct_struct { - int ac_flag; - long ac_exitcode; - unsigned long ac_mem; - u64 ac_utime, ac_stime; - unsigned long ac_minflt, ac_majflt; -}; - -struct cpu_itimer { - u64 expires; - u64 incr; -}; - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 53fe5450f431..30e7ceed3ef6 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -19,6 +19,22 @@ struct sighand_struct { wait_queue_head_t signalfd_wqh; }; +/* + * Per-process accounting stats: + */ +struct pacct_struct { + int ac_flag; + long ac_exitcode; + unsigned long ac_mem; + u64 ac_utime, ac_stime; + unsigned long ac_minflt, ac_majflt; +}; + +struct cpu_itimer { + u64 expires; + u64 incr; +}; + /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always -- GitLab From 7284c6d419b5a6ae9806927f1fd4f0cfd19a16f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:14:19 +0100 Subject: [PATCH 0979/1084] sched/headers: Move the cpufreq interfaces to No need to have this in the generic header. 
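For illustration, here is a minimal sketch of the kind of consumer this interface serves once it lives in the cpufreq scheduler header; the governor name, callback body and start/stop helpers are hypothetical and not part of this patch, only the update_util hook API and SCHED_CPUFREQ_* flags shown below are real:

#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/sched/cpufreq.h>

/* Hypothetical per-CPU hook data for an example governor. */
static DEFINE_PER_CPU(struct update_util_data, example_update_data);

/* Called by the scheduler on utilization updates for this CPU. */
static void example_update_util(struct update_util_data *data, u64 time,
				unsigned int flags)
{
	if (flags & (SCHED_CPUFREQ_RT_DL | SCHED_CPUFREQ_IOWAIT)) {
		/* An RT/DL task ran or an iowait boost was requested:
		 * a real governor would raise its frequency target here. */
	}
}

static void example_governor_start(int cpu)
{
	cpufreq_add_update_util_hook(cpu, &per_cpu(example_update_data, cpu),
				     example_update_util);
}

static void example_governor_stop(int cpu)
{
	cpufreq_remove_update_util_hook(cpu);
}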
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 17 ----------------- include/linux/sched/cpufreq.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8bf111efe98e..aae79706ec4f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2288,21 +2288,4 @@ static inline void mm_update_next_owner(struct mm_struct *mm) } #endif /* CONFIG_MEMCG */ -#define SCHED_CPUFREQ_RT (1U << 0) -#define SCHED_CPUFREQ_DL (1U << 1) -#define SCHED_CPUFREQ_IOWAIT (1U << 2) - -#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) - -#ifdef CONFIG_CPU_FREQ -struct update_util_data { - void (*func)(struct update_util_data *data, u64 time, unsigned int flags); -}; - -void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, - void (*func)(struct update_util_data *data, u64 time, - unsigned int flags)); -void cpufreq_remove_update_util_hook(int cpu); -#endif /* CONFIG_CPU_FREQ */ - #endif diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 07ed9664fa20..2bdcfbfc4c30 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -3,4 +3,25 @@ #include +/* + * Interface between cpufreq drivers and the scheduler: + */ + +#define SCHED_CPUFREQ_RT (1U << 0) +#define SCHED_CPUFREQ_DL (1U << 1) +#define SCHED_CPUFREQ_IOWAIT (1U << 2) + +#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL) + +#ifdef CONFIG_CPU_FREQ +struct update_util_data { + void (*func)(struct update_util_data *data, u64 time, unsigned int flags); +}; + +void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, + void (*func)(struct update_util_data *data, u64 time, + unsigned int flags)); +void cpufreq_remove_update_util_hook(int cpu); +#endif /* CONFIG_CPU_FREQ */ + #endif /* _LINUX_SCHED_CPUFREQ_H */ -- GitLab From 4240c8bf877f1145571106a2934c5cea0b51b178 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:18:24 +0100 Subject: [PATCH 0980/1084] sched/headers: Move more mm_struct related functionality from to Neither the mmap_layout nor the mm_update_next_owner() methods need to be in - move them to the more appropriate header. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 21 --------------------- include/linux/sched/mm.h | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index aae79706ec4f..1ddb82be6b50 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -313,19 +313,6 @@ void __noreturn do_task_dead(void); struct nsproxy; -#ifdef CONFIG_MMU -extern void arch_pick_mmap_layout(struct mm_struct *mm); -extern unsigned long -arch_get_unmapped_area(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); -extern unsigned long -arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -#else -static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} -#endif - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode @@ -2280,12 +2267,4 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_MEMCG -extern void mm_update_next_owner(struct mm_struct *mm); -#else -static inline void mm_update_next_owner(struct mm_struct *mm) -{ -} -#endif /* CONFIG_MEMCG */ - #endif diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index be1ae55f5ab9..93fad9f0f466 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -100,4 +100,25 @@ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); +#ifdef CONFIG_MEMCG +extern void mm_update_next_owner(struct mm_struct *mm); +#else +static inline void mm_update_next_owner(struct mm_struct *mm) +{ +} +#endif /* CONFIG_MEMCG */ + +#ifdef CONFIG_MMU +extern void arch_pick_mmap_layout(struct mm_struct *mm); +extern unsigned long +arch_get_unmapped_area(struct file *, unsigned long, unsigned long, + unsigned long, unsigned long); +extern unsigned long +arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); +#else +static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ -- GitLab From abe722a1c59728c6c0ea4e4d5efcfe397c8abebc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:27:56 +0100 Subject: [PATCH 0981/1084] sched/headers: Move the 'init_mm' declaration from to Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 2 ++ include/linux/sched.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6daaf0a51968..28bc710d3467 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -546,6 +546,8 @@ struct mm_struct { struct work_struct async_put_work; }; +extern struct mm_struct init_mm; + static inline void mm_init_cpumask(struct mm_struct *mm) { #ifdef CONFIG_CPUMASK_OFFSTACK diff --git a/include/linux/sched.h b/include/linux/sched.h index 1ddb82be6b50..253d8ab6256c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1782,8 +1782,6 @@ static inline int kstack_end(void *addr) extern union thread_union init_thread_union; extern 
struct task_struct init_task; -extern struct mm_struct init_mm; - extern struct pid_namespace init_pid_ns; /* -- GitLab From d026ce796cbca3c49678a68bb4a39fb4b9cf8192 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:32:21 +0100 Subject: [PATCH 0982/1084] sched/headers: Move in_vfork() from to The in_vfork() function deals with task->mm, so it better belongs into . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 26 -------------------------- include/linux/sched/mm.h | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 253d8ab6256c..aa60812b4b7a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1242,32 +1242,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) #define TNF_FAULT_LOCAL 0x08 #define TNF_MIGRATE_FAIL 0x10 -static inline bool in_vfork(struct task_struct *tsk) -{ - bool ret; - - /* - * need RCU to access ->real_parent if CLONE_VM was used along with - * CLONE_PARENT. - * - * We check real_parent->mm == tsk->mm because CLONE_VFORK does not - * imply CLONE_VM - * - * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus - * ->real_parent is not necessarily the task doing vfork(), so in - * theory we can't rely on task_lock() if we want to dereference it. - * - * And in this case we can't trust the real_parent->mm == tsk->mm - * check, it can be false negative. But we do not care, if init or - * another oom-unkillable task does this it should blame itself. - */ - rcu_read_lock(); - ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; - rcu_read_unlock(); - - return ret; -} - #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 93fad9f0f466..64d83fa8d93a 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -121,4 +121,30 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif +static inline bool in_vfork(struct task_struct *tsk) +{ + bool ret; + + /* + * need RCU to access ->real_parent if CLONE_VM was used along with + * CLONE_PARENT. + * + * We check real_parent->mm == tsk->mm because CLONE_VFORK does not + * imply CLONE_VM + * + * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus + * ->real_parent is not necessarily the task doing vfork(), so in + * theory we can't rely on task_lock() if we want to dereference it. + * + * And in this case we can't trust the real_parent->mm == tsk->mm + * check, it can be false negative. But we do not care, if init or + * another oom-unkillable task does this it should blame itself. + */ + rcu_read_lock(); + ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; + rcu_read_unlock(); + + return ret; +} + #endif /* _LINUX_SCHED_MM_H */ -- GitLab From 5647028d550c959577527cec9c0f29fbcea029ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:39:17 +0100 Subject: [PATCH 0983/1084] sched/headers: Move the NUMA balancing interfaces from to Split out the interface between the scheduler and the MM which deals with page fault driven NUMA balancing, into the new header. 
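As an illustration of the interface being split out, a minimal sketch of a fault-path caller follows; the helper name and its arguments are hypothetical, only task_numa_fault() and the TNF_* flags are part of the interface:

#include <linux/types.h>
#include <linux/sched/numa_balancing.h>

/* Hypothetical fault-path hook: report a NUMA hinting fault so the
 * scheduler can learn this task's memory access pattern. */
static void example_record_numa_fault(int last_nid, int page_nid,
				      int nr_pages, bool migrated, bool local)
{
	int flags = 0;

	if (migrated)
		flags |= TNF_MIGRATED;
	if (local)
		flags |= TNF_FAULT_LOCAL;

	task_numa_fault(last_nid, page_nid, nr_pages, flags);
}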
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 35 ------------------------ include/linux/sched/numa_balancing.h | 40 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index aa60812b4b7a..fcaea1e7b08a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1236,41 +1236,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif -#define TNF_MIGRATED 0x01 -#define TNF_NO_GROUP 0x02 -#define TNF_SHARED 0x04 -#define TNF_FAULT_LOCAL 0x08 -#define TNF_MIGRATE_FAIL 0x10 - -#ifdef CONFIG_NUMA_BALANCING -extern void task_numa_fault(int last_node, int node, int pages, int flags); -extern pid_t task_numa_group_id(struct task_struct *p); -extern void set_numabalancing_state(bool enabled); -extern void task_numa_free(struct task_struct *p); -extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, - int src_nid, int dst_cpu); -#else -static inline void task_numa_fault(int last_node, int node, int pages, - int flags) -{ -} -static inline pid_t task_numa_group_id(struct task_struct *p) -{ - return 0; -} -static inline void set_numabalancing_state(bool enabled) -{ -} -static inline void task_numa_free(struct task_struct *p) -{ -} -static inline bool should_numa_migrate_memory(struct task_struct *p, - struct page *page, int src_nid, int dst_cpu) -{ - return true; -} -#endif - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h index 999182279f78..35d5fc77b4be 100644 --- a/include/linux/sched/numa_balancing.h +++ b/include/linux/sched/numa_balancing.h @@ -1,6 +1,46 @@ #ifndef _LINUX_SCHED_NUMA_BALANCING_H #define _LINUX_SCHED_NUMA_BALANCING_H +/* + * This is the interface between the scheduler and the MM that + * implements memory access pattern based NUMA-balancing: + */ + #include +#define TNF_MIGRATED 0x01 +#define TNF_NO_GROUP 0x02 +#define TNF_SHARED 0x04 +#define TNF_FAULT_LOCAL 0x08 +#define TNF_MIGRATE_FAIL 0x10 + +#ifdef CONFIG_NUMA_BALANCING +extern void task_numa_fault(int last_node, int node, int pages, int flags); +extern pid_t task_numa_group_id(struct task_struct *p); +extern void set_numabalancing_state(bool enabled); +extern void task_numa_free(struct task_struct *p); +extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, + int src_nid, int dst_cpu); +#else +static inline void task_numa_fault(int last_node, int node, int pages, + int flags) +{ +} +static inline pid_t task_numa_group_id(struct task_struct *p) +{ + return 0; +} +static inline void set_numabalancing_state(bool enabled) +{ +} +static inline void task_numa_free(struct task_struct *p) +{ +} +static inline bool should_numa_migrate_memory(struct task_struct *p, + struct page *page, int src_nid, int dst_cpu) +{ + return true; +} +#endif + #endif /* _LINUX_SCHED_NUMA_BALANCING_H */ -- GitLab From c41cfc6c5ba46050b416c0b0b2621cbe68c4669c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 12:45:58 +0100 Subject: [PATCH 0984/1084] sched/headers: Move the JOBCTL_ defines and methods from to Only a small fraction of sched.h users actually utilizes these defines, and they are not scheduler functionality in any case, so move them into their separate header. 
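For context, a minimal sketch of how these bits are consumed; the helper is hypothetical, it merely mirrors the pattern used by the job-control code in kernel/signal.c and assumes the caller already holds task->sighand->siglock:

#include <linux/sched/jobctl.h>
#include <linux/sched/signal.h>

/* Hypothetical helper: ask @task to take part in a group stop. */
static void example_request_group_stop(struct task_struct *task)
{
	unsigned long mask = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;

	if (task_set_jobctl_pending(task, mask))
		signal_wake_up(task, false);
}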
(Also make a self-contained header.) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 30 ------------------------------ include/linux/sched/jobctl.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index fcaea1e7b08a..46ba8f1b5f1f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1549,36 +1549,6 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -/* - * task->jobctl flags - */ -#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ - -#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ -#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ -#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ -#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ -#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ -#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ -#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ - -#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) -#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) -#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) -#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) -#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) -#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) -#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) - -#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) - -extern bool task_set_jobctl_pending(struct task_struct *task, - unsigned long mask); -extern void task_clear_jobctl_trapping(struct task_struct *task); -extern void task_clear_jobctl_pending(struct task_struct *task, - unsigned long mask); - static inline void rcu_copy_process(struct task_struct *p) { #ifdef CONFIG_PREEMPT_RCU diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index 228e4644937b..016afa0fb3bb 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -1,4 +1,36 @@ #ifndef _LINUX_SCHED_JOBCTL_H #define _LINUX_SCHED_JOBCTL_H +#include + +struct task_struct; + +/* + * task->jobctl flags + */ +#define JOBCTL_STOP_SIGMASK 0xffff /* signr of the last group stop */ + +#define JOBCTL_STOP_DEQUEUED_BIT 16 /* stop signal dequeued */ +#define JOBCTL_STOP_PENDING_BIT 17 /* task should stop for group stop */ +#define JOBCTL_STOP_CONSUME_BIT 18 /* consume group stop count */ +#define JOBCTL_TRAP_STOP_BIT 19 /* trap for STOP */ +#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */ +#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ +#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ + +#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) +#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) +#define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) +#define JOBCTL_TRAP_STOP (1UL << JOBCTL_TRAP_STOP_BIT) +#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT) +#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) +#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) + +#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) + +extern bool task_set_jobctl_pending(struct task_struct 
*task, unsigned long mask); +extern void task_clear_jobctl_trapping(struct task_struct *task); +extern void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask); + #endif /* _LINUX_SCHED_JOBCTL_H */ -- GitLab From 30a1baab81189f01c427c4939610b2dde2dbec37 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:06:45 +0100 Subject: [PATCH 0985/1084] sched/headers: Remove various unrelated headers from Remove the following header inclusions from : #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include because they are either not required, or are already included naturally as part of the remaining headers. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 46ba8f1b5f1f..7752025679c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -5,60 +5,32 @@ #include -#include /* for HZ */ - #include -#include -#include -#include -#include -#include #include #include -#include -#include -#include -#include -#include #include -#include - -#include #include -#include #include #include #include -#include -#include #include #include -#include -#include #include -#include #include #include -#include -#include #include -#include #include #include #include #include #include -#include -#include #include #include #include #include -#include - struct sched_attr; struct sched_param; -- GitLab From 9a07000400c853c57477c2a5138ae9f556c40897 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 19:10:05 +0100 Subject: [PATCH 0986/1084] sched/headers: Move CONFIG_TASK_XACCT bits from to The CONFIG_TASK_XACCT=y accounting inline functions are only used by fs/read_write.c, so move them into their separate header. 
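For reference, a minimal sketch of the accounting pattern fs/read_write.c relies on; the wrapper name is hypothetical:

#include <linux/sched.h>
#include <linux/sched/xacct.h>

/* Hypothetical read-path bookkeeping: charge the bytes read and the
 * syscall to the current task (no-ops unless CONFIG_TASK_XACCT=y). */
static void example_account_read(ssize_t ret)
{
	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
}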
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 38 --------------------------------- include/linux/sched/xacct.h | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 7752025679c0..1201312e111e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2104,44 +2104,6 @@ extern struct task_group root_task_group; extern int task_can_switch_user(struct user_struct *up, struct task_struct *tsk); -#ifdef CONFIG_TASK_XACCT -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.rchar += amt; -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ - tsk->ioac.wchar += amt; -} - -static inline void inc_syscr(struct task_struct *tsk) -{ - tsk->ioac.syscr++; -} - -static inline void inc_syscw(struct task_struct *tsk) -{ - tsk->ioac.syscw++; -} -#else -static inline void add_rchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void add_wchar(struct task_struct *tsk, ssize_t amt) -{ -} - -static inline void inc_syscr(struct task_struct *tsk) -{ -} - -static inline void inc_syscw(struct task_struct *tsk) -{ -} -#endif - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h index 890f7ce5cd27..a28156a0d34a 100644 --- a/include/linux/sched/xacct.h +++ b/include/linux/sched/xacct.h @@ -1,6 +1,48 @@ #ifndef _LINUX_SCHED_XACCT_H #define _LINUX_SCHED_XACCT_H +/* + * Extended task accounting methods: + */ + #include +#ifdef CONFIG_TASK_XACCT +static inline void add_rchar(struct task_struct *tsk, ssize_t amt) +{ + tsk->ioac.rchar += amt; +} + +static inline void add_wchar(struct task_struct *tsk, ssize_t amt) +{ + tsk->ioac.wchar += amt; +} + +static inline void inc_syscr(struct task_struct *tsk) +{ + tsk->ioac.syscr++; +} + +static inline void inc_syscw(struct task_struct *tsk) +{ + tsk->ioac.syscw++; +} +#else +static inline void add_rchar(struct task_struct *tsk, ssize_t amt) +{ +} + +static inline void add_wchar(struct task_struct *tsk, ssize_t amt) +{ +} + +static inline void inc_syscr(struct task_struct *tsk) +{ +} + +static inline void inc_syscw(struct task_struct *tsk) +{ +} +#endif + #endif /* _LINUX_SCHED_XACCT_H */ -- GitLab From 2a1f062a4acf0be50516ceece92a7182a173d55a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 19:15:33 +0100 Subject: [PATCH 0987/1084] sched/headers: Move signal wakeup & sigpending methods from into This reduces the size of . 
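To illustrate the helpers being moved, a minimal sketch of the common interruptible-wait pattern they serve; the condition callback and poll interval are hypothetical:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/sched/signal.h>

static int example_wait_for_condition(bool (*done)(void))
{
	while (!done()) {
		if (fatal_signal_pending(current))
			return -EINTR;
		if (signal_pending(current))
			return -ERESTARTSYS;
		msleep(10);
	}
	return 0;
}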
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 51 ---------------------------------- include/linux/sched/signal.h | 53 +++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 1201312e111e..1e0400069e95 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1931,37 +1931,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } -static inline int restart_syscall(void) -{ - set_tsk_thread_flag(current, TIF_SIGPENDING); - return -ERESTARTNOINTR; -} - -static inline int signal_pending(struct task_struct *p) -{ - return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); -} - -static inline int __fatal_signal_pending(struct task_struct *p) -{ - return unlikely(sigismember(&p->pending.signal, SIGKILL)); -} - -static inline int fatal_signal_pending(struct task_struct *p) -{ - return signal_pending(p) && __fatal_signal_pending(p); -} - -static inline int signal_pending_state(long state, struct task_struct *p) -{ - if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) - return 0; - if (!signal_pending(p)) - return 0; - - return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); -} - /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return @@ -2028,26 +1997,6 @@ static __always_inline bool need_resched(void) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); -/* - * Reevaluate whether the task has signals pending delivery. - * Wake the task if so. - * This is required every time the blocked sigset_t changes. - * callers must hold sighand->siglock. - */ -extern void recalc_sigpending_and_wake(struct task_struct *t); -extern void recalc_sigpending(void); - -extern void signal_wake_up_state(struct task_struct *t, unsigned int state); - -static inline void signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); -} -static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) -{ - signal_wake_up_state(t, resume ? __TASK_TRACED : 0); -} - /* * Wrappers for p->thread_info->cpu access. No-op on UP. 
*/ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 30e7ceed3ef6..b3545fb4333a 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -3,10 +3,10 @@ #include #include -#include #include #include #include +#include /* * Types defining task->signal and task->sighand and APIs using them: @@ -271,6 +271,57 @@ extern void sigqueue_free(struct sigqueue *); extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); +static inline int restart_syscall(void) +{ + set_tsk_thread_flag(current, TIF_SIGPENDING); + return -ERESTARTNOINTR; +} + +static inline int signal_pending(struct task_struct *p) +{ + return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); +} + +static inline int __fatal_signal_pending(struct task_struct *p) +{ + return unlikely(sigismember(&p->pending.signal, SIGKILL)); +} + +static inline int fatal_signal_pending(struct task_struct *p) +{ + return signal_pending(p) && __fatal_signal_pending(p); +} + +static inline int signal_pending_state(long state, struct task_struct *p) +{ + if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL))) + return 0; + if (!signal_pending(p)) + return 0; + + return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); +} + +/* + * Reevaluate whether the task has signals pending delivery. + * Wake the task if so. + * This is required every time the blocked sigset_t changes. + * callers must hold sighand->siglock. + */ +extern void recalc_sigpending_and_wake(struct task_struct *t); +extern void recalc_sigpending(void); + +extern void signal_wake_up_state(struct task_struct *t, unsigned int state); + +static inline void signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); +} +static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) +{ + signal_wake_up_state(t, resume ? __TASK_TRACED : 0); +} + #ifdef TIF_RESTORE_SIGMASK /* * Legacy restore_sigmask accessors. These are inefficient on -- GitLab From 74444edaa0b2ef946e846d5ddf1ba90efcd7c200 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 20:43:54 +0100 Subject: [PATCH 0988/1084] sched/headers: Move the memalloc_noio_*() APIs to In preparation to remove the include from . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 22 ---------------------- include/linux/sched/mm.h | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e0400069e95..dbc3ff04750a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1468,28 +1468,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *s #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags - * __GFP_FS is also cleared as it implies __GFP_IO. 
- */ -static inline gfp_t memalloc_noio_flags(gfp_t flags) -{ - if (unlikely(current->flags & PF_MEMALLOC_NOIO)) - flags &= ~(__GFP_IO | __GFP_FS); - return flags; -} - -static inline unsigned int memalloc_noio_save(void) -{ - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; -} - -static inline void memalloc_noio_restore(unsigned int flags) -{ - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; -} - /* Per-process atomic flags. */ #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 64d83fa8d93a..62dca4e0b4e0 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -147,4 +147,26 @@ static inline bool in_vfork(struct task_struct *tsk) return ret; } +/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags + * __GFP_FS is also cleared as it implies __GFP_IO. + */ +static inline gfp_t memalloc_noio_flags(gfp_t flags) +{ + if (unlikely(current->flags & PF_MEMALLOC_NOIO)) + flags &= ~(__GFP_IO | __GFP_FS); + return flags; +} + +static inline unsigned int memalloc_noio_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOIO; + current->flags |= PF_MEMALLOC_NOIO; + return flags; +} + +static inline void memalloc_noio_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; +} + #endif /* _LINUX_SCHED_MM_H */ -- GitLab From 3605df49556ebb7641d1f8ec2e7eeecf4dd734bf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:03:12 +0100 Subject: [PATCH 0989/1084] sched/headers: Move task statistics APIs from to There are a number of task statistics related variables and methods exported via sched.h - collect them into and include it from their usage sites. 
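For context, a minimal sketch of a consumer of these counters; the diagnostic function is hypothetical, and note the new header's own caveat that the values are sampled without locking:

#include <linux/printk.h>
#include <linux/sched/stat.h>

static void example_dump_sched_stats(void)
{
	pr_info("threads: %d running: %lu iowait: %lu forks: %lu\n",
		nr_threads, nr_running(), nr_iowait(), total_forks);
}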
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/s390/appldata/appldata_base.c | 1 + include/linux/sched.h | 10 ---------- include/linux/sched/stat.h | 18 ++++++++++++++++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 5a8dfa22da7c..ef3fb1b9201f 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index dbc3ff04750a..415baf253517 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,16 +46,6 @@ struct nameidata; struct signal_struct; struct sighand_struct; -extern unsigned long total_forks; -extern int nr_threads; -DECLARE_PER_CPU(unsigned long, process_counts); -extern int nr_processes(void); -extern unsigned long nr_running(void); -extern bool single_task_running(void); -extern unsigned long nr_iowait(void); -extern unsigned long nr_iowait_cpu(int cpu); -extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); - #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void cpu_load_update_nohz_start(void); extern void cpu_load_update_nohz_stop(void); diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index 30fe012aecb0..0d52ceb6d3ae 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -3,4 +3,22 @@ #include +/* + * Various counters maintained by the scheduler and fork(), + * exposed via /proc, sys.c or used by drivers via these APIs. + * + * ( Note that all these values are aquired without locking, + * so they can only be relied on in narrow circumstances. ) + */ + +extern unsigned long total_forks; +extern int nr_threads; +DECLARE_PER_CPU(unsigned long, process_counts); +extern int nr_processes(void); +extern unsigned long nr_running(void); +extern bool single_task_running(void); +extern unsigned long nr_iowait(void); +extern unsigned long nr_iowait_cpu(int cpu); +extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); + #endif /* _LINUX_SCHED_STAT_H */ -- GitLab From 752b3ca734cbc17c0456b846ae886c0ed39c5703 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:12:59 +0100 Subject: [PATCH 0990/1084] sched/headers: Move the NOHZ APIs from to There's a number of NOHZ/dyntics related functionality in , but only a handful of timer files are making use of them. Move them into their own header. This better documents these APIs and unclutters . 
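As a small illustration of the dyntick interface, a hypothetical idle-loop fragment; a real idle loop does considerably more than this sketch:

#include <linux/sched/nohz.h>

/* Hypothetical idle bracket: tell the load-average code that this CPU
 * is entering and then leaving the idle/nohz state. */
static void example_idle_section(void (*do_idle_work)(void))
{
	calc_load_enter_idle();
	do_idle_work();
	calc_load_exit_idle();
}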
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 35 ---------------------------------- include/linux/sched/nohz.h | 39 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 415baf253517..5f9bfc603d19 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,14 +46,6 @@ struct nameidata; struct signal_struct; struct sighand_struct; -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void cpu_load_update_nohz_start(void); -extern void cpu_load_update_nohz_stop(void); -#else -static inline void cpu_load_update_nohz_start(void) { } -static inline void cpu_load_update_nohz_stop(void) { } -#endif - extern void dump_cpu_task(int cpu); struct seq_file; @@ -204,15 +196,6 @@ extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -extern void nohz_balance_enter_idle(int cpu); -extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(void); -#else -static inline void nohz_balance_enter_idle(int cpu) { } -static inline void set_cpu_sd_state_idle(void) { } -#endif - /* * Only dump TASK_* tasks. (0 for all tasks) */ @@ -1535,14 +1518,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif -#ifdef CONFIG_NO_HZ_COMMON -void calc_load_enter_idle(void); -void calc_load_exit_idle(void); -#else -static inline void calc_load_enter_idle(void) { } -static inline void calc_load_exit_idle(void) { } -#endif /* CONFIG_NO_HZ_COMMON */ - #ifndef cpu_relax_yield #define cpu_relax_yield() cpu_relax() #endif @@ -1563,16 +1538,6 @@ extern void idle_task_exit(void); static inline void idle_task_exit(void) {} #endif -#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) -extern void wake_up_nohz_cpu(int cpu); -#else -static inline void wake_up_nohz_cpu(int cpu) { } -#endif - -#ifdef CONFIG_NO_HZ_FULL -extern u64 scheduler_tick_max_deferment(void); -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index fe6caf0dab10..9471f0736a3a 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -3,4 +3,43 @@ #include +/* + * This is the interface between the scheduler and nohz/dyntics: + */ + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void cpu_load_update_nohz_start(void); +extern void cpu_load_update_nohz_stop(void); +#else +static inline void cpu_load_update_nohz_start(void) { } +static inline void cpu_load_update_nohz_stop(void) { } +#endif + +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) +extern void nohz_balance_enter_idle(int cpu); +extern void set_cpu_sd_state_idle(void); +extern int get_nohz_timer_target(void); +#else +static inline void nohz_balance_enter_idle(int cpu) { } +static inline void set_cpu_sd_state_idle(void) { } +#endif + +#ifdef CONFIG_NO_HZ_COMMON +void calc_load_enter_idle(void); +void calc_load_exit_idle(void); +#else +static inline void calc_load_enter_idle(void) { } +static inline void calc_load_exit_idle(void) { } +#endif /* CONFIG_NO_HZ_COMMON */ + +#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) +extern void wake_up_nohz_cpu(int cpu); +#else +static inline void wake_up_nohz_cpu(int cpu) { } 
+#endif + +#ifdef CONFIG_NO_HZ_FULL +extern u64 scheduler_tick_max_deferment(void); +#endif + #endif /* _LINUX_SCHED_NOHZ_H */ -- GitLab From d3d1e320d43a7bad9603acf0214406a1e8795c63 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 11:16:09 +0100 Subject: [PATCH 0991/1084] sched/headers: Move debugging functions from to Collect the various scheduler and task state debugging APIs scattered around into the new header. In particular the show_regs() and show_stack() prototype affects many files, update them. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 36 ----------------------------- include/linux/sched/debug.h | 46 +++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f9bfc603d19..76a2e522be29 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,15 +46,9 @@ struct nameidata; struct signal_struct; struct sighand_struct; -extern void dump_cpu_task(int cpu); - struct seq_file; struct cfs_rq; struct task_group; -#ifdef CONFIG_SCHED_DEBUG -extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); -extern void proc_sched_set_task(struct task_struct *p); -#endif /* * Task state bitmask. NOTE! These bits are also @@ -196,25 +190,6 @@ extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -/* - * Only dump TASK_* tasks. (0 for all tasks) - */ -extern void show_state_filter(unsigned long state_filter); - -static inline void show_state(void) -{ - show_state_filter(0); -} - -extern void show_regs(struct pt_regs *); - -/* - * TASK is a pointer to the task whose backtrace we want to see (or NULL for current - * task), SP is the stack pointer of the first frame that should be shown in the back - * trace (or NULL if the entire call-chain of the task should be shown). - */ -extern void show_stack(struct task_struct *task, unsigned long *sp); - extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); @@ -229,17 +204,6 @@ extern int sched_cpu_dying(unsigned int cpu); # define sched_cpu_dying NULL #endif -extern void sched_show_task(struct task_struct *p); - -/* Attach to any functions which should be ignored in wchan output. */ -#define __sched __attribute__((__section__(".sched.text"))) - -/* Linker adds these: start and end of __sched functions */ -extern char __sched_text_start[], __sched_text_end[]; - -/* Is this address in the __sched functions? */ -extern int in_sched_functions(unsigned long addr); - #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 55bc0cd35fd5..853bbef0b47b 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -3,4 +3,50 @@ #include +/* + * Various scheduler/task debugging interfaces: + */ + +struct task_struct; + +extern void dump_cpu_task(int cpu); + +/* + * Only dump TASK_* tasks. 
(0 for all tasks) + */ +extern void show_state_filter(unsigned long state_filter); + +static inline void show_state(void) +{ + show_state_filter(0); +} + +struct pt_regs; + +extern void show_regs(struct pt_regs *); + +/* + * TASK is a pointer to the task whose backtrace we want to see (or NULL for current + * task), SP is the stack pointer of the first frame that should be shown in the back + * trace (or NULL if the entire call-chain of the task should be shown). + */ +extern void show_stack(struct task_struct *task, unsigned long *sp); + +extern void sched_show_task(struct task_struct *p); + +#ifdef CONFIG_SCHED_DEBUG +struct seq_file; +extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); +extern void proc_sched_set_task(struct task_struct *p); +#endif + +/* Attach to any functions which should be ignored in wchan output. */ +#define __sched __attribute__((__section__(".sched.text"))) + +/* Linker adds these: start and end of __sched functions */ +extern char __sched_text_start[], __sched_text_end[]; + +/* Is this address in the __sched functions? */ +extern int in_sched_functions(unsigned long addr); + #endif /* _LINUX_SCHED_DEBUG_H */ -- GitLab From 63cc9d6fca8c220c2406f1376100a9acb55197af Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 12:04:39 +0100 Subject: [PATCH 0992/1084] sched/headers, time/timekeeping: Move the xtime_update() prototype from to This was in only for hysterical raisins. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/time.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 76a2e522be29..5a4fbc76feb4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1579,8 +1579,6 @@ extern struct task_struct *find_task_by_pid_ns(pid_t nr, #include -extern void xtime_update(unsigned long ticks); - extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); diff --git a/include/linux/time.h b/include/linux/time.h index 23f0f5ce3090..4f4ab65b6e61 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -167,6 +167,8 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts) extern u32 (*arch_gettimeoffset)(void); #endif +extern void xtime_update(unsigned long ticks); + struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); -- GitLab From 70b8157e61d0143fb44ae9482557d7aca365da3d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 12:11:00 +0100 Subject: [PATCH 0993/1084] sched/headers: Move include from the middle of to the header portion Linux-0.01 already defined 'current' in the middle of sched.h, so this is an ancient historical precedent - but still in a modern kernel it looks a bit weird that we have: #include in the middle of the header. Move it further up. If this was done for some obscure dependency reasons then we'll trigger and document it. 
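For illustration, a hypothetical inline helper of the kind sched.h defines further down; it dereferences 'current', which is why the include providing it has to sit in the header portion:

static inline int example_current_needs_resched(void)
{
	return test_tsk_need_resched(current);
}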
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5a4fbc76feb4..ac0fed4c3130 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -31,6 +31,8 @@ #include #include +#include + struct sched_attr; struct sched_param; @@ -1577,8 +1579,6 @@ extern struct task_struct *find_task_by_vpid(pid_t nr); extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); -#include - extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); -- GitLab From 0ca0156973a47e689f3bc817e26e15fff3f84eec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 14:52:01 +0100 Subject: [PATCH 0994/1084] sched/headers: Split hotplug CPU interfaces out of into Split the CPU hotplug scheduler APIs out of the common header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 15 --------------- include/linux/sched/hotplug.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index ac0fed4c3130..25cc0adb3e08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -196,15 +196,6 @@ extern void cpu_init (void); extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); -extern int sched_cpu_starting(unsigned int cpu); -extern int sched_cpu_activate(unsigned int cpu); -extern int sched_cpu_deactivate(unsigned int cpu); - -#ifdef CONFIG_HOTPLUG_CPU -extern int sched_cpu_dying(unsigned int cpu); -#else -# define sched_cpu_dying NULL -#endif #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long schedule_timeout(signed long timeout); @@ -1498,12 +1489,6 @@ extern void sched_exec(void); #define sched_exec() {} #endif -#ifdef CONFIG_HOTPLUG_CPU -extern void idle_task_exit(void); -#else -static inline void idle_task_exit(void) {} -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index 34670ed24894..c608d3c1ddb8 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -3,4 +3,24 @@ #include +/* + * Scheduler interfaces for hotplug CPU support: + */ + +extern int sched_cpu_starting(unsigned int cpu); +extern int sched_cpu_activate(unsigned int cpu); +extern int sched_cpu_deactivate(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_dying(unsigned int cpu); +#else +# define sched_cpu_dying NULL +#endif + +#ifdef CONFIG_HOTPLUG_CPU +extern void idle_task_exit(void); +#else +static inline void idle_task_exit(void) {} +#endif + #endif /* _LINUX_SCHED_HOTPLUG_H */ -- GitLab From 901b14bd946a8b7ea211105b6207e082ddd36846 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 15:24:12 +0100 Subject: [PATCH 0995/1084] sched/headers: Move task lifetime APIs from to There's a fair amount of task lifetime management (a.k.a fork()/exit()) related APIs in , but only a small fraction of the users of the generic sched.h header make use of them. 
Move these functions to the header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 91 ----------------------------------- include/linux/sched/task.h | 97 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 91 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 25cc0adb3e08..b1677c8db03f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -165,28 +165,10 @@ struct task_group; /* Task command name length */ #define TASK_COMM_LEN 16 -#include - -/* - * This serializes "schedule()" and also protects - * the run-queue from deletions/modifications (but - * _adding_ to the beginning of the run-queue has - * a separate lock). - */ -extern rwlock_t tasklist_lock; -extern spinlock_t mmlist_lock; - struct task_struct; -#ifdef CONFIG_PROVE_RCU -extern int lockdep_tasklist_lock_is_held(void); -#endif /* #ifdef CONFIG_PROVE_RCU */ - extern void sched_init(void); extern void sched_init_smp(void); -extern asmlinkage void schedule_tail(struct task_struct *prev); -extern void init_idle(struct task_struct *idle, int cpu); -extern void init_idle_bootup_task(struct task_struct *idle); extern cpumask_var_t cpu_isolated_map; @@ -211,8 +193,6 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); -void __noreturn do_task_dead(void); - struct nsproxy; /** @@ -1120,24 +1100,6 @@ struct task_struct { */ }; -#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT -extern int arch_task_struct_size __read_mostly; -#else -# define arch_task_struct_size (sizeof(struct task_struct)) -#endif - -#ifdef CONFIG_VMAP_STACK -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return t->stack_vm_area; -} -#else -static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) -{ - return NULL; -} -#endif - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -1429,21 +1391,6 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -static inline void rcu_copy_process(struct task_struct *p) -{ -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special.s = 0; - p->rcu_blocked_node = NULL; - INIT_LIST_HEAD(&p->rcu_node_entry); -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_TASKS_RCU - p->rcu_tasks_holdout = false; - INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); - p->rcu_tasks_idle_cpu = -1; -#endif /* #ifdef CONFIG_TASKS_RCU */ -} - static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { @@ -1572,45 +1519,11 @@ extern void wake_up_new_task(struct task_struct *tsk); #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_dead(struct task_struct *p); - -extern void proc_caches_init(void); - -extern void release_task(struct task_struct * p); - -#ifdef CONFIG_HAVE_COPY_THREAD_TLS -extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, - struct task_struct *, unsigned long); -#else -extern int copy_thread(unsigned long, unsigned long, unsigned long, - struct task_struct *); - -/* Architectures that haven't opted into copy_thread_tls get the tls argument - * via pt_regs, so ignore the tls argument passed via C. 
*/ -static inline int copy_thread_tls( - unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) -{ - return copy_thread(clone_flags, sp, arg, p); -} -#endif -extern void flush_thread(void); - -#ifdef CONFIG_HAVE_EXIT_THREAD -extern void exit_thread(struct task_struct *tsk); -#else -static inline void exit_thread(struct task_struct *tsk) -{ -} -#endif extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); -extern void do_group_exit(int); - extern int do_execve(struct filename *, const char __user * const __user *, const char __user * const __user *); @@ -1618,10 +1531,6 @@ extern int do_execveat(int, struct filename *, const char __user * const __user *, const char __user * const __user *, int); -extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); -extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); -struct task_struct *fork_idle(int); -extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 0023c91ff821..e93638a03515 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -1,6 +1,103 @@ #ifndef _LINUX_SCHED_TASK_H #define _LINUX_SCHED_TASK_H +/* + * Interface between the scheduler and various task lifetime (fork()/exit()) + * functionality: + */ + #include +/* + * This serializes "schedule()" and also protects + * the run-queue from deletions/modifications (but + * _adding_ to the beginning of the run-queue has + * a separate lock). + */ +extern rwlock_t tasklist_lock; +extern spinlock_t mmlist_lock; + +#ifdef CONFIG_PROVE_RCU +extern int lockdep_tasklist_lock_is_held(void); +#endif /* #ifdef CONFIG_PROVE_RCU */ + +extern asmlinkage void schedule_tail(struct task_struct *prev); +extern void init_idle(struct task_struct *idle, int cpu); +extern void init_idle_bootup_task(struct task_struct *idle); + +static inline void rcu_copy_process(struct task_struct *p) +{ +#ifdef CONFIG_PREEMPT_RCU + p->rcu_read_lock_nesting = 0; + p->rcu_read_unlock_special.s = 0; + p->rcu_blocked_node = NULL; + INIT_LIST_HEAD(&p->rcu_node_entry); +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + p->rcu_tasks_holdout = false; + INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); + p->rcu_tasks_idle_cpu = -1; +#endif /* #ifdef CONFIG_TASKS_RCU */ +} + +extern int sched_fork(unsigned long clone_flags, struct task_struct *p); +extern void sched_dead(struct task_struct *p); + +void __noreturn do_task_dead(void); + +extern void proc_caches_init(void); + +extern void release_task(struct task_struct * p); + +#ifdef CONFIG_HAVE_COPY_THREAD_TLS +extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, + struct task_struct *, unsigned long); +#else +extern int copy_thread(unsigned long, unsigned long, unsigned long, + struct task_struct *); + +/* Architectures that haven't opted into copy_thread_tls get the tls argument + * via pt_regs, so ignore the tls argument passed via C. 
*/ +static inline int copy_thread_tls( + unsigned long clone_flags, unsigned long sp, unsigned long arg, + struct task_struct *p, unsigned long tls) +{ + return copy_thread(clone_flags, sp, arg, p); +} +#endif +extern void flush_thread(void); + +#ifdef CONFIG_HAVE_EXIT_THREAD +extern void exit_thread(struct task_struct *tsk); +#else +static inline void exit_thread(struct task_struct *tsk) +{ +} +#endif +extern void do_group_exit(int); + +extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); +extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); +struct task_struct *fork_idle(int); +extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); + + +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +extern int arch_task_struct_size __read_mostly; +#else +# define arch_task_struct_size (sizeof(struct task_struct)) +#endif + +#ifdef CONFIG_VMAP_STACK +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return t->stack_vm_area; +} +#else +static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) +{ + return NULL; +} +#endif + #endif /* _LINUX_SCHED_TASK_H */ -- GitLab From 6bfbaa51ed47774492d83d182a86068cc35aa4c6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 21:37:49 +0100 Subject: [PATCH 0996/1084] sched/headers, RCU: Move rcu_copy_process() from to kernel/fork.c Move rcu_copy_process() into kernel/fork.c, which is the only user of this inline function. This simplifies to the level that does not have to be included in it anymore - which change is done in a subsequent patch. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/task.h | 15 --------------- kernel/fork.c | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index e93638a03515..20ed9108f261 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -25,21 +25,6 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); -static inline void rcu_copy_process(struct task_struct *p) -{ -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special.s = 0; - p->rcu_blocked_node = NULL; - INIT_LIST_HEAD(&p->rcu_node_entry); -#endif /* #ifdef CONFIG_PREEMPT_RCU */ -#ifdef CONFIG_TASKS_RCU - p->rcu_tasks_holdout = false; - INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); - p->rcu_tasks_idle_cpu = -1; -#endif /* #ifdef CONFIG_TASKS_RCU */ -} - extern int sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_dead(struct task_struct *p); diff --git a/kernel/fork.c b/kernel/fork.c index 916e78004b8f..6c463c80e93d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1465,6 +1465,21 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) task->pids[type].pid = pid; } +static inline void rcu_copy_process(struct task_struct *p) +{ +#ifdef CONFIG_PREEMPT_RCU + p->rcu_read_lock_nesting = 0; + p->rcu_read_unlock_special.s = 0; + p->rcu_blocked_node = NULL; + INIT_LIST_HEAD(&p->rcu_node_entry); +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + p->rcu_tasks_holdout = false; + INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); + p->rcu_tasks_idle_cpu = -1; 
+#endif /* #ifdef CONFIG_TASKS_RCU */ +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. -- GitLab From c7af7877eeacfeaaf6a1b6f54c481292ef116837 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 22:01:58 +0100 Subject: [PATCH 0997/1084] sched/core: Move, sort and clean up structure predeclarations Most of the structure predeclarations were at the head of sched.h, but not all of them - there were a number of lines spread around sched.h, in random places. Move them to the head, and also sort them alphabetically. Remove unused entries. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 56 ++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b1677c8db03f..5398356e33c7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -33,24 +33,35 @@ #include -struct sched_attr; -struct sched_param; - -struct futex_pi_state; -struct robust_list_head; +/* task_struct member predeclarations: */ +struct audit_context; +struct autogroup; +struct backing_dev_info; struct bio_list; -struct fs_struct; -struct perf_event_context; struct blk_plug; +struct cfs_rq; struct filename; +struct fs_struct; +struct futex_pi_state; +struct io_context; +struct mempolicy; struct nameidata; - -struct signal_struct; -struct sighand_struct; - +struct nsproxy; +struct perf_event_context; +struct pid_namespace; +struct pipe_inode_info; +struct rcu_node; +struct reclaim_state; +struct robust_list_head; +struct sched_attr; +struct sched_param; struct seq_file; -struct cfs_rq; +struct sighand_struct; +struct signal_struct; +struct task_delay_info; struct task_group; +struct task_struct; +struct uts_namespace; /* * Task state bitmask. NOTE! These bits are also @@ -165,8 +176,6 @@ struct task_group; /* Task command name length */ #define TASK_COMM_LEN 16 -struct task_struct; - extern void sched_init(void); extern void sched_init_smp(void); @@ -193,8 +202,6 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); -struct nsproxy; - /** * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode @@ -297,10 +304,6 @@ struct thread_group_cputimer { }; #include -struct autogroup; - -struct backing_dev_info; -struct reclaim_state; #ifdef CONFIG_SCHED_INFO struct sched_info { @@ -314,8 +317,6 @@ struct sched_info { }; #endif /* CONFIG_SCHED_INFO */ -struct task_delay_info; - static inline int sched_info_on(void) { #ifdef CONFIG_SCHEDSTATS @@ -342,20 +343,12 @@ void force_schedstat_enabled(void); # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -struct io_context; /* See blkdev.h */ - - #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK extern void prefetch_stack(struct task_struct *t); #else static inline void prefetch_stack(struct task_struct *t) { } #endif -struct audit_context; /* See audit.c */ -struct mempolicy; -struct pipe_inode_info; -struct uts_namespace; - struct load_weight { unsigned long weight; u32 inv_weight; @@ -564,7 +557,6 @@ union rcu_special { } b; /* Bits. */ u32 s; /* Set of bits. 
*/ }; -struct rcu_node; enum perf_event_task_context { perf_invalid_context = -1, @@ -1125,8 +1117,6 @@ static inline struct pid *task_session(struct task_struct *task) return task->group_leader->pids[PIDTYPE_SID].pid; } -struct pid_namespace; - /* * the helpers to get the task's different pids as they are seen * from various namespaces -- GitLab From d04b0ad37e4b6ac39a56c823ae76ab37cd044dc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 22:07:57 +0100 Subject: [PATCH 0998/1084] sched/headers: Move the PREEMPT_COUNT defines from to These defines are not really part of the scheduler's driver API, but are related to the preempt count - so move them to . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 21 +++++++++++++++++++++ include/linux/sched.h | 21 --------------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7eeceac52dea..cae461224948 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -55,6 +55,27 @@ /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 +#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + +/* + * Disable preemption until the scheduler is running -- use an unconditional + * value so that it also works on !PREEMPT_COUNT kernels. + * + * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). + */ +#define INIT_PREEMPT_COUNT PREEMPT_OFFSET + +/* + * Initial preempt_count value; reflects the preempt_count schedule invariant + * which states that during context switches: + * + * preempt_count() == 2*PREEMPT_DISABLE_OFFSET + * + * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. + * Note: See finish_task_switch(). + */ +#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 5398356e33c7..3b3e31da416e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -265,27 +265,6 @@ struct task_cputime_atomic { .sum_exec_runtime = ATOMIC64_INIT(0), \ } -#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - -/* - * Disable preemption until the scheduler is running -- use an unconditional - * value so that it also works on !PREEMPT_COUNT kernels. - * - * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). - */ -#define INIT_PREEMPT_COUNT PREEMPT_OFFSET - -/* - * Initial preempt_count value; reflects the preempt_count schedule invariant - * which states that during context switches: - * - * preempt_count() == 2*PREEMPT_DISABLE_OFFSET - * - * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. - * Note: See finish_task_switch(). - */ -#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) - /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. -- GitLab From f3ac60671954c8d413532627b1be13a76f394c49 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 22:59:33 +0100 Subject: [PATCH 0999/1084] sched/headers: Move task-stack related APIs from to Split out the task->stack related functionality, which is not really part of the core scheduler APIs. Only keep task_thread_info() because it's used by sched.h. 
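As a purely illustrative aside (not code from the patch): after this split, code that inspects another task's kernel stack includes the new task-stack header directly. The sketch below uses only interfaces moved here - try_get_task_stack(), put_task_stack() and task-stack pointers - and the function name is an assumption made for the example.

/*
 * Illustration only: test whether an address lies on some task's stack,
 * pinning the stack first so a dying task cannot free it underneath us.
 */
#include <linux/types.h>
#include <linux/sched/task_stack.h>

static bool addr_on_task_stack(struct task_struct *tsk, void *addr)
{
        void *stack = try_get_task_stack(tsk); /* may be NULL if the stack is gone */
        bool hit = false;

        if (!stack)
                return false;

        hit = addr >= stack && addr < stack + THREAD_SIZE;
        put_task_stack(tsk);                   /* drop the reference taken above */

        return hit;
}
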
Update the code that uses those facilities. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/mips/kernel/smp-cps.c | 2 +- arch/mips/sibyte/sb1250/smp.c | 2 +- include/linux/sched.h | 115 +++---------------------------- include/linux/sched/task_stack.h | 104 ++++++++++++++++++++++++++++ 4 files changed, 115 insertions(+), 108 deletions(-) diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 1d3188c23bb8..6d45f05538c8 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index 1cf66f5ff23d..0a4a2c3982d8 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 3b3e31da416e..af9590c8bfb0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1450,6 +1450,15 @@ union thread_union { unsigned long stack[THREAD_SIZE/sizeof(long)]; }; +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline struct thread_info *task_thread_info(struct task_struct *task) +{ + return &task->thread_info; +} +#elif !defined(__HAVE_THREAD_FUNCTIONS) +# define task_thread_info(task) ((struct thread_info *)(task)->stack) +#endif + #ifndef __HAVE_ARCH_KSTACK_END static inline int kstack_end(void *addr) { @@ -1540,112 +1549,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -#ifdef CONFIG_THREAD_INFO_IN_TASK - -static inline struct thread_info *task_thread_info(struct task_struct *task) -{ - return &task->thread_info; -} - -/* - * When accessing the stack of a non-current task that might exit, use - * try_get_task_stack() instead. task_stack_page will return a pointer - * that could get freed out from under you. - */ -static inline void *task_stack_page(const struct task_struct *task) -{ - return task->stack; -} - -#define setup_thread_stack(new,old) do { } while(0) - -static inline unsigned long *end_of_stack(const struct task_struct *task) -{ - return task->stack; -} - -#elif !defined(__HAVE_THREAD_FUNCTIONS) - -#define task_thread_info(task) ((struct thread_info *)(task)->stack) -#define task_stack_page(task) ((void *)(task)->stack) - -static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) -{ - *task_thread_info(p) = *task_thread_info(org); - task_thread_info(p)->task = p; -} - -/* - * Return the address of the last usable long on the stack. - * - * When the stack grows down, this is just above the thread - * info struct. Going any lower will corrupt the threadinfo. - * - * When the stack grows up, this is the highest address. - * Beyond that position, we corrupt data on the next page. - */ -static inline unsigned long *end_of_stack(struct task_struct *p) -{ -#ifdef CONFIG_STACK_GROWSUP - return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; -#else - return (unsigned long *)(task_thread_info(p) + 1); -#endif -} - -#endif - -#ifdef CONFIG_THREAD_INFO_IN_TASK -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return atomic_inc_not_zero(&tsk->stack_refcount) ? 
- task_stack_page(tsk) : NULL; -} - -extern void put_task_stack(struct task_struct *tsk); -#else -static inline void *try_get_task_stack(struct task_struct *tsk) -{ - return task_stack_page(tsk); -} - -static inline void put_task_stack(struct task_struct *tsk) {} -#endif - -#define task_stack_end_corrupted(task) \ - (*(end_of_stack(task)) != STACK_END_MAGIC) - -static inline int object_is_on_stack(void *obj) -{ - void *stack = task_stack_page(current); - - return (obj >= stack) && (obj < (stack + THREAD_SIZE)); -} - -extern void thread_stack_cache_init(void); - -#ifdef CONFIG_DEBUG_STACK_USAGE -static inline unsigned long stack_not_used(struct task_struct *p) -{ - unsigned long *n = end_of_stack(p); - - do { /* Skip over canary */ -# ifdef CONFIG_STACK_GROWSUP - n--; -# else - n++; -# endif - } while (!*n); - -# ifdef CONFIG_STACK_GROWSUP - return (unsigned long)end_of_stack(p) - (unsigned long)n; -# else - return (unsigned long)n - (unsigned long)end_of_stack(p); -# endif -} -#endif -extern void set_task_stack_end_magic(struct task_struct *tsk); - /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index d2d578e85f7d..aaa5c2a6a0e9 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -1,7 +1,111 @@ #ifndef _LINUX_SCHED_TASK_STACK_H #define _LINUX_SCHED_TASK_STACK_H +/* + * task->stack (kernel stack) handling interfaces: + */ + #include #include +#ifdef CONFIG_THREAD_INFO_IN_TASK + +/* + * When accessing the stack of a non-current task that might exit, use + * try_get_task_stack() instead. task_stack_page will return a pointer + * that could get freed out from under you. + */ +static inline void *task_stack_page(const struct task_struct *task) +{ + return task->stack; +} + +#define setup_thread_stack(new,old) do { } while(0) + +static inline unsigned long *end_of_stack(const struct task_struct *task) +{ + return task->stack; +} + +#elif !defined(__HAVE_THREAD_FUNCTIONS) + +#define task_stack_page(task) ((void *)(task)->stack) + +static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org) +{ + *task_thread_info(p) = *task_thread_info(org); + task_thread_info(p)->task = p; +} + +/* + * Return the address of the last usable long on the stack. + * + * When the stack grows down, this is just above the thread + * info struct. Going any lower will corrupt the threadinfo. + * + * When the stack grows up, this is the highest address. + * Beyond that position, we corrupt data on the next page. + */ +static inline unsigned long *end_of_stack(struct task_struct *p) +{ +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; +#else + return (unsigned long *)(task_thread_info(p) + 1); +#endif +} + +#endif + +#ifdef CONFIG_THREAD_INFO_IN_TASK +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return atomic_inc_not_zero(&tsk->stack_refcount) ? 
+ task_stack_page(tsk) : NULL; +} + +extern void put_task_stack(struct task_struct *tsk); +#else +static inline void *try_get_task_stack(struct task_struct *tsk) +{ + return task_stack_page(tsk); +} + +static inline void put_task_stack(struct task_struct *tsk) {} +#endif + +#define task_stack_end_corrupted(task) \ + (*(end_of_stack(task)) != STACK_END_MAGIC) + +static inline int object_is_on_stack(void *obj) +{ + void *stack = task_stack_page(current); + + return (obj >= stack) && (obj < (stack + THREAD_SIZE)); +} + +extern void thread_stack_cache_init(void); + +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long stack_not_used(struct task_struct *p) +{ + unsigned long *n = end_of_stack(p); + + do { /* Skip over canary */ +# ifdef CONFIG_STACK_GROWSUP + n--; +# else + n++; +# endif + } while (!*n); + +# ifdef CONFIG_STACK_GROWSUP + return (unsigned long)end_of_stack(p) - (unsigned long)n; +# else + return (unsigned long)n - (unsigned long)end_of_stack(p); +# endif +} +#endif +extern void set_task_stack_end_magic(struct task_struct *tsk); + #endif /* _LINUX_SCHED_TASK_STACK_H */ -- GitLab From 70806b21e4d64f01193a2b64a053b75ff53a2b10 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:15:21 +0100 Subject: [PATCH 1000/1084] sched/headers: Move the 'root_task_group' declaration to Also remove the duplicate declaration from . ( That declaration was originally duplicated for dependency hell reasons, but there's no problem including the much smaller header now, to pick up the right prototype. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 2 -- include/linux/sched.h | 4 ---- include/linux/sched/autogroup.h | 5 +++++ 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f6841c19c913..91d9049f0039 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -151,8 +151,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -extern struct task_group root_task_group; - #ifdef CONFIG_CGROUP_SCHED # define INIT_CGROUP_SCHED(tsk) \ .sched_task_group = &root_task_group, diff --git a/include/linux/sched.h b/include/linux/sched.h index af9590c8bfb0..4934733bd6cb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1707,10 +1707,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -#ifdef CONFIG_CGROUP_SCHED -extern struct task_group root_task_group; -#endif /* CONFIG_CGROUP_SCHED */ - extern int task_can_switch_user(struct user_struct *up, struct task_struct *tsk); diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h index 586bdf37330d..fd6855548d0c 100644 --- a/include/linux/sched/autogroup.h +++ b/include/linux/sched/autogroup.h @@ -4,6 +4,7 @@ #include struct signal_struct; +struct task_group; struct seq_file; #ifdef CONFIG_SCHED_AUTOGROUP @@ -24,4 +25,8 @@ static inline void sched_autogroup_exit(struct signal_struct *sig) { } static inline void sched_autogroup_exit_task(struct task_struct *p) { } #endif +#ifdef CONFIG_CGROUP_SCHED +extern struct task_group root_task_group; +#endif /* CONFIG_CGROUP_SCHED */ + #endif /* _LINUX_SCHED_AUTOGROUP_H */ -- GitLab From 76960dbf9b235b957d9d730be2c6c2a70f709026 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 
3 Feb 2017 23:43:50 +0100 Subject: [PATCH 1001/1084] signals: Move signal data types from to Separate out just the pure data types - sched.h will be able to use this reduced size header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/signal.h | 54 --------------------------------- include/linux/signal_types.h | 59 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 54 deletions(-) diff --git a/include/linux/signal.h b/include/linux/signal.h index d1c2b05a7a55..94ad6eea9550 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -8,24 +8,6 @@ struct task_struct; /* for sysctl */ extern int print_fatal_signals; -/* - * Real Time signals may be queued. - */ - -struct sigqueue { - struct list_head list; - int flags; - siginfo_t info; - struct user_struct *user; -}; - -/* flags values. */ -#define SIGQUEUE_PREALLOC 1 - -struct sigpending { - struct list_head list; - sigset_t signal; -}; #ifndef HAVE_ARCH_COPY_SIGINFO @@ -271,42 +253,6 @@ extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -struct sigaction { -#ifndef __ARCH_HAS_IRIX_SIGACTION - __sighandler_t sa_handler; - unsigned long sa_flags; -#else - unsigned int sa_flags; - __sighandler_t sa_handler; -#endif -#ifdef __ARCH_HAS_SA_RESTORER - __sigrestore_t sa_restorer; -#endif - sigset_t sa_mask; /* mask last for extensibility */ -}; - -struct k_sigaction { - struct sigaction sa; -#ifdef __ARCH_HAS_KA_RESTORER - __sigrestore_t ka_restorer; -#endif -}; - -#ifdef CONFIG_OLD_SIGACTION -struct old_sigaction { - __sighandler_t sa_handler; - old_sigset_t sa_mask; - unsigned long sa_flags; - __sigrestore_t sa_restorer; -}; -#endif - -struct ksignal { - struct k_sigaction ka; - siginfo_t info; - int sig; -}; - extern int get_signal(struct ksignal *ksig); extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index 28799c88f490..16d862a3d8f3 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -1,7 +1,66 @@ #ifndef _LINUX_SIGNAL_TYPES_H #define _LINUX_SIGNAL_TYPES_H +/* + * Basic signal handling related data type definitions: + */ + #include #include +/* + * Real Time signals may be queued. + */ + +struct sigqueue { + struct list_head list; + int flags; + siginfo_t info; + struct user_struct *user; +}; + +/* flags values. 
*/ +#define SIGQUEUE_PREALLOC 1 + +struct sigpending { + struct list_head list; + sigset_t signal; +}; + +struct sigaction { +#ifndef __ARCH_HAS_IRIX_SIGACTION + __sighandler_t sa_handler; + unsigned long sa_flags; +#else + unsigned int sa_flags; + __sighandler_t sa_handler; +#endif +#ifdef __ARCH_HAS_SA_RESTORER + __sigrestore_t sa_restorer; +#endif + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +#ifdef __ARCH_HAS_KA_RESTORER + __sigrestore_t ka_restorer; +#endif +}; + +#ifdef CONFIG_OLD_SIGACTION +struct old_sigaction { + __sighandler_t sa_handler; + old_sigset_t sa_mask; + unsigned long sa_flags; + __sigrestore_t sa_restorer; +}; +#endif + +struct ksignal { + struct k_sigaction ka; + siginfo_t info; + int sig; +}; + #endif /* _LINUX_SIGNAL_TYPES_H */ -- GitLab From 9e7d2e44dd88ba7e29c165b6fca428e384afa5a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:12:19 +0100 Subject: [PATCH 1002/1084] mm/headers, sched/headers: Move task related MM types from to Separate all the MM types that are embedded directly in 'struct task_struct' into the header. The goal is to include this header in , not the full header, to reduce the size, complexity and coupling of . (This patch does not change yet.) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types.h | 49 ------------------------------- include/linux/mm_types_task.h | 55 +++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 49 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 28bc710d3467..f60f45fe226f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -24,11 +24,6 @@ struct address_space; struct mem_cgroup; -#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) -#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ - IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) -#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -231,17 +226,6 @@ struct page { #endif ; -struct page_frag { - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 offset; - __u32 size; -#else - __u16 offset; - __u16 size; -#endif -}; - #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) @@ -360,18 +344,6 @@ struct vm_area_struct { struct vm_userfaultfd_ctx vm_userfaultfd_ctx; }; -/* - * The per task VMA cache array: - */ -#define VMACACHE_BITS 2 -#define VMACACHE_SIZE (1U << VMACACHE_BITS) -#define VMACACHE_MASK (VMACACHE_SIZE - 1) - -struct vmacache { - u32 seqnum; - struct vm_area_struct *vmas[VMACACHE_SIZE]; -}; - struct core_thread { struct task_struct *task; struct core_thread *next; @@ -383,27 +355,6 @@ struct core_state { struct completion startup; }; -enum { - MM_FILEPAGES, /* Resident file mapping pages */ - MM_ANONPAGES, /* Resident anonymous pages */ - MM_SWAPENTS, /* Anonymous swap entries */ - MM_SHMEMPAGES, /* Resident shared memory pages */ - NR_MM_COUNTERS -}; - -#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) -#define SPLIT_RSS_COUNTING -/* per-thread cached information, */ -struct task_rss_stat { - int events; /* for synchronization threshold */ - int count[NR_MM_COUNTERS]; -}; -#endif /* USE_SPLIT_PTE_PTLOCKS */ - -struct 
mm_rss_stat { - atomic_long_t count[NR_MM_COUNTERS]; -}; - struct kioctx_table; struct mm_struct { struct vm_area_struct *mmap; /* list of VMAs */ diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 2419d302f03c..9526d8b9fe0e 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -1,10 +1,65 @@ #ifndef _LINUX_MM_TYPES_TASK_H #define _LINUX_MM_TYPES_TASK_H +/* + * Here are the definitions of the MM data types that are embedded in 'struct task_struct'. + * + * (These are defined separately to decouple sched.h from mm_types.h as much as possible.) + */ + #include #include #include #include +#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) +#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ + IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) +#define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) + +/* + * The per task VMA cache array: + */ +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + +struct vmacache { + u32 seqnum; + struct vm_area_struct *vmas[VMACACHE_SIZE]; +}; + +enum { + MM_FILEPAGES, /* Resident file mapping pages */ + MM_ANONPAGES, /* Resident anonymous pages */ + MM_SWAPENTS, /* Anonymous swap entries */ + MM_SHMEMPAGES, /* Resident shared memory pages */ + NR_MM_COUNTERS +}; + +#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) +#define SPLIT_RSS_COUNTING +/* per-thread cached information, */ +struct task_rss_stat { + int events; /* for synchronization threshold */ + int count[NR_MM_COUNTERS]; +}; +#endif /* USE_SPLIT_PTE_PTLOCKS */ + +struct mm_rss_stat { + atomic_long_t count[NR_MM_COUNTERS]; +}; + +struct page_frag { + struct page *page; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 offset; + __u32 size; +#else + __u16 offset; + __u16 size; +#endif +}; + #endif /* _LINUX_MM_TYPES_TASK_H */ -- GitLab From 77ba809e8b39b4e384df0433e2bd3dd0907dad29 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:16:44 +0100 Subject: [PATCH 1003/1084] sched/headers: Remove the dependency from Use the freshly introduced, reduced size header instead. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/video/fbdev/auo_k190x.c | 1 + include/linux/sched.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/auo_k190x.c b/drivers/video/fbdev/auo_k190x.c index 9580374667ba..0d06038324e0 100644 --- a/drivers/video/fbdev/auo_k190x.c +++ b/drivers/video/fbdev/auo_k190x.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 4934733bd6cb..3eb284741790 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include -- GitLab From cdc75e9f7b14f29efcf4b162a3c673733e96db79 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:20:53 +0100 Subject: [PATCH 1004/1084] sched/headers: Move 'init_task' and 'init_thread_union' from to 'init_task' is really not part of core scheduler APIs but part of the fork() interface between the scheduler and process management. So move the declarations. 
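For illustration only (not part of this patch): a file that merely needs the root of the task list now picks up the declaration from the fork()/exit() interface header, whose path appears in the diff below. The function itself is a hypothetical example.

/*
 * Hypothetical user: a task walk starts at init_task, whose declaration
 * this patch moves into <linux/sched/task.h>.
 */
#include <linux/sched/task.h>

static struct task_struct *task_list_root(void)
{
        return &init_task;      /* the statically allocated init task */
}
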
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- include/linux/sched/task.h | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 3eb284741790..4ab105a2a8f9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1469,9 +1469,6 @@ static inline int kstack_end(void *addr) } #endif -extern union thread_union init_thread_union; -extern struct task_struct init_task; - extern struct pid_namespace init_pid_ns; /* diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 20ed9108f261..1be049a18d1b 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -8,6 +8,9 @@ #include +struct task_struct; +union thread_union; + /* * This serializes "schedule()" and also protects * the run-queue from deletions/modifications (but @@ -17,6 +20,9 @@ extern rwlock_t tasklist_lock; extern spinlock_t mmlist_lock; +extern union thread_union init_thread_union; +extern struct task_struct init_task; + #ifdef CONFIG_PROVE_RCU extern int lockdep_tasklist_lock_is_held(void); #endif /* #ifdef CONFIG_PROVE_RCU */ -- GitLab From 56cd697366b6d6f67acb6c58ac7f3b185d11ef07 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 10:57:33 +0100 Subject: [PATCH 1005/1084] sched/headers: Move the task_lock()/unlock() APIs to The task_lock()/task_unlock() APIs are not realated to core scheduling, they are task lifetime APIs, i.e. they belong into . Move them. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 20 -------------------- include/linux/sched/task.h | 20 ++++++++++++++++++++ kernel/cgroup/cgroup-v1.c | 1 + kernel/cgroup/namespace.c | 2 +- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ab105a2a8f9..d481c129a822 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1526,26 +1526,6 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -/* - * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring - * subscriptions and synchronises with wait4(). Also used in procfs. Also - * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. - * - * Nests both inside and outside of read_lock(&tasklist_lock). - * It must not be nested with write_lock_irq(&tasklist_lock), - * neither inside nor outside. - */ -static inline void task_lock(struct task_struct *p) -{ - spin_lock(&p->alloc_lock); -} - -static inline void task_unlock(struct task_struct *p) -{ - spin_unlock(&p->alloc_lock); -} - /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 1be049a18d1b..2be9fde588a7 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -91,4 +91,24 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) } #endif +/* + * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring + * subscriptions and synchronises with wait4(). Also used in procfs. Also + * pins the final release of task.io_context. Also protects ->cpuset and + * ->cgroup.subsys[]. And ->vfork_done. + * + * Nests both inside and outside of read_lock(&tasklist_lock). 
+ * It must not be nested with write_lock_irq(&tasklist_lock), + * neither inside nor outside. + */ +static inline void task_lock(struct task_struct *p) +{ + spin_lock(&p->alloc_lock); +} + +static inline void task_unlock(struct task_struct *p) +{ + spin_unlock(&p->alloc_lock); +} + #endif /* _LINUX_SCHED_TASK_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 08d2cb605101..abc585858685 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c index cff7ea62c38f..96d38dab6fb2 100644 --- a/kernel/cgroup/namespace.c +++ b/kernel/cgroup/namespace.c @@ -1,6 +1,6 @@ #include "cgroup-internal.h" -#include +#include #include #include #include -- GitLab From 1050b27c52f615bc0e772b3119881e5304ccde6b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 11:48:36 +0100 Subject: [PATCH 1006/1084] sched/headers: Move cputime functionality from and into Move cputime related functionality out of , as most code that includes does not use that functionality. Move data types that are not included in task_struct directly to the signal definitions, into . Also merge the (small) existing header into . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/cputime.h | 13 --- include/linux/sched.h | 89 ----------------- include/linux/sched/cputime.h | 182 +++++++++++++++++++++++++++++++++- include/linux/sched/signal.h | 33 ++++++ kernel/sched/stats.h | 111 --------------------- 5 files changed, 214 insertions(+), 214 deletions(-) delete mode 100644 include/linux/cputime.h diff --git a/include/linux/cputime.h b/include/linux/cputime.h deleted file mode 100644 index a691dc4ddc13..000000000000 --- a/include/linux/cputime.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __LINUX_CPUTIME_H -#define __LINUX_CPUTIME_H - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#include - -#ifndef cputime_to_nsecs -# define cputime_to_nsecs(__ct) \ - (cputime_to_usecs(__ct) * NSEC_PER_USEC) -#endif - -#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -#endif /* __LINUX_CPUTIME_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d481c129a822..2b43f55e4956 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -219,14 +219,6 @@ struct prev_cputime { #endif }; -static inline void prev_cputime_init(struct prev_cputime *prev) -{ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - prev->utime = prev->stime = 0; - raw_spin_lock_init(&prev->lock); -#endif -} - /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in nanoseconds @@ -248,40 +240,6 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -/* - * This is the atomic variant of task_cputime, which can be used for - * storing and updating task_cputime statistics without locking. - */ -struct task_cputime_atomic { - atomic64_t utime; - atomic64_t stime; - atomic64_t sum_exec_runtime; -}; - -#define INIT_CPUTIME_ATOMIC \ - (struct task_cputime_atomic) { \ - .utime = ATOMIC64_INIT(0), \ - .stime = ATOMIC64_INIT(0), \ - .sum_exec_runtime = ATOMIC64_INIT(0), \ - } - -/** - * struct thread_group_cputimer - thread group interval timer counts - * @cputime_atomic: atomic thread group interval timers. - * @running: true when there are timers running and - * @cputime_atomic receives updates. 
- * @checking_timer: true when a thread in the group is in the - * process of checking for thread group timers. - * - * This structure contains the version of task_cputime, above, that is - * used for thread group CPU timer calculations. - */ -struct thread_group_cputimer { - struct task_cputime_atomic cputime_atomic; - bool running; - bool checking_timer; -}; - #include #ifdef CONFIG_SCHED_INFO @@ -1234,44 +1192,6 @@ static inline void put_task_struct(struct task_struct *t) struct task_struct *task_rcu_dereference(struct task_struct **ptask); struct task_struct *try_get_task_struct(struct task_struct **ptask); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime); -extern u64 task_gtime(struct task_struct *t); -#else -static inline void task_cputime(struct task_struct *t, - u64 *utime, u64 *stime) -{ - *utime = t->utime; - *stime = t->stime; -} - -static inline u64 task_gtime(struct task_struct *t) -{ - return t->gtime; -} -#endif - -#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - *utimescaled = t->utimescaled; - *stimescaled = t->stimescaled; -} -#else -static inline void task_cputime_scaled(struct task_struct *t, - u64 *utimescaled, - u64 *stimescaled) -{ - task_cputime(t, utimescaled, stimescaled); -} -#endif - -extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); -extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); - /* * Per process flags */ @@ -1395,9 +1315,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, #define cpu_relax_yield() cpu_relax() #endif -extern unsigned long long -task_sched_runtime(struct task_struct *task); - /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP extern void sched_exec(void); @@ -1629,12 +1546,6 @@ static __always_inline bool need_resched(void) return unlikely(tif_need_resched()); } -/* - * Thread group CPU time accounting. - */ -void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); - /* * Wrappers for p->thread_info->cpu access. No-op on UP. 
*/ diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 6ed4fe43de28..4c5b9735c1ae 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -2,6 +2,186 @@ #define _LINUX_SCHED_CPUTIME_H #include -#include + +/* + * cputime accounting APIs: + */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#include + +#ifndef cputime_to_nsecs +# define cputime_to_nsecs(__ct) \ + (cputime_to_usecs(__ct) * NSEC_PER_USEC) +#endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void task_cputime(struct task_struct *t, + u64 *utime, u64 *stime); +extern u64 task_gtime(struct task_struct *t); +#else +static inline void task_cputime(struct task_struct *t, + u64 *utime, u64 *stime) +{ + *utime = t->utime; + *stime = t->stime; +} + +static inline u64 task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif + +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + *utimescaled = t->utimescaled; + *stimescaled = t->stimescaled; +} +#else +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + task_cputime(t, utimescaled, stimescaled); +} +#endif + +extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); +extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); + + +/* + * Thread group CPU time accounting. + */ +void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); + + +/* + * The following are functions that support scheduler-internal time accounting. + * These functions are generally called at the timer tick. None of this depends + * on CONFIG_SCHEDSTATS. + */ + +/** + * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running + * + * @tsk: Pointer to target task. + */ +#ifdef CONFIG_POSIX_TIMERS +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + + /* Check if cputimer isn't running. This is accessed without locking. */ + if (!READ_ONCE(cputimer->running)) + return NULL; + + /* + * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime + * in __exit_signal(), we won't account to the signal struct further + * cputime consumed by that task, even though the task can still be + * ticking after __exit_signal(). + * + * In order to keep a consistent behaviour between thread group cputime + * and thread group cputimer accounting, lets also ignore the cputime + * elapsing after __exit_signal() in any thread group timer running. + * + * This makes sure that POSIX CPU clocks and timers are synchronized, so + * that a POSIX CPU timer won't expire while the corresponding POSIX CPU + * clock delta is behind the expiring timer value. + */ + if (unlikely(!tsk->sighand)) + return NULL; + + return cputimer; +} +#else +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + return NULL; +} +#endif + +/** + * account_group_user_time - Maintain utime for a thread group. + * + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the utime field of the + * thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the utime field there. 
+ */ +static inline void account_group_user_time(struct task_struct *tsk, + u64 cputime) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(cputime, &cputimer->cputime_atomic.utime); +} + +/** + * account_group_system_time - Maintain stime for a thread group. + * + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the stime field of the + * thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the stime field there. + */ +static inline void account_group_system_time(struct task_struct *tsk, + u64 cputime) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(cputime, &cputimer->cputime_atomic.stime); +} + +/** + * account_group_exec_runtime - Maintain exec runtime for a thread group. + * + * @tsk: Pointer to task structure. + * @ns: Time value by which to increment the sum_exec_runtime field + * of the thread_group_cputime structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the sum_exec_runtime field there. + */ +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) +{ + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); + + if (!cputimer) + return; + + atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); +} + +static inline void prev_cputime_init(struct prev_cputime *prev) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + prev->utime = prev->stime = 0; + raw_spin_lock_init(&prev->lock); +#endif +} + +extern unsigned long long +task_sched_runtime(struct task_struct *task); #endif /* _LINUX_SCHED_CPUTIME_H */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index b3545fb4333a..2cf446704cd4 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -35,6 +35,39 @@ struct cpu_itimer { u64 incr; }; +/* + * This is the atomic variant of task_cputime, which can be used for + * storing and updating task_cputime statistics without locking. + */ +struct task_cputime_atomic { + atomic64_t utime; + atomic64_t stime; + atomic64_t sum_exec_runtime; +}; + +#define INIT_CPUTIME_ATOMIC \ + (struct task_cputime_atomic) { \ + .utime = ATOMIC64_INIT(0), \ + .stime = ATOMIC64_INIT(0), \ + .sum_exec_runtime = ATOMIC64_INIT(0), \ + } +/** + * struct thread_group_cputimer - thread group interval timer counts + * @cputime_atomic: atomic thread group interval timers. + * @running: true when there are timers running and + * @cputime_atomic receives updates. + * @checking_timer: true when a thread in the group is in the + * process of checking for thread group timers. + * + * This structure contains the version of task_cputime, above, that is + * used for thread group CPU timer calculations. + */ +struct thread_group_cputimer { + struct task_cputime_atomic cputime_atomic; + bool running; + bool checking_timer; +}; + /* * NOTE! 
"signal_struct" does not have its own * locking, because a shared signal_struct always diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index bf0da0aa0a14..d5710651043b 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -164,114 +164,3 @@ sched_info_switch(struct rq *rq, #define sched_info_arrive(rq, next) do { } while (0) #define sched_info_switch(rq, t, next) do { } while (0) #endif /* CONFIG_SCHED_INFO */ - -/* - * The following are functions that support scheduler-internal time accounting. - * These functions are generally called at the timer tick. None of this depends - * on CONFIG_SCHEDSTATS. - */ - -/** - * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running - * - * @tsk: Pointer to target task. - */ -#ifdef CONFIG_POSIX_TIMERS -static inline -struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) -{ - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - - /* Check if cputimer isn't running. This is accessed without locking. */ - if (!READ_ONCE(cputimer->running)) - return NULL; - - /* - * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime - * in __exit_signal(), we won't account to the signal struct further - * cputime consumed by that task, even though the task can still be - * ticking after __exit_signal(). - * - * In order to keep a consistent behaviour between thread group cputime - * and thread group cputimer accounting, lets also ignore the cputime - * elapsing after __exit_signal() in any thread group timer running. - * - * This makes sure that POSIX CPU clocks and timers are synchronized, so - * that a POSIX CPU timer won't expire while the corresponding POSIX CPU - * clock delta is behind the expiring timer value. - */ - if (unlikely(!tsk->sighand)) - return NULL; - - return cputimer; -} -#else -static inline -struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) -{ - return NULL; -} -#endif - -/** - * account_group_user_time - Maintain utime for a thread group. - * - * @tsk: Pointer to task structure. - * @cputime: Time value by which to increment the utime field of the - * thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the utime field there. - */ -static inline void account_group_user_time(struct task_struct *tsk, - u64 cputime) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(cputime, &cputimer->cputime_atomic.utime); -} - -/** - * account_group_system_time - Maintain stime for a thread group. - * - * @tsk: Pointer to task structure. - * @cputime: Time value by which to increment the stime field of the - * thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the stime field there. - */ -static inline void account_group_system_time(struct task_struct *tsk, - u64 cputime) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(cputime, &cputimer->cputime_atomic.stime); -} - -/** - * account_group_exec_runtime - Maintain exec runtime for a thread group. - * - * @tsk: Pointer to task structure. - * @ns: Time value by which to increment the sum_exec_runtime field - * of the thread_group_cputime structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the sum_exec_runtime field there. 
- */ -static inline void account_group_exec_runtime(struct task_struct *tsk, - unsigned long long ns) -{ - struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - - if (!cputimer) - return; - - atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); -} -- GitLab From a2d7a7463c38dd14b845721aac3583a26a97c66d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 12:07:04 +0100 Subject: [PATCH 1007/1084] sched/headers: Move sched_info_on() and force_schedstat_enabled() from to These APIs are not core scheduler but scheduler statistics related. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 16 ---------------- include/linux/sched/stat.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2b43f55e4956..707eee099332 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -254,22 +254,6 @@ struct sched_info { }; #endif /* CONFIG_SCHED_INFO */ -static inline int sched_info_on(void) -{ -#ifdef CONFIG_SCHEDSTATS - return 1; -#elif defined(CONFIG_TASK_DELAY_ACCT) - extern int delayacct_on; - return delayacct_on; -#else - return 0; -#endif -} - -#ifdef CONFIG_SCHEDSTATS -void force_schedstat_enabled(void); -#endif - /* * Integer metrics need fixed point arithmetic, e.g., sched/fair * has a few: load, load_avg, util_avg, freq, and capacity. diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index 0d52ceb6d3ae..d8cedf27083e 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -21,4 +21,20 @@ extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); +static inline int sched_info_on(void) +{ +#ifdef CONFIG_SCHEDSTATS + return 1; +#elif defined(CONFIG_TASK_DELAY_ACCT) + extern int delayacct_on; + return delayacct_on; +#else + return 0; +#endif +} + +#ifdef CONFIG_SCHEDSTATS +void force_schedstat_enabled(void); +#endif + #endif /* _LINUX_SCHED_STAT_H */ -- GitLab From 28851755990c832d7050d5e1e2c91ed9d9e6fe59 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:24:31 +0100 Subject: [PATCH 1008/1084] sched/headers, vfs/execve: Move the do_execve*() prototypes from to These are not scheduler related. 
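As an illustrative aside, a hypothetical in-kernel caller of the moved prototypes would now include <linux/binfmts.h>, the new home shown in the diff below, rather than the scheduler header. The wrapper name and the use of getname() are assumptions for the sketch, modelled on the usual fs/exec.c calling convention.

/*
 * Sketch only: exec a user-supplied path. do_execve() consumes the
 * struct filename returned by getname().
 */
#include <linux/binfmts.h>
#include <linux/fs.h>           /* getname() */

static int exec_user_path(const char __user *path,
                          const char __user *const __user *argv,
                          const char __user *const __user *envp)
{
        return do_execve(getname(path), argv, envp);
}
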
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/binfmts.h | 10 ++++++++++ include/linux/sched.h | 8 -------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 1303b570b18c..05488da3aee9 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -6,6 +6,8 @@ #include #include +struct filename; + #define CORENAME_MAX_SIZE 128 /* @@ -123,4 +125,12 @@ extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); +extern int do_execve(struct filename *, + const char __user * const __user *, + const char __user * const __user *); +extern int do_execveat(int, struct filename *, + const char __user * const __user *, + const char __user * const __user *, + int); + #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 707eee099332..5f2267e41fde 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1400,14 +1400,6 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); -extern int do_execve(struct filename *, - const char __user * const __user *, - const char __user * const __user *); -extern int do_execveat(int, struct filename *, - const char __user * const __user *, - const char __user * const __user *, - int); - extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) { -- GitLab From 9049863a32fad5d7158318e45f0a41a0f2192c0c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:31:22 +0100 Subject: [PATCH 1009/1084] sched/headers: Move kstack_end() from to This is a task-stack related API, not core scheduler functionality. 
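kstack_end() answers whether a pointer walking up the kernel stack has reached the end of the stack area. A hedged usage sketch of the classic stack-scanning loop (the function name is illustrative; assumes <linux/kernel.h> and <linux/sched/task_stack.h> are included):

static void dump_possible_return_addresses(void)
{
        unsigned long dummy;
        unsigned long *sp = &dummy;             /* somewhere in the current stack */

        while (!kstack_end(sp)) {
                unsigned long addr = *sp++;

                if (__kernel_text_address(addr))
                        print_ip_sym(addr);     /* plausible return address */
        }
}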
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 ---------- include/linux/sched/task_stack.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f2267e41fde..309eb76b7eab 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1360,16 +1360,6 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif -#ifndef __HAVE_ARCH_KSTACK_END -static inline int kstack_end(void *addr) -{ - /* Reliable end of stack detection: - * Some APM bios versions misalign the stack - */ - return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); -} -#endif - extern struct pid_namespace init_pid_ns; /* diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index aaa5c2a6a0e9..df6ea6665b31 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -108,4 +108,14 @@ static inline unsigned long stack_not_used(struct task_struct *p) #endif extern void set_task_stack_end_magic(struct task_struct *tsk); +#ifndef __HAVE_ARCH_KSTACK_END +static inline int kstack_end(void *addr) +{ + /* Reliable end of stack detection: + * Some APM bios versions misalign the stack + */ + return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); +} +#endif + #endif /* _LINUX_SCHED_TASK_STACK_H */ -- GitLab From 42011db0ed5a9c92b1281e1300eb3d026f3764a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:35:41 +0100 Subject: [PATCH 1010/1084] sched/headers: Move exit_files() and exit_itimers() from to These two functions are task management related, not core scheduler APIs. 
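Both are teardown helpers used on the task-exit path rather than by the scheduler proper. A hedged sketch of the kind of call sequence involved (the wrapper function is made up; in the kernel these calls sit inside do_exit()):

static void teardown_task_resources(struct task_struct *tsk)
{
        exit_itimers(tsk->signal);      /* drop the group's POSIX interval timers */
        exit_files(tsk);                /* release this task's files_struct reference */
}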
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ---- include/linux/sched/task.h | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 309eb76b7eab..4a03c49e3bcc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1386,10 +1386,6 @@ extern void wake_up_new_task(struct task_struct *tsk); static inline void kick_process(struct task_struct *tsk) { } #endif -extern void exit_files(struct task_struct *); - -extern void exit_itimers(struct signal_struct *); - extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); static inline void set_task_comm(struct task_struct *tsk, const char *from) { diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 2be9fde588a7..a33a8121da9a 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -67,6 +67,9 @@ static inline void exit_thread(struct task_struct *tsk) #endif extern void do_group_exit(int); +extern void exit_files(struct task_struct *); +extern void exit_itimers(struct signal_struct *); + extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); -- GitLab From c5a21921d55a2aee9d1e031a78cef6cda3eab78b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:47:12 +0100 Subject: [PATCH 1011/1084] sched/headers: Move _init() prototypes from to trap_init() and cpu_init() belong into , sched_init*() into . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 ----- include/linux/sched/init.h | 7 +++++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a03c49e3bcc..0fb56ae0abc7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,15 +176,10 @@ struct uts_namespace; /* Task command name length */ #define TASK_COMM_LEN 16 -extern void sched_init(void); -extern void sched_init_smp(void); - extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -extern void cpu_init (void); -extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h index 15e46313e55e..f31d16075857 100644 --- a/include/linux/sched/init.h +++ b/include/linux/sched/init.h @@ -3,4 +3,11 @@ #include +/* + * Scheduler init related prototypes: + */ + +extern void sched_init(void); +extern void sched_init_smp(void); + #endif /* _LINUX_SCHED_INIT_H */ -- GitLab From 93b5a9a7051e51ce50109046af0235268a261ba0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 14:53:36 +0100 Subject: [PATCH 1012/1084] sched/headers, timekeeping: Move the timer tick function prototypes to Move the update_process_times() and xtime_update() prototypes to . 
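Both prototypes are consumed by architecture timer-interrupt code, not by scheduler internals, which is why they fit better next to the timekeeping declarations. A hedged sketch of the classic periodic-tick handler that calls them (handler name is illustrative; assumes <linux/interrupt.h> and <asm/irq_regs.h> are included):

static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
        xtime_update(1);                                        /* advance wall time by one tick */
        update_process_times(user_mode(get_irq_regs()));        /* per-task tick accounting */

        return IRQ_HANDLED;
}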
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - include/linux/time.h | 2 -- include/linux/timekeeping.h | 4 ++++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 0fb56ae0abc7..dc8d94dc140f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -180,7 +180,6 @@ extern cpumask_var_t cpu_isolated_map; extern int runqueue_is_locked(int cpu); -extern void update_process_times(int user); extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX diff --git a/include/linux/time.h b/include/linux/time.h index 4f4ab65b6e61..23f0f5ce3090 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -167,8 +167,6 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts) extern u32 (*arch_gettimeoffset)(void); #endif -extern void xtime_update(unsigned long ticks); - struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index d2e804e15c3e..b598cbc7b576 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -8,6 +8,10 @@ void timekeeping_init(void); extern int timekeeping_suspended; +/* Architecture timer tick functions: */ +extern void update_process_times(int user); +extern void xtime_update(unsigned long ticks); + /* * Get and set timeofday */ -- GitLab From dcc2dc45f7cf267d37843059ff75b70260596c69 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:05:49 +0100 Subject: [PATCH 1013/1084] sched/headers, mm: Move 'struct tlbflush_unmap_batch' from to Unclutter some more. Also move the CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH condition inside the structure body definition, to remove a pair of #ifdefs from sched.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm_types_task.h | 22 ++++++++++++++++++++++ include/linux/sched.h | 21 --------------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 9526d8b9fe0e..136dfdf63ba1 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -62,4 +63,25 @@ struct page_frag { #endif }; +/* Track pages that require TLB flushes */ +struct tlbflush_unmap_batch { +#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH + /* + * Each bit set is a CPU that potentially has a TLB entry for one of + * the PFNs being flushed. See set_tlb_ubc_flush_pending(). + */ + struct cpumask cpumask; + + /* True if any bit in cpumask is set */ + bool flush_required; + + /* + * If true then the PTE was dirty when unmapped. The entry must be + * flushed before IO is initiated or a stale TLB entry potentially + * allows an update without redirtying the page. 
+ */ + bool writable; +#endif +}; + #endif /* _LINUX_MM_TYPES_TASK_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index dc8d94dc140f..8d6b7aa7f3ac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -484,25 +484,6 @@ struct wake_q_node { struct wake_q_node *next; }; -/* Track pages that require TLB flushes */ -struct tlbflush_unmap_batch { - /* - * Each bit set is a CPU that potentially has a TLB entry for one of - * the PFNs being flushed. See set_tlb_ubc_flush_pending(). - */ - struct cpumask cpumask; - - /* True if any bit in cpumask is set */ - bool flush_required; - - /* - * If true then the PTE was dirty when unmapped. The entry must be - * flushed before IO is initiated or a stale TLB entry potentially - * allows an update without redirtying the page. - */ - bool writable; -}; - struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -895,9 +876,7 @@ struct task_struct { unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH struct tlbflush_unmap_batch tlb_ubc; -#endif struct rcu_head rcu; -- GitLab From cda66725c1444db67127115d611c982b62b45d8c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 15:30:50 +0100 Subject: [PATCH 1014/1084] sched/headers: Move the get_task_struct()/put_task_struct() and related APIs from to These belong into the task lifetime API header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 14 -------------- include/linux/sched/task.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d6b7aa7f3ac..b68358c60560 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1135,20 +1135,6 @@ static inline int is_global_init(struct task_struct *tsk) extern struct pid *cad_pid; -extern void free_task(struct task_struct *tsk); -#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) - -extern void __put_task_struct(struct task_struct *t); - -static inline void put_task_struct(struct task_struct *t) -{ - if (atomic_dec_and_test(&t->usage)) - __put_task_struct(t); -} - -struct task_struct *task_rcu_dereference(struct task_struct **ptask); -struct task_struct *try_get_task_struct(struct task_struct **ptask); - /* * Per process flags */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a33a8121da9a..3886ae64148f 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -75,6 +75,21 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); +extern void free_task(struct task_struct *tsk); + +#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) + +extern void __put_task_struct(struct task_struct *t); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + __put_task_struct(t); +} + +struct task_struct *task_rcu_dereference(struct task_struct **ptask); +struct task_struct *try_get_task_struct(struct task_struct **ptask); + #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT extern int arch_task_struct_size __read_mostly; -- GitLab From 6f175fc9536355d8aa5c2d4854848a97c244a031 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 11:12:45 +0100 Subject: [PATCH 1015/1084] sched/headers: 
Move the sched_exec() prototype to sched_exec() better fits into the task lifetime APIs than into the core scheduler APIs. This reduces the size of a bit. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ------- include/linux/sched/task.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b68358c60560..bd89fc17767c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1258,13 +1258,6 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, #define cpu_relax_yield() cpu_relax() #endif -/* sched_exec is called by processes performing an exec */ -#ifdef CONFIG_SMP -extern void sched_exec(void); -#else -#define sched_exec() {} -#endif - extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 3886ae64148f..a978d7189cfd 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -77,6 +77,13 @@ extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern void free_task(struct task_struct *tsk); +/* sched_exec is called by processes performing an exec */ +#ifdef CONFIG_SMP +extern void sched_exec(void); +#else +#define sched_exec() {} +#endif + #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) extern void __put_task_struct(struct task_struct *t); -- GitLab From c03cb28e7c44055cd00d11b8581b9fa46a3e3764 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 1016/1084] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/topology.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index b3550b36e65d..0d6fceff37bb 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_TOPOLOGY_H #define _LINUX_SCHED_TOPOLOGY_H -#include - #include /* -- GitLab From f411229e1dd623a3dee623824d094546789977e0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 18:49:05 +0100 Subject: [PATCH 1017/1084] sched/headers: Remove tsk_is_polling() It's not used by anything. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/idle.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 66ee32f87f0b..5ca63ebad6b4 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -17,10 +17,6 @@ extern void wake_up_if_idle(int cpu); * polling state. 
*/ #ifdef TIF_POLLING_NRFLAG -static inline int tsk_is_polling(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); -} static inline void __current_set_polling(void) { @@ -59,7 +55,6 @@ static inline bool __must_check current_clr_polling_and_test(void) } #else -static inline int tsk_is_polling(struct task_struct *p) { return 0; } static inline void __current_set_polling(void) { } static inline void __current_clr_polling(void) { } -- GitLab From ea94763950e49d1aa622c59218ee2fc3b434ba6b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 1018/1084] sched/headers: Remove from The file is a largely self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/clock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index ac12f71d359c..4a68c6791207 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -1,7 +1,7 @@ #ifndef _LINUX_SCHED_CLOCK_H #define _LINUX_SCHED_CLOCK_H -#include +#include /* * Do not use outside of architecture code which knows its limitations. -- GitLab From cc689c5b352d4a7510b11c975c5c94d4785b37e7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: [PATCH 1019/1084] sched/headers: Remove and from The file is a self-contained header and users of it either don't need and - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/delayacct.h | 2 -- include/linux/fault-inject.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 6b769cbd1000..4178d2493547 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -18,8 +18,6 @@ #define _LINUX_DELAYACCT_H #include -#include -#include /* * Per-task flags relevant to delay accounting diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f4956d8601c..728d4e0292aa 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -61,6 +61,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, #endif /* CONFIG_FAULT_INJECTION */ +struct kmem_cache; + #ifdef CONFIG_FAILSLAB extern bool should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else -- GitLab From 5a2d6880f461faa416c0d329d46a128cf342c1eb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: [PATCH 1020/1084] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. 
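What the header provides is the fixed-point load-average machinery, which stands on its own. A hedged usage sketch, modelled on what fs/proc/loadavg.c does, converting the fixed-point averages into the familiar decimal form (assumes the LOAD_INT()/LOAD_FRAC() helpers come with the header):

static void report_loadavg(void)
{
        unsigned long avnrun[3];

        get_avenrun(avnrun, FIXED_1 / 200, 0);  /* rounded snapshot of avenrun[] */

        pr_info("load average: %lu.%02lu %lu.%02lu %lu.%02lu\n",
                LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
                LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
                LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]));
}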
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/loadavg.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h index c392e28ce0ac..4264bc6b2c27 100644 --- a/include/linux/sched/loadavg.h +++ b/include/linux/sched/loadavg.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_LOADAVG_H #define _LINUX_SCHED_LOADAVG_H -#include - /* * These are the constant used to fake the fixed-point load-average * counting. Some notes: -- GitLab From 5fd73157bdfb57370b69be7c0067f08e610e7daa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: [PATCH 1021/1084] sched/headers: Remove from The file is a largely self-contained header and users of it either don't need - or have already included it. Add a 'task_struct' predeclaration to make it build standalone. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/autogroup.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/sched/autogroup.h b/include/linux/sched/autogroup.h index fd6855548d0c..55cd496df884 100644 --- a/include/linux/sched/autogroup.h +++ b/include/linux/sched/autogroup.h @@ -1,9 +1,8 @@ #ifndef _LINUX_SCHED_AUTOGROUP_H #define _LINUX_SCHED_AUTOGROUP_H -#include - struct signal_struct; +struct task_struct; struct task_group; struct seq_file; -- GitLab From b8d6d80b37a98e3910f1b4e799f8ebea88e94bfa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: [PATCH 1022/1084] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. Include kernel.h and atomic.h. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/mm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 62dca4e0b4e0..830953ebb391 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -1,6 +1,8 @@ #ifndef _LINUX_SCHED_MM_H #define _LINUX_SCHED_MM_H +#include +#include #include #include #include -- GitLab From ae1cc8823204bb938d039afa43c28a84591ada9f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:54 +0100 Subject: [PATCH 1023/1084] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. Include . This reduces the size of the header dependency graph. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/coredump.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index f46912aa4f4c..69eedcef8f03 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -1,7 +1,6 @@ #ifndef _LINUX_SCHED_COREDUMP_H #define _LINUX_SCHED_COREDUMP_H -#include #include #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ -- GitLab From 556725839e543bc2b45bd73121c0b1c1d6c23714 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 10:31:24 +0100 Subject: [PATCH 1024/1084] sched/headers: Remove unused 'task_can_switch_user()' prototype The function does not exist anymore. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index bd89fc17767c..43ed34ccb53a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1509,9 +1509,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -extern int task_can_switch_user(struct user_struct *up, - struct task_struct *tsk); - #ifndef TASK_SIZE_OF #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -- GitLab From de8deb0ad79f8a49cea5110c006c8676a11611f7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:55 +0100 Subject: [PATCH 1025/1084] sched/headers: Remove from If we add then becomes a self-contained header and users of it either don't need or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/user.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 40c6363da5ef..5d5415e129d4 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -1,7 +1,8 @@ #ifndef _LINUX_SCHED_USER_H #define _LINUX_SCHED_USER_H -#include +#include +#include struct key; -- GitLab From ac4e620967bb72f86371154935aa8b67cf54e225 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:44:31 +0100 Subject: [PATCH 1026/1084] sched/headers: Remove #include from The header does not actually make use of any types or APIs defined in , so remove its inclusion. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 43ed34ccb53a..54eefb2ad603 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -5,7 +5,6 @@ #include -#include #include #include #include -- GitLab From aa829c7679a13fca667f772db1f0ea03adc29122 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 11:54:05 +0100 Subject: [PATCH 1027/1084] sched/headers: Remove from It's not used by anything in anymore. This reduces the preprocessed size of and speeds up the build a bit. Also fix code that implicitly relied on headers included by . 
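This is the recurring fixup pattern of the series: a file that only built because <linux/sched.h> happened to drag in some other header must now include that header itself. A made-up example of the shape of such a fix:

/* hypothetical drivers/foo/bar.c: used a mutex, never included the header */
#include <linux/sched.h>        /* no longer guarantees the indirect include */
#include <linux/mutex.h>        /* the fix: include what is actually used */

static DEFINE_MUTEX(bar_lock);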
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - include/target/target_core_base.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 54eefb2ad603..8991e4d7cd0f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -28,7 +28,6 @@ #include #include #include -#include #include diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 8be9ba73383d..774c29b57e82 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -4,6 +4,7 @@ #include /* struct config_group */ #include /* enum dma_data_direction */ #include /* struct percpu_ida */ +#include #include /* struct semaphore */ #include -- GitLab From ed53742d793bd92ed32114081370b199dc94467d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:55 +0100 Subject: [PATCH 1028/1084] sched/headers: Remove from Make the file a self-contained header and remove the dependency: users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/cpufreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index 2bdcfbfc4c30..d2be2ccbb372 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -1,7 +1,7 @@ #ifndef _LINUX_SCHED_CPUFREQ_H #define _LINUX_SCHED_CPUFREQ_H -#include +#include /* * Interface between cpufreq drivers and the scheduler: -- GitLab From 71af2ed5eeea639339e3a1497a0196bab7de4b57 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 09:57:00 +0100 Subject: [PATCH 1029/1084] kasan, sched/headers: Remove from is a low level header that is included early in affected kernel headers. But it includes which complicates the cleanup of sched.h dependencies. Remove it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kasan.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7793036eac80..ceb3fe78a0d3 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -1,7 +1,6 @@ #ifndef _LINUX_KASAN_H #define _LINUX_KASAN_H -#include #include struct kmem_cache; -- GitLab From e26512fea5bcd6602dbf02a551ed073cd4529449 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 17:54:15 +0100 Subject: [PATCH 1030/1084] sched/headers: Remove inclusion from This reduces header dependencies and speeds up the build. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8991e4d7cd0f..1be69735cef3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include -- GitLab From f780d89a0e820a529cf91fb78b52565e1b37b774 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:03:42 +0100 Subject: [PATCH 1031/1084] sched/headers: Remove from This reduces header dependencies. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/sh/include/asm/fpu.h | 2 ++ arch/sh/mm/extable_32.c | 2 ++ include/linux/sched.h | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h index 09fc2bc8a790..50921c7cc3f0 100644 --- a/arch/sh/include/asm/fpu.h +++ b/arch/sh/include/asm/fpu.h @@ -3,6 +3,8 @@ #ifndef __ASSEMBLY__ +#include + struct task_struct; #ifdef CONFIG_SH_FPU diff --git a/arch/sh/mm/extable_32.c b/arch/sh/mm/extable_32.c index 24a75d315dcb..940e871bc816 100644 --- a/arch/sh/mm/extable_32.c +++ b/arch/sh/mm/extable_32.c @@ -7,6 +7,8 @@ #include #include +#include + int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fixup; diff --git a/include/linux/sched.h b/include/linux/sched.h index 1be69735cef3..9dfa9c113570 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include -- GitLab From 9c6da18109d603a99915b257929c0370c9d3ae40 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 10:08:30 +0100 Subject: [PATCH 1032/1084] sched/headers: Remove from This reduces header dependencies. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 9dfa9c113570..cc5c99a27e21 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include -- GitLab From f0a0eb699967f4f51567b563818bafe4017bef73 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 20:56:33 +0100 Subject: [PATCH 1033/1084] sched/headers: Remove the include from This reduces sched.h header dependencies. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index cc5c99a27e21..ab80596dddfd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -22,7 +22,6 @@ #include #include #include -#include #include #include -- GitLab From dc1995392d79b0eeee147a792482b991f74c1494 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: [PATCH 1034/1084] sched/headers: Remove from The file is a largely self-contained header and users of it either don't need - or have already included it. ( Keep the dependency.) This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index d8cedf27083e..141b74c53fad 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -1,7 +1,7 @@ #ifndef _LINUX_SCHED_STAT_H #define _LINUX_SCHED_STAT_H -#include +#include /* * Various counters maintained by the scheduler and fork(), -- GitLab From b0145d9b332a5117bd0abbb980ab89586a1d5f6c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: [PATCH 1035/1084] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/nohz.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 9471f0736a3a..4995b717500b 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_NOHZ_H #define _LINUX_SCHED_NOHZ_H -#include - /* * This is the interface between the scheduler and nohz/dyntics: */ -- GitLab From 4f079e98a0db5f067c0981a526ff8938e21c52e2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: [PATCH 1036/1084] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/debug.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 853bbef0b47b..e0eaee54c5a4 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_DEBUG_H #define _LINUX_SCHED_DEBUG_H -#include - /* * Various scheduler/task debugging interfaces: */ -- GitLab From a906086ef35129d522047a296e7c4237af75fdcb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:57 +0100 Subject: [PATCH 1037/1084] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/hotplug.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index c608d3c1ddb8..752ac7e628d7 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_HOTPLUG_H #define _LINUX_SCHED_HOTPLUG_H -#include - /* * Scheduler interfaces for hotplug CPU support: */ -- GitLab From fae3c30c2d1ae3ff93877f64e5d55e2443d8f82f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 12:00:08 +0100 Subject: [PATCH 1038/1084] sched/headers: Remove the runqueue_is_locked() prototype Remove the runqueue_is_locked() prototype, the function does not exist anymore. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index ab80596dddfd..ac98255d00fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -172,8 +172,6 @@ struct uts_namespace; extern cpumask_var_t cpu_isolated_map; -extern int runqueue_is_locked(int cpu); - extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX -- GitLab From cd9c513be34ceaae8bf211474b91b6897574efdd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:58 +0100 Subject: [PATCH 1039/1084] sched/headers: Remove from This is a stray header that is not needed by anything in sched.h, so remove it. Update files that relied on the stray inclusion. This reduces the size of the header dependency graph. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/user_namespace.h | 1 + kernel/utsname_sysctl.c | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index ac98255d00fb..b361f881fe44 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -226,8 +226,6 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -#include - #ifdef CONFIG_SCHED_INFO struct sched_info { /* cumulative counters */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 08264641b502..faa9bfb827da 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index c8eac43267e9..233cd8fc6910 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_PROC_SYSCTL -- GitLab From 192c9414516e7178a44f9ed48d47501c4c19771c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:47:37 +0100 Subject: [PATCH 1040/1084] sched/headers: Remove from Instead of including the full , only include the types-only header in , to further decouple the scheduler header from the signal headers. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b361f881fe44..905d4f148d65 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -11,7 +11,6 @@ #include #include -#include #include #include #include -- GitLab From 1d1954e038435765a623884b626731784cde768f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:16:15 +0100 Subject: [PATCH 1041/1084] sched/headers: Remove the 'init_pid_ns' prototype from pid.h already defines it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 905d4f148d65..057b2a85ecc7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1301,8 +1301,6 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif -extern struct pid_namespace init_pid_ns; - /* * find a task by one of its numerical ids * -- GitLab From b68070e146b9c2b4ece8d869a4fab9a4f14bbfb4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:27:20 +0100 Subject: [PATCH 1042/1084] sched/headers: Remove from We don't actually need the full rculist.h header anymore, include the smaller rcupdate.h header instead. 
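<linux/rcupdate.h> carries the core RCU primitives (rcu_read_lock(), rcu_dereference(), struct rcu_head), while <linux/rculist.h> layers the RCU-aware list helpers on top of it; the smaller header is all sched.h needs. A hedged sketch showing which header each primitive of a typical reader comes from (types and names are made up):

#include <linux/rcupdate.h>     /* rcu_read_lock(), rcu_dereference(), ... */
#include <linux/rculist.h>      /* list_add_rcu(), list_for_each_entry_rcu(), ... */

struct item {
        int val;
        struct list_head node;
};

static LIST_HEAD(items);

static int first_item_val(void)
{
        struct item *it;
        int val = -1;

        rcu_read_lock();                                /* rcupdate.h */
        list_for_each_entry_rcu(it, &items, node) {     /* rculist.h */
                val = it->val;
                break;
        }
        rcu_read_unlock();

        return val;
}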
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 057b2a85ecc7..5720d11f3224 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include -- GitLab From 2c873d55cd838deef8218b6d5fe9bd336cb3113a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 12:11:03 +0100 Subject: [PATCH 1043/1084] sched/core: Remove unused prefetch_stack() prefetch_stack() is defined by IA64, but not actually used anywhere anymore. Remove it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/processor.h | 2 -- arch/ia64/kernel/entry.S | 23 ----------------------- include/linux/sched.h | 6 ------ 3 files changed, 31 deletions(-) diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 03911a336406..26a63d69c599 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -19,8 +19,6 @@ #include #include -#define ARCH_HAS_PREFETCH_SWITCH_STACK - #define IA64_NUM_PHYS_STACK_REG 96 #define IA64_NUM_DBG_REGS 8 diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 6f27a663177c..e7a716b09350 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -455,29 +455,6 @@ GLOBAL_ENTRY(load_switch_stack) br.cond.sptk.many b7 END(load_switch_stack) -GLOBAL_ENTRY(prefetch_stack) - add r14 = -IA64_SWITCH_STACK_SIZE, sp - add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0 - ;; - ld8 r16 = [r15] // load next's stack pointer - lfetch.fault.excl [r14], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault.excl [r14], 128 - lfetch.fault [r16], 128 - ;; - lfetch.fault [r16], 128 - br.ret.sptk.many rp -END(prefetch_stack) - /* * Invoke a system call, but do some tracing before and after the call. * We MUST preserve the current register frame throughout this routine diff --git a/include/linux/sched.h b/include/linux/sched.h index 5720d11f3224..bd0111a06aa2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -247,12 +247,6 @@ struct sched_info { # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) -#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK -extern void prefetch_stack(struct task_struct *t); -#else -static inline void prefetch_stack(struct task_struct *t) { } -#endif - struct load_weight { unsigned long weight; u32 inv_weight; -- GitLab From 5c0d0f36414f9f8a292b42e797f9284b127d79c2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:59 +0100 Subject: [PATCH 1044/1084] sched/headers: Remove from The file is a self-contained header and users of it either don't need - or have already included it. This reduces the size of the header dependency graph. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched/init.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/sched/init.h b/include/linux/sched/init.h index f31d16075857..127215045285 100644 --- a/include/linux/sched/init.h +++ b/include/linux/sched/init.h @@ -1,8 +1,6 @@ #ifndef _LINUX_SCHED_INIT_H #define _LINUX_SCHED_INIT_H -#include - /* * Scheduler init related prototypes: */ -- GitLab From 50ff9d130014f7b19541dbf607a615a72b75b778 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 16:03:58 +0100 Subject: [PATCH 1045/1084] sched/headers: Remove from It's not used by any of the scheduler methods, but needs it to pick up STACK_END_MAGIC. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/cgroup/cgroup-v1.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index bd0111a06aa2..559be4f1aee5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -22,7 +22,6 @@ #include #include #include -#include #include diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index abc585858685..56eba9caa632 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include -- GitLab From b2d5bfea2d00a0000da18f7667c2b0e2c2f168d9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:56:40 +0100 Subject: [PATCH 1046/1084] sched/headers, timers: Remove the include from So we want to simplify 's header dependencies, but one roadblock of that is 's inclusion of sysctl.h, which brings in other, problematic headers. Note that timer.h's inclusion of sysctl.h can be avoided if we pre-declare ctl_table - so do that. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/timer.h | 2 +- include/linux/user_namespace.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index c7bdf895179c..e6789b8757d5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -212,7 +212,7 @@ struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) -#include +struct ctl_table; extern unsigned int sysctl_timer_migration; int timer_migration_handler(struct ctl_table *table, int write, diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index faa9bfb827da..be765234c0a2 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include -- GitLab From 1a79a72c65b8f1b4f44e166d1fb5cad560461b26 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:58:35 +0100 Subject: [PATCH 1047/1084] sched/headers, x86/apic: Remove the header inclusion from We want to simplify 's header dependencies, but one roadblock to that is 's inclusion of pm.h, which brings in other, problematic headers. Remove it, as it appears to be entirely spurious, apic.h does not actually make use of any PM facilities. 
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index eff8e36aaf72..730ef65e8393 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -2,7 +2,6 @@ #define _ASM_X86_APIC_H #include -#include #include #include -- GitLab From 283cb90305cf1686594ed537c7a8cb188eba1a4d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:59:47 +0100 Subject: [PATCH 1048/1084] sched/headers, hrtimer: Remove the include from In our quest to simplify 's header dependencies, remove the inclusion from - which does not appear to be necessary, as hrtimer.h does not use waitqueues. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index e52b427223ba..249e579ecd4c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include -- GitLab From ee6a3d19f15b9b10075481088b8d4537f286d7b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 10:01:09 +0100 Subject: [PATCH 1049/1084] sched/headers: Remove the include from It's used only by a single (rarely used) inline function (task_node(p)), which we can move to . ( Add , because we rely on that. ) Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/mips/sgi-ip27/ip27-smp.c | 1 + include/linux/sched.h | 7 +------ include/linux/sched/topology.h | 7 +++++++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index f9ae6a8fa7c7..f5ed45e8f442 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 559be4f1aee5..f9dc9cfaf079 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -21,7 +22,6 @@ #include #include #include -#include #include @@ -1454,11 +1454,6 @@ static inline unsigned int task_cpu(const struct task_struct *p) #endif } -static inline int task_node(const struct task_struct *p) -{ - return cpu_to_node(task_cpu(p)); -} - extern void set_task_cpu(struct task_struct *p, unsigned int cpu); #else diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 0d6fceff37bb..7d065abc7a47 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -1,6 +1,8 @@ #ifndef _LINUX_SCHED_TOPOLOGY_H #define _LINUX_SCHED_TOPOLOGY_H +#include + #include /* @@ -216,4 +218,9 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) #endif /* !CONFIG_SMP */ +static inline int task_node(const struct task_struct *p) +{ + return cpu_to_node(task_cpu(p)); +} + #endif /* _LINUX_SCHED_TOPOLOGY_H */ -- GitLab From 7f5f8e8d97d77edf33f2836259d1f19c6f4d94f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 11:44:12 +0100 Subject: [PATCH 1050/1084] sched/headers: Remove #ifdefs from We can remove two pairs of #ifdefs by defining structures in a smarter way. 
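The trick, visible in the diff below, is to move the configuration guard inside the structure definition: the member then always exists (as an empty, zero-sized struct when the option is off, a GNU C property the kernel relies on), so the code embedding it no longer needs its own #ifdef pair. A reduced before/after sketch with abbreviated, made-up names:

/* Before: both the definition and every user need a guard. */
#ifdef CONFIG_FOO_STATS
struct foo_stats_old { unsigned long count; };
#endif

struct foo_old {
#ifdef CONFIG_FOO_STATS
        struct foo_stats_old stats;
#endif
};

/* After: the guard lives inside the struct; users are unconditional. */
struct foo_stats_new {
#ifdef CONFIG_FOO_STATS
        unsigned long count;
#endif
};

struct foo_new {
        struct foo_stats_new stats;     /* size 0 when CONFIG_FOO_STATS is off */
};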
Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index f9dc9cfaf079..341f5792fbe0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -224,8 +224,8 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -#ifdef CONFIG_SCHED_INFO struct sched_info { +#ifdef CONFIG_SCHED_INFO /* cumulative counters */ unsigned long pcount; /* # of times run on this cpu */ unsigned long long run_delay; /* time spent waiting on a runqueue */ @@ -233,8 +233,8 @@ struct sched_info { /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ last_queued; /* when we were last queued to run */ -}; #endif /* CONFIG_SCHED_INFO */ +}; /* * Integer metrics need fixed point arithmetic, e.g., sched/fair @@ -309,8 +309,8 @@ struct sched_avg { unsigned long load_avg, util_avg; }; -#ifdef CONFIG_SCHEDSTATS struct sched_statistics { +#ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; u64 wait_count; @@ -342,8 +342,8 @@ struct sched_statistics { u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; -}; #endif +}; struct sched_entity { struct load_weight load; /* for load-balancing */ @@ -358,9 +358,7 @@ struct sched_entity { u64 nr_migrations; -#ifdef CONFIG_SCHEDSTATS struct sched_statistics statistics; -#endif #ifdef CONFIG_FAIR_GROUP_SCHED int depth; @@ -530,9 +528,7 @@ struct task_struct { int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ -#ifdef CONFIG_SCHED_INFO struct sched_info sched_info; -#endif struct list_head tasks; #ifdef CONFIG_SMP -- GitLab From 5eca1c10cbaa9c366c18ca79f81f21c731e3dcc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 22:06:35 +0100 Subject: [PATCH 1051/1084] sched/headers: Clean up Now that dependencies have been sorted out, do various trivial cleanups: - remove unnecessary structure predeclarations - fix various typos - update comments where necessary - remove pointless comments - use consistent types - tabulate consistently - use a consistent comment style - clean up the header section a bit - use a consistent style of a single field per line - remove line-breaks where they make the code look worse - etc ... No change in functionality. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1222 ++++++++++++++++++++++------------------- 1 file changed, 651 insertions(+), 571 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 341f5792fbe0..d67eee84fd43 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1,38 +1,38 @@ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H -#include +/* + * Define 'struct task_struct' and provide the main scheduler + * APIs (schedule(), wakeup variants, etc.) 
+ */ -#include -#include +#include -#include -#include -#include +#include +#include #include #include -#include -#include +#include +#include +#include +#include #include +#include #include - #include -#include -#include -#include #include +#include +#include +#include +#include -#include - -/* task_struct member predeclarations: */ +/* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; -struct autogroup; struct backing_dev_info; struct bio_list; struct blk_plug; struct cfs_rq; -struct filename; struct fs_struct; struct futex_pi_state; struct io_context; @@ -52,8 +52,6 @@ struct sighand_struct; struct signal_struct; struct task_delay_info; struct task_group; -struct task_struct; -struct uts_namespace; /* * Task state bitmask. NOTE! These bits are also @@ -65,50 +63,53 @@ struct uts_namespace; * modifying one set can't modify the other one by * mistake. */ -#define TASK_RUNNING 0 -#define TASK_INTERRUPTIBLE 1 -#define TASK_UNINTERRUPTIBLE 2 -#define __TASK_STOPPED 4 -#define __TASK_TRACED 8 -/* in tsk->exit_state */ -#define EXIT_DEAD 16 -#define EXIT_ZOMBIE 32 -#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) -/* in tsk->state again */ -#define TASK_DEAD 64 -#define TASK_WAKEKILL 128 -#define TASK_WAKING 256 -#define TASK_PARKED 512 -#define TASK_NOLOAD 1024 -#define TASK_NEW 2048 -#define TASK_STATE_MAX 4096 - -#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" - -/* Convenience macros for the sake of set_current_state */ -#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) -#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) -#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) - -#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) - -/* Convenience macros for the sake of wake_up */ -#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) - -/* get_task_state() */ -#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ - TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) - -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) -#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) -#define task_is_stopped_or_traced(task) \ - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) -#define task_contributes_to_load(task) \ - ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0 && \ - (task->state & TASK_NOLOAD) == 0) + +/* Used in tsk->state: */ +#define TASK_RUNNING 0 +#define TASK_INTERRUPTIBLE 1 +#define TASK_UNINTERRUPTIBLE 2 +#define __TASK_STOPPED 4 +#define __TASK_TRACED 8 +/* Used in tsk->exit_state: */ +#define EXIT_DEAD 16 +#define EXIT_ZOMBIE 32 +#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) +/* Used in tsk->state again: */ +#define TASK_DEAD 64 +#define TASK_WAKEKILL 128 +#define TASK_WAKING 256 +#define TASK_PARKED 512 +#define TASK_NOLOAD 1024 +#define TASK_NEW 2048 +#define TASK_STATE_MAX 4096 + +#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" + +/* Convenience macros for the sake of set_current_state: */ +#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) +#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) + +#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) + +/* Convenience macros for the sake of wake_up(): */ +#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) +#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) + +/* get_task_state(): */ +#define TASK_REPORT (TASK_RUNNING | 
TASK_INTERRUPTIBLE | \ + TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ + __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) + +#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) + +#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) + +#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) + +#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ + (task->flags & PF_FROZEN) == 0 && \ + (task->state & TASK_NOLOAD) == 0) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP @@ -158,26 +159,24 @@ struct uts_namespace; * * Also see the comments of try_to_wake_up(). */ -#define __set_current_state(state_value) \ - do { current->state = (state_value); } while (0) -#define set_current_state(state_value) \ - smp_store_mb(current->state, (state_value)) - +#define __set_current_state(state_value) do { current->state = (state_value); } while (0) +#define set_current_state(state_value) smp_store_mb(current->state, (state_value)) #endif -/* Task command name length */ -#define TASK_COMM_LEN 16 +/* Task command name length: */ +#define TASK_COMM_LEN 16 -extern cpumask_var_t cpu_isolated_map; +extern cpumask_var_t cpu_isolated_map; extern void scheduler_tick(void); -#define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long schedule_timeout(signed long timeout); -extern signed long schedule_timeout_interruptible(signed long timeout); -extern signed long schedule_timeout_killable(signed long timeout); -extern signed long schedule_timeout_uninterruptible(signed long timeout); -extern signed long schedule_timeout_idle(signed long timeout); +#define MAX_SCHEDULE_TIMEOUT LONG_MAX + +extern long schedule_timeout(long timeout); +extern long schedule_timeout_interruptible(long timeout); +extern long schedule_timeout_killable(long timeout); +extern long schedule_timeout_uninterruptible(long timeout); +extern long schedule_timeout_idle(long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); @@ -197,9 +196,9 @@ extern void io_schedule(void); */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - u64 utime; - u64 stime; - raw_spinlock_t lock; + u64 utime; + u64 stime; + raw_spinlock_t lock; #endif }; @@ -214,25 +213,34 @@ struct prev_cputime { * these counts together and treat all three of them in parallel. */ struct task_cputime { - u64 utime; - u64 stime; - unsigned long long sum_exec_runtime; + u64 utime; + u64 stime; + unsigned long long sum_exec_runtime; }; -/* Alternate field names when used to cache expirations. */ -#define virt_exp utime -#define prof_exp stime -#define sched_exp sum_exec_runtime +/* Alternate field names when used on cache expirations: */ +#define virt_exp utime +#define prof_exp stime +#define sched_exp sum_exec_runtime struct sched_info { #ifdef CONFIG_SCHED_INFO - /* cumulative counters */ - unsigned long pcount; /* # of times run on this cpu */ - unsigned long long run_delay; /* time spent waiting on a runqueue */ + /* Cumulative counters: */ + + /* # of times we have run on this CPU: */ + unsigned long pcount; + + /* Time spent waiting on a runqueue: */ + unsigned long long run_delay; + + /* Timestamps: */ + + /* When did we last run on a CPU? */ + unsigned long long last_arrival; + + /* When were we last queued to run? 
*/ + unsigned long long last_queued; - /* timestamps */ - unsigned long long last_arrival,/* when we last ran on a cpu */ - last_queued; /* when we were last queued to run */ #endif /* CONFIG_SCHED_INFO */ }; @@ -243,12 +251,12 @@ struct sched_info { * We define a basic fixed point arithmetic range, and then formalize * all these metrics based on that basic range. */ -# define SCHED_FIXEDPOINT_SHIFT 10 -# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) +# define SCHED_FIXEDPOINT_SHIFT 10 +# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) struct load_weight { - unsigned long weight; - u32 inv_weight; + unsigned long weight; + u32 inv_weight; }; /* @@ -304,69 +312,73 @@ struct load_weight { * issues. */ struct sched_avg { - u64 last_update_time, load_sum; - u32 util_sum, period_contrib; - unsigned long load_avg, util_avg; + u64 last_update_time; + u64 load_sum; + u32 util_sum; + u32 period_contrib; + unsigned long load_avg; + unsigned long util_avg; }; struct sched_statistics { #ifdef CONFIG_SCHEDSTATS - u64 wait_start; - u64 wait_max; - u64 wait_count; - u64 wait_sum; - u64 iowait_count; - u64 iowait_sum; - - u64 sleep_start; - u64 sleep_max; - s64 sum_sleep_runtime; - - u64 block_start; - u64 block_max; - u64 exec_max; - u64 slice_max; - - u64 nr_migrations_cold; - u64 nr_failed_migrations_affine; - u64 nr_failed_migrations_running; - u64 nr_failed_migrations_hot; - u64 nr_forced_migrations; - - u64 nr_wakeups; - u64 nr_wakeups_sync; - u64 nr_wakeups_migrate; - u64 nr_wakeups_local; - u64 nr_wakeups_remote; - u64 nr_wakeups_affine; - u64 nr_wakeups_affine_attempts; - u64 nr_wakeups_passive; - u64 nr_wakeups_idle; + u64 wait_start; + u64 wait_max; + u64 wait_count; + u64 wait_sum; + u64 iowait_count; + u64 iowait_sum; + + u64 sleep_start; + u64 sleep_max; + s64 sum_sleep_runtime; + + u64 block_start; + u64 block_max; + u64 exec_max; + u64 slice_max; + + u64 nr_migrations_cold; + u64 nr_failed_migrations_affine; + u64 nr_failed_migrations_running; + u64 nr_failed_migrations_hot; + u64 nr_forced_migrations; + + u64 nr_wakeups; + u64 nr_wakeups_sync; + u64 nr_wakeups_migrate; + u64 nr_wakeups_local; + u64 nr_wakeups_remote; + u64 nr_wakeups_affine; + u64 nr_wakeups_affine_attempts; + u64 nr_wakeups_passive; + u64 nr_wakeups_idle; #endif }; struct sched_entity { - struct load_weight load; /* for load-balancing */ - struct rb_node run_node; - struct list_head group_node; - unsigned int on_rq; + /* For load-balancing: */ + struct load_weight load; + struct rb_node run_node; + struct list_head group_node; + unsigned int on_rq; - u64 exec_start; - u64 sum_exec_runtime; - u64 vruntime; - u64 prev_sum_exec_runtime; + u64 exec_start; + u64 sum_exec_runtime; + u64 vruntime; + u64 prev_sum_exec_runtime; - u64 nr_migrations; + u64 nr_migrations; - struct sched_statistics statistics; + struct sched_statistics statistics; #ifdef CONFIG_FAIR_GROUP_SCHED - int depth; - struct sched_entity *parent; + int depth; + struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ - struct cfs_rq *cfs_rq; + struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ - struct cfs_rq *my_q; + struct cfs_rq *my_q; #endif #ifdef CONFIG_SMP @@ -376,49 +388,49 @@ struct sched_entity { * Put into separate cache line so it does not * collide with read-mostly values above. 
*/ - struct sched_avg avg ____cacheline_aligned_in_smp; + struct sched_avg avg ____cacheline_aligned_in_smp; #endif }; struct sched_rt_entity { - struct list_head run_list; - unsigned long timeout; - unsigned long watchdog_stamp; - unsigned int time_slice; - unsigned short on_rq; - unsigned short on_list; - - struct sched_rt_entity *back; + struct list_head run_list; + unsigned long timeout; + unsigned long watchdog_stamp; + unsigned int time_slice; + unsigned short on_rq; + unsigned short on_list; + + struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED - struct sched_rt_entity *parent; + struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ - struct rt_rq *rt_rq; + struct rt_rq *rt_rq; /* rq "owned" by this entity/group: */ - struct rt_rq *my_q; + struct rt_rq *my_q; #endif }; struct sched_dl_entity { - struct rb_node rb_node; + struct rb_node rb_node; /* * Original scheduling parameters. Copied here from sched_attr * during sched_setattr(), they will remain the same until * the next sched_setattr(). */ - u64 dl_runtime; /* maximum runtime for each instance */ - u64 dl_deadline; /* relative deadline of each instance */ - u64 dl_period; /* separation of two instances (period) */ - u64 dl_bw; /* dl_runtime / dl_deadline */ + u64 dl_runtime; /* Maximum runtime for each instance */ + u64 dl_deadline; /* Relative deadline of each instance */ + u64 dl_period; /* Separation of two instances (period) */ + u64 dl_bw; /* dl_runtime / dl_deadline */ /* * Actual scheduling parameters. Initialized with the values above, * they are continously updated during task execution. Note that * the remaining runtime could be < 0 in case we are in overrun. */ - s64 runtime; /* remaining runtime for this instance */ - u64 deadline; /* absolute deadline for this instance */ - unsigned int flags; /* specifying the scheduler behaviour */ + s64 runtime; /* Remaining runtime for this instance */ + u64 deadline; /* Absolute deadline for this instance */ + unsigned int flags; /* Specifying the scheduler behaviour */ /* * Some bool flags: @@ -431,24 +443,28 @@ struct sched_dl_entity { * outside bandwidth enforcement mechanism (but only until we * exit the critical section); * - * @dl_yielded tells if task gave up the cpu before consuming + * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. */ - int dl_throttled, dl_boosted, dl_yielded; + int dl_throttled; + int dl_boosted; + int dl_yielded; /* * Bandwidth enforcement timer. Each -deadline task has its * own bandwidth to be enforced, thus we need one timer per task. */ - struct hrtimer dl_timer; + struct hrtimer dl_timer; }; union rcu_special { struct { - u8 blocked; - u8 need_qs; - u8 exp_need_qs; - u8 pad; /* Otherwise the compiler can store garbage here. */ + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + + /* Otherwise the compiler can store garbage here: */ + u8 pad; } b; /* Bits. */ u32 s; /* Set of bits. */ }; @@ -470,361 +486,417 @@ struct task_struct { * For reasons of header soup (see current_thread_info()), this * must be the first element of task_struct. 
*/ - struct thread_info thread_info; + struct thread_info thread_info; #endif - volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ - void *stack; - atomic_t usage; - unsigned int flags; /* per process flags, defined below */ - unsigned int ptrace; + /* -1 unrunnable, 0 runnable, >0 stopped: */ + volatile long state; + void *stack; + atomic_t usage; + /* Per task flags (PF_*), defined further below: */ + unsigned int flags; + unsigned int ptrace; #ifdef CONFIG_SMP - struct llist_node wake_entry; - int on_cpu; + struct llist_node wake_entry; + int on_cpu; #ifdef CONFIG_THREAD_INFO_IN_TASK - unsigned int cpu; /* current CPU */ + /* Current CPU: */ + unsigned int cpu; #endif - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; + unsigned int wakee_flips; + unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; - int wake_cpu; + int wake_cpu; #endif - int on_rq; + int on_rq; + + int prio; + int static_prio; + int normal_prio; + unsigned int rt_priority; - int prio, static_prio, normal_prio; - unsigned int rt_priority; - const struct sched_class *sched_class; - struct sched_entity se; - struct sched_rt_entity rt; + const struct sched_class *sched_class; + struct sched_entity se; + struct sched_rt_entity rt; #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; + struct task_group *sched_task_group; #endif - struct sched_dl_entity dl; + struct sched_dl_entity dl; #ifdef CONFIG_PREEMPT_NOTIFIERS - /* list of struct preempt_notifier: */ - struct hlist_head preempt_notifiers; + /* List of struct preempt_notifier: */ + struct hlist_head preempt_notifiers; #endif #ifdef CONFIG_BLK_DEV_IO_TRACE - unsigned int btrace_seq; + unsigned int btrace_seq; #endif - unsigned int policy; - int nr_cpus_allowed; - cpumask_t cpus_allowed; + unsigned int policy; + int nr_cpus_allowed; + cpumask_t cpus_allowed; #ifdef CONFIG_PREEMPT_RCU - int rcu_read_lock_nesting; - union rcu_special rcu_read_unlock_special; - struct list_head rcu_node_entry; - struct rcu_node *rcu_blocked_node; + int rcu_read_lock_nesting; + union rcu_special rcu_read_unlock_special; + struct list_head rcu_node_entry; + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ + #ifdef CONFIG_TASKS_RCU - unsigned long rcu_tasks_nvcsw; - bool rcu_tasks_holdout; - struct list_head rcu_tasks_holdout_list; - int rcu_tasks_idle_cpu; + unsigned long rcu_tasks_nvcsw; + bool rcu_tasks_holdout; + struct list_head rcu_tasks_holdout_list; + int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ - struct sched_info sched_info; + struct sched_info sched_info; - struct list_head tasks; + struct list_head tasks; #ifdef CONFIG_SMP - struct plist_node pushable_tasks; - struct rb_node pushable_dl_tasks; + struct plist_node pushable_tasks; + struct rb_node pushable_dl_tasks; #endif - struct mm_struct *mm, *active_mm; + struct mm_struct *mm; + struct mm_struct *active_mm; /* Per-thread vma caching: */ - struct vmacache vmacache; - -#if defined(SPLIT_RSS_COUNTING) - struct task_rss_stat rss_stat; -#endif -/* task state */ - int exit_state; - int exit_code, exit_signal; - int pdeath_signal; /* The signal sent when the parent dies */ - unsigned long jobctl; /* JOBCTL_*, siglock protected */ - - /* Used for emulating ABI behavior of previous Linux versions */ - unsigned int personality; - - /* scheduler bits, serialized by scheduler locks */ - unsigned sched_reset_on_fork:1; - unsigned sched_contributes_to_load:1; - unsigned sched_migrated:1; - unsigned 
sched_remote_wakeup:1; - unsigned :0; /* force alignment to the next boundary */ - - /* unserialized, strictly 'current' */ - unsigned in_execve:1; /* bit to tell LSMs we're in execve */ - unsigned in_iowait:1; -#if !defined(TIF_RESTORE_SIGMASK) - unsigned restore_sigmask:1; + struct vmacache vmacache; + +#ifdef SPLIT_RSS_COUNTING + struct task_rss_stat rss_stat; +#endif + int exit_state; + int exit_code; + int exit_signal; + /* The signal sent when the parent dies: */ + int pdeath_signal; + /* JOBCTL_*, siglock protected: */ + unsigned long jobctl; + + /* Used for emulating ABI behavior of previous Linux versions: */ + unsigned int personality; + + /* Scheduler bits, serialized by scheduler locks: */ + unsigned sched_reset_on_fork:1; + unsigned sched_contributes_to_load:1; + unsigned sched_migrated:1; + unsigned sched_remote_wakeup:1; + /* Force alignment to the next boundary: */ + unsigned :0; + + /* Unserialized, strictly 'current' */ + + /* Bit to tell LSMs we're in execve(): */ + unsigned in_execve:1; + unsigned in_iowait:1; +#ifndef TIF_RESTORE_SIGMASK + unsigned restore_sigmask:1; #endif #ifdef CONFIG_MEMCG - unsigned memcg_may_oom:1; + unsigned memcg_may_oom:1; #ifndef CONFIG_SLOB - unsigned memcg_kmem_skip_account:1; + unsigned memcg_kmem_skip_account:1; #endif #endif #ifdef CONFIG_COMPAT_BRK - unsigned brk_randomized:1; + unsigned brk_randomized:1; #endif - unsigned long atomic_flags; /* Flags needing atomic access. */ + unsigned long atomic_flags; /* Flags requiring atomic access. */ - struct restart_block restart_block; + struct restart_block restart_block; - pid_t pid; - pid_t tgid; + pid_t pid; + pid_t tgid; #ifdef CONFIG_CC_STACKPROTECTOR - /* Canary value for the -fstack-protector gcc feature */ - unsigned long stack_canary; + /* Canary value for the -fstack-protector GCC feature: */ + unsigned long stack_canary; #endif /* - * pointers to (original) parent process, youngest child, younger sibling, + * Pointers to the (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ - struct task_struct __rcu *real_parent; /* real parent process */ - struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */ + + /* Real parent process: */ + struct task_struct __rcu *real_parent; + + /* Recipient of SIGCHLD, wait4() reports: */ + struct task_struct __rcu *parent; + /* - * children/sibling forms the list of my natural children + * Children/sibling form the list of natural children: */ - struct list_head children; /* list of my children */ - struct list_head sibling; /* linkage in my parent's children list */ - struct task_struct *group_leader; /* threadgroup leader */ + struct list_head children; + struct list_head sibling; + struct task_struct *group_leader; /* - * ptraced is the list of tasks this task is using ptrace on. + * 'ptraced' is the list of tasks this task is using ptrace() on. + * * This includes both natural children and PTRACE_ATTACH targets. - * p->ptrace_entry is p's link on the p->parent->ptraced list. + * 'ptrace_entry' is this task's link on the p->parent->ptraced list. */ - struct list_head ptraced; - struct list_head ptrace_entry; + struct list_head ptraced; + struct list_head ptrace_entry; /* PID/PID hash table linkage. 
*/ - struct pid_link pids[PIDTYPE_MAX]; - struct list_head thread_group; - struct list_head thread_node; + struct pid_link pids[PIDTYPE_MAX]; + struct list_head thread_group; + struct list_head thread_node; + + struct completion *vfork_done; - struct completion *vfork_done; /* for vfork() */ - int __user *set_child_tid; /* CLONE_CHILD_SETTID */ - int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ + /* CLONE_CHILD_SETTID: */ + int __user *set_child_tid; - u64 utime, stime; + /* CLONE_CHILD_CLEARTID: */ + int __user *clear_child_tid; + + u64 utime; + u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - u64 utimescaled, stimescaled; + u64 utimescaled; + u64 stimescaled; #endif - u64 gtime; - struct prev_cputime prev_cputime; + u64 gtime; + struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - seqcount_t vtime_seqcount; - unsigned long long vtime_snap; + seqcount_t vtime_seqcount; + unsigned long long vtime_snap; enum { - /* Task is sleeping or running in a CPU with VTIME inactive */ + /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, - /* Task runs in userspace in a CPU with VTIME active */ + /* Task runs in userspace in a CPU with VTIME active: */ VTIME_USER, - /* Task runs in kernelspace in a CPU with VTIME active */ + /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, } vtime_snap_whence; #endif #ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; + atomic_t tick_dep_mask; #endif - unsigned long nvcsw, nivcsw; /* context switch counts */ - u64 start_time; /* monotonic time in nsec */ - u64 real_start_time; /* boot based time in nsec */ -/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ - unsigned long min_flt, maj_flt; + /* Context switch counts: */ + unsigned long nvcsw; + unsigned long nivcsw; + + /* Monotonic time in nsecs: */ + u64 start_time; + + /* Boot based time in nsecs: */ + u64 real_start_time; + + /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ + unsigned long min_flt; + unsigned long maj_flt; #ifdef CONFIG_POSIX_TIMERS - struct task_cputime cputime_expires; - struct list_head cpu_timers[3]; -#endif - -/* process credentials */ - const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ - const struct cred __rcu *real_cred; /* objective and real subjective task - * credentials (COW) */ - const struct cred __rcu *cred; /* effective (overridable) subjective task - * credentials (COW) */ - char comm[TASK_COMM_LEN]; /* executable name excluding path - - access with [gs]et_task_comm (which lock - it with task_lock()) - - initialized normally by setup_new_exec */ -/* file system info */ - struct nameidata *nameidata; + struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; +#endif + + /* Process credentials: */ + + /* Tracer's credentials at attach: */ + const struct cred __rcu *ptracer_cred; + + /* Objective and real subjective task credentials (COW): */ + const struct cred __rcu *real_cred; + + /* Effective (overridable) subjective task credentials (COW): */ + const struct cred __rcu *cred; + + /* + * executable name, excluding path. 
+ * + * - normally initialized setup_new_exec() + * - access it with [gs]et_task_comm() + * - lock it with task_lock() + */ + char comm[TASK_COMM_LEN]; + + struct nameidata *nameidata; + #ifdef CONFIG_SYSVIPC -/* ipc stuff */ - struct sysv_sem sysvsem; - struct sysv_shm sysvshm; + struct sysv_sem sysvsem; + struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK -/* hung task detection */ - unsigned long last_switch_count; -#endif -/* filesystem information */ - struct fs_struct *fs; -/* open file information */ - struct files_struct *files; -/* namespaces */ - struct nsproxy *nsproxy; -/* signal handlers */ - struct signal_struct *signal; - struct sighand_struct *sighand; - - sigset_t blocked, real_blocked; - sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ - struct sigpending pending; - - unsigned long sas_ss_sp; - size_t sas_ss_size; - unsigned sas_ss_flags; - - struct callback_head *task_works; - - struct audit_context *audit_context; + unsigned long last_switch_count; +#endif + /* Filesystem information: */ + struct fs_struct *fs; + + /* Open file information: */ + struct files_struct *files; + + /* Namespaces: */ + struct nsproxy *nsproxy; + + /* Signal handlers: */ + struct signal_struct *signal; + struct sighand_struct *sighand; + sigset_t blocked; + sigset_t real_blocked; + /* Restored if set_restore_sigmask() was used: */ + sigset_t saved_sigmask; + struct sigpending pending; + unsigned long sas_ss_sp; + size_t sas_ss_size; + unsigned int sas_ss_flags; + + struct callback_head *task_works; + + struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL - kuid_t loginuid; - unsigned int sessionid; + kuid_t loginuid; + unsigned int sessionid; #endif - struct seccomp seccomp; + struct seccomp seccomp; -/* Thread group tracking */ - u32 parent_exec_id; - u32 self_exec_id; -/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, - * mempolicy */ - spinlock_t alloc_lock; + /* Thread group tracking: */ + u32 parent_exec_id; + u32 self_exec_id; + + /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ + spinlock_t alloc_lock; /* Protection of the PI data structures: */ - raw_spinlock_t pi_lock; + raw_spinlock_t pi_lock; - struct wake_q_node wake_q; + struct wake_q_node wake_q; #ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task */ - struct rb_root pi_waiters; - struct rb_node *pi_waiters_leftmost; - /* Deadlock detection and priority inheritance handling */ - struct rt_mutex_waiter *pi_blocked_on; + /* PI waiters blocked on a rt_mutex held by this task: */ + struct rb_root pi_waiters; + struct rb_node *pi_waiters_leftmost; + /* Deadlock detection and priority inheritance handling: */ + struct rt_mutex_waiter *pi_blocked_on; #endif #ifdef CONFIG_DEBUG_MUTEXES - /* mutex deadlock detection */ - struct mutex_waiter *blocked_on; + /* Mutex deadlock detection: */ + struct mutex_waiter *blocked_on; #endif + #ifdef CONFIG_TRACE_IRQFLAGS - unsigned int irq_events; - unsigned long hardirq_enable_ip; - unsigned long hardirq_disable_ip; - unsigned int hardirq_enable_event; - unsigned int hardirq_disable_event; - int hardirqs_enabled; - int hardirq_context; - unsigned long softirq_disable_ip; - unsigned long softirq_enable_ip; - unsigned int softirq_disable_event; - unsigned int softirq_enable_event; - int softirqs_enabled; - int softirq_context; + unsigned int irq_events; + unsigned long hardirq_enable_ip; + unsigned long hardirq_disable_ip; + unsigned int 
hardirq_enable_event; + unsigned int hardirq_disable_event; + int hardirqs_enabled; + int hardirq_context; + unsigned long softirq_disable_ip; + unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; + unsigned int softirq_enable_event; + int softirqs_enabled; + int softirq_context; #endif + #ifdef CONFIG_LOCKDEP -# define MAX_LOCK_DEPTH 48UL - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; +# define MAX_LOCK_DEPTH 48UL + u64 curr_chain_key; + int lockdep_depth; + unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; + gfp_t lockdep_reclaim_gfp; #endif + #ifdef CONFIG_UBSAN - unsigned int in_ubsan; + unsigned int in_ubsan; #endif -/* journalling filesystem info */ - void *journal_info; + /* Journalling filesystem info: */ + void *journal_info; -/* stacked block device info */ - struct bio_list *bio_list; + /* Stacked block device info: */ + struct bio_list *bio_list; #ifdef CONFIG_BLOCK -/* stack plugging */ - struct blk_plug *plug; + /* Stack plugging: */ + struct blk_plug *plug; #endif -/* VM state */ - struct reclaim_state *reclaim_state; + /* VM state: */ + struct reclaim_state *reclaim_state; + + struct backing_dev_info *backing_dev_info; - struct backing_dev_info *backing_dev_info; + struct io_context *io_context; - struct io_context *io_context; + /* Ptrace state: */ + unsigned long ptrace_message; + siginfo_t *last_siginfo; - unsigned long ptrace_message; - siginfo_t *last_siginfo; /* For ptrace use. */ - struct task_io_accounting ioac; -#if defined(CONFIG_TASK_XACCT) - u64 acct_rss_mem1; /* accumulated rss usage */ - u64 acct_vm_mem1; /* accumulated virtual memory usage */ - u64 acct_timexpd; /* stime + utime since last update */ + struct task_io_accounting ioac; +#ifdef CONFIG_TASK_XACCT + /* Accumulated RSS usage: */ + u64 acct_rss_mem1; + /* Accumulated virtual memory usage: */ + u64 acct_vm_mem1; + /* stime + utime since last update: */ + u64 acct_timexpd; #endif #ifdef CONFIG_CPUSETS - nodemask_t mems_allowed; /* Protected by alloc_lock */ - seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ - int cpuset_mem_spread_rotor; - int cpuset_slab_spread_rotor; + /* Protected by ->alloc_lock: */ + nodemask_t mems_allowed; + /* Seqence number to catch updates: */ + seqcount_t mems_allowed_seq; + int cpuset_mem_spread_rotor; + int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS - /* Control Group info protected by css_set_lock */ - struct css_set __rcu *cgroups; - /* cg_list protected by css_set_lock and tsk->alloc_lock */ - struct list_head cg_list; + /* Control Group info protected by css_set_lock: */ + struct css_set __rcu *cgroups; + /* cg_list protected by css_set_lock and tsk->alloc_lock: */ + struct list_head cg_list; #endif #ifdef CONFIG_INTEL_RDT_A - int closid; + int closid; #endif #ifdef CONFIG_FUTEX - struct robust_list_head __user *robust_list; + struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; #endif - struct list_head pi_state_list; - struct futex_pi_state *pi_state_cache; + struct list_head pi_state_list; + struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; - struct mutex perf_event_mutex; - struct list_head perf_event_list; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; + struct mutex perf_event_mutex; + struct list_head 
perf_event_list; #endif #ifdef CONFIG_DEBUG_PREEMPT - unsigned long preempt_disable_ip; + unsigned long preempt_disable_ip; #endif #ifdef CONFIG_NUMA - struct mempolicy *mempolicy; /* Protected by alloc_lock */ - short il_next; - short pref_node_fork; + /* Protected by alloc_lock: */ + struct mempolicy *mempolicy; + short il_next; + short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING - int numa_scan_seq; - unsigned int numa_scan_period; - unsigned int numa_scan_period_max; - int numa_preferred_nid; - unsigned long numa_migrate_retry; - u64 node_stamp; /* migration stamp */ - u64 last_task_numa_placement; - u64 last_sum_exec_runtime; - struct callback_head numa_work; - - struct list_head numa_entry; - struct numa_group *numa_group; + int numa_scan_seq; + unsigned int numa_scan_period; + unsigned int numa_scan_period_max; + int numa_preferred_nid; + unsigned long numa_migrate_retry; + /* Migration stamp: */ + u64 node_stamp; + u64 last_task_numa_placement; + u64 last_sum_exec_runtime; + struct callback_head numa_work; + + struct list_head numa_entry; + struct numa_group *numa_group; /* * numa_faults is an array split into four regions: @@ -840,8 +912,8 @@ struct task_struct { * during the current scan window. When the scan completes, the counts * in faults_memory and faults_cpu decay and these values are copied. */ - unsigned long *numa_faults; - unsigned long total_numa_faults; + unsigned long *numa_faults; + unsigned long total_numa_faults; /* * numa_faults_locality tracks if faults recorded during the last @@ -849,119 +921,132 @@ struct task_struct { * period is adapted based on the locality of the faults with different * weights depending on whether they were shared or private faults */ - unsigned long numa_faults_locality[3]; + unsigned long numa_faults_locality[3]; - unsigned long numa_pages_migrated; + unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ - struct tlbflush_unmap_batch tlb_ubc; + struct tlbflush_unmap_batch tlb_ubc; - struct rcu_head rcu; + struct rcu_head rcu; - /* - * cache last used pipe for splice - */ - struct pipe_inode_info *splice_pipe; + /* Cache last used pipe for splice(): */ + struct pipe_inode_info *splice_pipe; - struct page_frag task_frag; + struct page_frag task_frag; #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif #ifdef CONFIG_FAULT_INJECTION - int make_it_fail; + int make_it_fail; #endif /* - * when (nr_dirtied >= nr_dirtied_pause), it's time to call - * balance_dirty_pages() for some dirty throttling pause + * When (nr_dirtied >= nr_dirtied_pause), it's time to call + * balance_dirty_pages() for a dirty throttling pause: */ - int nr_dirtied; - int nr_dirtied_pause; - unsigned long dirty_paused_when; /* start of a write-and-pause period */ + int nr_dirtied; + int nr_dirtied_pause; + /* Start of a write-and-pause period: */ + unsigned long dirty_paused_when; #ifdef CONFIG_LATENCYTOP - int latency_record_count; - struct latency_record latency_record[LT_SAVECOUNT]; + int latency_record_count; + struct latency_record latency_record[LT_SAVECOUNT]; #endif /* - * time slack values; these are used to round up poll() and + * Time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. 
*/ - u64 timer_slack_ns; - u64 default_timer_slack_ns; + u64 timer_slack_ns; + u64 default_timer_slack_ns; #ifdef CONFIG_KASAN - unsigned int kasan_depth; + unsigned int kasan_depth; #endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Index of current stored address in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack *ret_stack; - /* time stamp for last schedule */ - unsigned long long ftrace_timestamp; + /* Index of current stored address in ret_stack: */ + int curr_ret_stack; + + /* Stack of return addresses for return function tracing: */ + struct ftrace_ret_stack *ret_stack; + + /* Timestamp for last schedule: */ + unsigned long long ftrace_timestamp; + /* * Number of functions that haven't been traced - * because of depth overrun. + * because of depth overrun: */ - atomic_t trace_overrun; - /* Pause for the tracing */ - atomic_t tracing_graph_pause; + atomic_t trace_overrun; + + /* Pause tracing: */ + atomic_t tracing_graph_pause; #endif + #ifdef CONFIG_TRACING - /* state flags for use by tracers */ - unsigned long trace; - /* bitmask and counter of trace recursion */ - unsigned long trace_recursion; + /* State flags for use by tracers: */ + unsigned long trace; + + /* Bitmask and counter of trace recursion: */ + unsigned long trace_recursion; #endif /* CONFIG_TRACING */ + #ifdef CONFIG_KCOV - /* Coverage collection mode enabled for this task (0 if disabled). */ - enum kcov_mode kcov_mode; - /* Size of the kcov_area. */ - unsigned kcov_size; - /* Buffer for coverage collection. */ - void *kcov_area; - /* kcov desciptor wired with this task or NULL. */ - struct kcov *kcov; + /* Coverage collection mode enabled for this task (0 if disabled): */ + enum kcov_mode kcov_mode; + + /* Size of the kcov_area: */ + unsigned int kcov_size; + + /* Buffer for coverage collection: */ + void *kcov_area; + + /* KCOV descriptor wired with this task or NULL: */ + struct kcov *kcov; #endif + #ifdef CONFIG_MEMCG - struct mem_cgroup *memcg_in_oom; - gfp_t memcg_oom_gfp_mask; - int memcg_oom_order; + struct mem_cgroup *memcg_in_oom; + gfp_t memcg_oom_gfp_mask; + int memcg_oom_order; - /* number of pages to reclaim on returning to userland */ - unsigned int memcg_nr_pages_over_high; + /* Number of pages to reclaim on returning to userland: */ + unsigned int memcg_nr_pages_over_high; #endif + #ifdef CONFIG_UPROBES - struct uprobe_task *utask; + struct uprobe_task *utask; #endif #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) - unsigned int sequential_io; - unsigned int sequential_io_avg; + unsigned int sequential_io; + unsigned int sequential_io_avg; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP - unsigned long task_state_change; + unsigned long task_state_change; #endif - int pagefault_disabled; + int pagefault_disabled; #ifdef CONFIG_MMU - struct task_struct *oom_reaper_list; + struct task_struct *oom_reaper_list; #endif #ifdef CONFIG_VMAP_STACK - struct vm_struct *stack_vm_area; + struct vm_struct *stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK - /* A live task holds one reference. */ - atomic_t stack_refcount; + /* A live task holds one reference: */ + atomic_t stack_refcount; #endif -/* CPU-specific state of this task */ - struct thread_struct thread; -/* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! 
- */ + /* CPU-specific state of this task: */ + struct thread_struct thread; + + /* + * WARNING: on x86, 'thread_struct' contains a variable-sized + * structure. It *MUST* be at the end of 'task_struct'. + * + * Do not put anything below here! + */ }; static inline struct pid *task_pid(struct task_struct *task) @@ -975,7 +1060,7 @@ static inline struct pid *task_tgid(struct task_struct *task) } /* - * Without tasklist or rcu lock it is not safe to dereference + * Without tasklist or RCU lock it is not safe to dereference * the result of task_pgrp/task_session even if task == current, * we can race with another thread doing sys_setsid/sys_setpgid. */ @@ -1002,16 +1087,14 @@ static inline struct pid *task_session(struct task_struct *task) * * see also pid_nr() etc in include/linux/pid.h */ -pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, - struct pid_namespace *ns); +pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns); static inline pid_t task_pid_nr(struct task_struct *tsk) { return tsk->pid; } -static inline pid_t task_pid_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns); } @@ -1027,15 +1110,28 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk) return tsk->tgid; } -pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); +extern pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns); static inline pid_t task_tgid_vnr(struct task_struct *tsk) { return pid_vnr(task_tgid(tsk)); } +/** + * pid_alive - check that a task structure is not stale + * @p: Task structure to be checked. + * + * Test if a process is not yet dead (at most zombie state) + * If pid_alive fails, then pointers within the task structure + * can be stale and must not be dereferenced. + * + * Return: 1 if the process is alive. 0 otherwise. + */ +static inline int pid_alive(const struct task_struct *p) +{ + return p->pids[PIDTYPE_PID].pid != NULL; +} -static inline int pid_alive(const struct task_struct *p); static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) { pid_t pid = 0; @@ -1053,8 +1149,7 @@ static inline pid_t task_ppid_nr(const struct task_struct *tsk) return task_ppid_nr_ns(tsk, &init_pid_ns); } -static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns); } @@ -1065,8 +1160,7 @@ static inline pid_t task_pgrp_vnr(struct task_struct *tsk) } -static inline pid_t task_session_nr_ns(struct task_struct *tsk, - struct pid_namespace *ns) +static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns); } @@ -1076,27 +1170,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } -/* obsolete, do not use */ +/* Obsolete, do not use: */ static inline pid_t task_pgrp_nr(struct task_struct *tsk) { return task_pgrp_nr_ns(tsk, &init_pid_ns); } -/** - * pid_alive - check that a task structure is not stale - * @p: Task structure to be checked. - * - * Test if a process is not yet dead (at most zombie state) - * If pid_alive fails, then pointers within the task structure - * can be stale and must not be dereferenced. 
- * - * Return: 1 if the process is alive. 0 otherwise. - */ -static inline int pid_alive(const struct task_struct *p) -{ - return p->pids[PIDTYPE_PID].pid != NULL; -} - /** * is_global_init - check if a task structure is init. Since init * is free to have sub-threads we need to check tgid. @@ -1116,34 +1195,34 @@ extern struct pid *cad_pid; /* * Per process flags */ -#define PF_IDLE 0x00000002 /* I am an IDLE thread */ -#define PF_EXITING 0x00000004 /* getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ -#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ -#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ -#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ -#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ -#define PF_DUMPCORE 0x00000200 /* dumped core */ -#define PF_SIGNALED 0x00000400 /* killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ -#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ -#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ -#define PF_FROZEN 0x00010000 /* frozen for system suspend */ -#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ -#define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ -#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ -#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ -#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ -#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ -#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ -#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ +#define PF_IDLE 0x00000002 /* I am an IDLE thread */ +#define PF_EXITING 0x00000004 /* Getting shut down */ +#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ +#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ +#define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* Process policy on mce errors */ +#define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ +#define PF_DUMPCORE 0x00000200 /* Dumped core */ +#define PF_SIGNALED 0x00000400 /* Killed by a signal */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ +#define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ +#define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ +#define PF_FROZEN 0x00010000 /* Frozen for system suspend */ +#define PF_FSTRANS 0x00020000 /* Inside a filesystem transaction */ +#define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO 
involved */ +#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ +#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ +#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ +#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ /* * Only the _current_ task can read/write to tsk->flags, but other @@ -1156,33 +1235,38 @@ extern struct pid *cad_pid; * child is not running and in turn not changing child->flags * at the same time the parent does it. */ -#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) -#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) -#define clear_used_math() clear_stopped_child_used_math(current) -#define set_used_math() set_stopped_child_used_math(current) +#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0) +#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0) +#define clear_used_math() clear_stopped_child_used_math(current) +#define set_used_math() set_stopped_child_used_math(current) + #define conditional_stopped_child_used_math(condition, child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0) -#define conditional_used_math(condition) \ - conditional_stopped_child_used_math(condition, current) + +#define conditional_used_math(condition) conditional_stopped_child_used_math(condition, current) + #define copy_to_stopped_child_used_math(child) \ do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0) + /* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */ -#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) -#define used_math() tsk_used_math(current) +#define tsk_used_math(p) ((p)->flags & PF_USED_MATH) +#define used_math() tsk_used_math(current) /* Per-process atomic flags. */ -#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ -#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ -#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ -#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ +#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. 
*/ +#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ +#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ +#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ { return test_bit(PFA_##name, &p->atomic_flags); } + #define TASK_PFA_SET(name, func) \ static inline void task_set_##func(struct task_struct *p) \ { set_bit(PFA_##name, &p->atomic_flags); } + #define TASK_PFA_CLEAR(name, func) \ static inline void task_clear_##func(struct task_struct *p) \ { clear_bit(PFA_##name, &p->atomic_flags); } @@ -1201,30 +1285,23 @@ TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) -static inline void tsk_restore_flags(struct task_struct *task, - unsigned long orig_flags, unsigned long flags) +static inline void +tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { task->flags &= ~flags; task->flags |= orig_flags & flags; } -extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, - const struct cpumask *trial); -extern int task_can_attach(struct task_struct *p, - const struct cpumask *cs_cpus_allowed); +extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); +extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); #ifdef CONFIG_SMP -extern void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask); - -extern int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask); +extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); +extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); #else -static inline void do_set_cpus_allowed(struct task_struct *p, - const struct cpumask *new_mask) +static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) { } -static inline int set_cpus_allowed_ptr(struct task_struct *p, - const struct cpumask *new_mask) +static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { if (!cpumask_test_cpu(0, new_mask)) return -EINVAL; @@ -1239,6 +1316,7 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, extern int yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); + /** * task_nice - return the nice value of a given task. * @p: the task in question. @@ -1249,16 +1327,15 @@ static inline int task_nice(const struct task_struct *p) { return PRIO_TO_NICE((p)->static_prio); } + extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); -extern int sched_setscheduler(struct task_struct *, int, - const struct sched_param *); -extern int sched_setscheduler_nocheck(struct task_struct *, int, - const struct sched_param *); -extern int sched_setattr(struct task_struct *, - const struct sched_attr *); +extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); +extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); +extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern struct task_struct *idle_task(int cpu); + /** * is_idle_task - is the specified task an idle task? * @p: the task in question. 
@@ -1269,6 +1346,7 @@ static inline bool is_idle_task(const struct task_struct *p) { return !!(p->flags & PF_IDLE); } + extern struct task_struct *curr_task(int cpu); extern void ia64_set_curr_task(int cpu, struct task_struct *p); @@ -1302,23 +1380,25 @@ static inline struct thread_info *task_thread_info(struct task_struct *task) */ extern struct task_struct *find_task_by_vpid(pid_t nr); -extern struct task_struct *find_task_by_pid_ns(pid_t nr, - struct pid_namespace *ns); +extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); + #ifdef CONFIG_SMP - extern void kick_process(struct task_struct *tsk); +extern void kick_process(struct task_struct *tsk); #else - static inline void kick_process(struct task_struct *tsk) { } +static inline void kick_process(struct task_struct *tsk) { } #endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); + static inline void set_task_comm(struct task_struct *tsk, const char *from) { __set_task_comm(tsk, from, false); } + extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP @@ -1326,15 +1406,15 @@ void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else static inline void scheduler_ipi(void) { } -static inline unsigned long wait_task_inactive(struct task_struct *p, - long match_state) +static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { return 1; } #endif -/* set thread flags in other task's structures - * - see asm/thread_info.h for TIF_xxxx flags available +/* + * Set thread flags in other task's structures. + * See asm/thread_info.h for TIF_xxxx flags available: */ static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) { -- GitLab From d3aa9c9f212a729e46653d4c1eb6a9ab190efe3a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 15 Dec 2016 15:20:34 +0100 Subject: [PATCH 1052/1084] ixgbe: update the rss key on h/w, when ethtool ask for it Currently ixgbe_set_rxfh() updates the rss_key copy in the driver memory, but does not push the new value into the h/w. This commit add a new helper for the latter operation and call it in ixgbe_set_rxfh(), so that the h/w rss key value can be really updated via ethtool. 
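With the helper wired up, the key the hardware hashes with should finally track what userspace configures. A rough way to exercise the fix (eth0 and the key bytes are placeholders; on this hardware the key is 40 bytes, i.e. ten 32-bit RSSRK registers) is to program a key and read it back:

    # ethtool -X eth0 hkey <40 colon-separated key bytes>
    # ethtool -x eth0

Before this change the driver would happily report the new key back (it only updated its in-memory copy), while the NIC kept spreading traffic with the old one; with the new helper the RSSRK registers are rewritten as well.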
Signed-off-by: Paolo Abeni Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 + .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 +++- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 19 ++++++++++++++++--- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index a2cc43d28888..7a951b116821 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -929,6 +929,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, struct ixgbe_adapter *adapter, struct ixgbe_ring *tx_ring); u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter); +void ixgbe_store_key(struct ixgbe_adapter *adapter); void ixgbe_store_reta(struct ixgbe_adapter *adapter); s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index a7574c7b12af..90fa5bf23d1b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2998,8 +2998,10 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir, } /* Fill out the rss hash key */ - if (key) + if (key) { memcpy(adapter->rss_key, key, ixgbe_get_rxfh_key_size(netdev)); + ixgbe_store_key(adapter); + } ixgbe_store_reta(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 060cdce8058f..67ab13fd163c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3473,6 +3473,21 @@ u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter) return 512; } +/** + * ixgbe_store_key - Write the RSS key to HW + * @adapter: device handle + * + * Write the RSS key stored in adapter.rss_key to HW. + */ +void ixgbe_store_key(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + + for (i = 0; i < 10; i++) + IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), adapter->rss_key[i]); +} + /** * ixgbe_store_reta - Write the RETA table to HW * @adapter: device handle @@ -3538,7 +3553,6 @@ static void ixgbe_store_vfreta(struct ixgbe_adapter *adapter) static void ixgbe_setup_reta(struct ixgbe_adapter *adapter) { - struct ixgbe_hw *hw = &adapter->hw; u32 i, j; u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter); u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; @@ -3551,8 +3565,7 @@ static void ixgbe_setup_reta(struct ixgbe_adapter *adapter) rss_i = 4; /* Fill out hash function seeds */ - for (i = 0; i < 10; i++) - IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), adapter->rss_key[i]); + ixgbe_store_key(adapter); /* Fill out redirection table */ memset(adapter->rss_indir_tbl, 0, sizeof(adapter->rss_indir_tbl)); -- GitLab From a528d35e8bfcc521d7cb70aaf03e1bd296c8493f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 31 Jan 2017 16:46:22 +0000 Subject: [PATCH 1053/1084] statx: Add a system call to make enhanced file info available Add a system call to make extended file information available, including file creation and some attribute flags where available through the underlying filesystem. The getattr inode operation is altered to take two additional arguments: a u32 request_mask and an unsigned int flags that indicate the synchronisation mode. This change is propagated to the vfs_getattr*() function. 
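As a sketch of the new in-kernel calling convention (the hunks below show the real conversions; the helper name here is made up), a caller that only cares about the file type can now request just that field and accept ordinary stat()-like synchronisation:

        /* Hypothetical helper, assuming the usual <linux/fs.h> / <linux/stat.h> includes. */
        static int example_path_is_blockdev(const struct path *path)
        {
                struct kstat stat;
                int err;

                /* Ask for no more than the type; don't force a server round trip. */
                err = vfs_getattr(path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
                if (err)
                        return err;

                return S_ISBLK(stat.mode);
        }
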
Functions like vfs_stat() are now inline wrappers around new functions vfs_statx() and vfs_statx_fd() to reduce stack usage. ======== OVERVIEW ======== The idea was initially proposed as a set of xattrs that could be retrieved with getxattr(), but the general preference proved to be for a new syscall with an extended stat structure. A number of requests were gathered for features to be included. The following have been included: (1) Make the fields a consistent size on all arches and make them large. (2) Spare space, request flags and information flags are provided for future expansion. (3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an __s64). (4) Creation time: The SMB protocol carries the creation time, which could be exported by Samba, which will in turn help CIFS make use of FS-Cache as that can be used for coherency data (stx_btime). This is also specified in NFSv4 as a recommended attribute and could be exported by NFSD [Steve French]. (5) Lightweight stat: Ask for just those details of interest, and allow a netfs (such as NFS) to approximate anything not of interest, possibly without going to the server [Trond Myklebust, Ulrich Drepper, Andreas Dilger] (AT_STATX_DONT_SYNC). (6) Heavyweight stat: Force a netfs to go to the server, even if it thinks its cached attributes are up to date [Trond Myklebust] (AT_STATX_FORCE_SYNC). And the following have been left out for future extension: (7) Data version number: Could be used by userspace NFS servers [Aneesh Kumar]. Can also be used to modify fill_post_wcc() in NFSD which retrieves i_version directly, but has just called vfs_getattr(). It could get it from the kstat struct if it used vfs_xgetattr() instead. (There's disagreement on the exact semantics of a single field, since not all filesystems do this the same way). (8) BSD stat compatibility: Including more fields from the BSD stat such as creation time (st_btime) and inode generation number (st_gen) [Jeremy Allison, Bernd Schubert]. (9) Inode generation number: Useful for FUSE and userspace NFS servers [Bernd Schubert]. (This was asked for but later deemed unnecessary with the open-by-handle capability available and caused disagreement as to whether it's a security hole or not). (10) Extra coherency data may be useful in making backups [Andreas Dilger]. (No particular data were offered, but things like last backup timestamp, the data version number and the DOS archive bit would come into this category). (11) Allow the filesystem to indicate what it can/cannot provide: A filesystem can now say it doesn't support a standard stat feature if that isn't available, so if, for instance, inode numbers or UIDs don't exist or are fabricated locally... (This requires a separate system call - I have an fsinfo() call idea for this). (12) Store a 16-byte volume ID in the superblock that can be returned in struct xstat [Steve French]. (Deferred to fsinfo). (13) Include granularity fields in the time data to indicate the granularity of each of the times (NFSv4 time_delta) [Steve French]. (Deferred to fsinfo). (14) FS_IOC_GETFLAGS value. These could be translated to BSD's st_flags. Note that the Linux IOC flags are a mess and filesystems such as Ext4 define flags that aren't in linux/fs.h, so translation in the kernel may be a necessity (or, possibly, we provide the filesystem type too). (Some attributes are made available in stx_attributes, but the general feeling was that the IOC flags were to ext[234]-specific and shouldn't be exposed through statx this way). 
(15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer, Michael Kerrisk]. (Deferred, probably to fsinfo. Finding out if there's an ACL or seclabal might require extra filesystem operations). (16) Femtosecond-resolution timestamps [Dave Chinner]. (A __reserved field has been left in the statx_timestamp struct for this - if there proves to be a need). (17) A set multiple attributes syscall to go with this. =============== NEW SYSTEM CALL =============== The new system call is: int ret = statx(int dfd, const char *filename, unsigned int flags, unsigned int mask, struct statx *buffer); The dfd, filename and flags parameters indicate the file to query, in a similar way to fstatat(). There is no equivalent of lstat() as that can be emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags. There is also no equivalent of fstat() as that can be emulated by passing a NULL filename to statx() with the fd of interest in dfd. Whether or not statx() synchronises the attributes with the backing store can be controlled by OR'ing a value into the flags argument (this typically only affects network filesystems): (1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this respect. (2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise its attributes with the server - which might require data writeback to occur to get the timestamps correct. (3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a network filesystem. The resulting values should be considered approximate. mask is a bitmask indicating the fields in struct statx that are of interest to the caller. The user should set this to STATX_BASIC_STATS to get the basic set returned by stat(). It should be noted that asking for more information may entail extra I/O operations. buffer points to the destination for the data. This must be 256 bytes in size. ====================== MAIN ATTRIBUTES RECORD ====================== The following structures are defined in which to return the main attribute set: struct statx_timestamp { __s64 tv_sec; __s32 tv_nsec; __s32 __reserved; }; struct statx { __u32 stx_mask; __u32 stx_blksize; __u64 stx_attributes; __u32 stx_nlink; __u32 stx_uid; __u32 stx_gid; __u16 stx_mode; __u16 __spare0[1]; __u64 stx_ino; __u64 stx_size; __u64 stx_blocks; __u64 __spare1[1]; struct statx_timestamp stx_atime; struct statx_timestamp stx_btime; struct statx_timestamp stx_ctime; struct statx_timestamp stx_mtime; __u32 stx_rdev_major; __u32 stx_rdev_minor; __u32 stx_dev_major; __u32 stx_dev_minor; __u64 __spare2[14]; }; The defined bits in request_mask and stx_mask are: STATX_TYPE Want/got stx_mode & S_IFMT STATX_MODE Want/got stx_mode & ~S_IFMT STATX_NLINK Want/got stx_nlink STATX_UID Want/got stx_uid STATX_GID Want/got stx_gid STATX_ATIME Want/got stx_atime{,_ns} STATX_MTIME Want/got stx_mtime{,_ns} STATX_CTIME Want/got stx_ctime{,_ns} STATX_INO Want/got stx_ino STATX_SIZE Want/got stx_size STATX_BLOCKS Want/got stx_blocks STATX_BASIC_STATS [The stuff in the normal stat struct] STATX_BTIME Want/got stx_btime{,_ns} STATX_ALL [All currently available stuff] stx_btime is the file creation time, stx_mask is a bitmask indicating the data provided and __spares*[] are where as-yet undefined fields can be placed. Time fields are structures with separate seconds and nanoseconds fields plus a reserved field in case we want to add even finer resolution. Note that times will be negative if before 1970; in such a case, the nanosecond fields will also be negative if not zero. 
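To make the calling convention concrete, here is a minimal userspace sketch (distinct from the bundled sample program) that asks for the basic stats plus the birth time and only trusts stx_btime if the filesystem reported it. It assumes the updated uapi headers are installed and uses syscall() directly, since no libc wrapper exists yet:

        #include <stdio.h>
        #include <fcntl.h>              /* AT_FDCWD */
        #include <unistd.h>
        #include <sys/syscall.h>        /* __NR_statx, once the new headers are installed */
        #include <linux/stat.h>         /* struct statx, STATX_* */

        int main(int argc, char *argv[])
        {
                struct statx stx;

                if (argc != 2) {
                        fprintf(stderr, "usage: %s <path>\n", argv[0]);
                        return 1;
                }

                /* flags == 0 gives the default stat()-like synchronisation. */
                if (syscall(__NR_statx, AT_FDCWD, argv[1], 0,
                            STATX_BASIC_STATS | STATX_BTIME, &stx) == -1) {
                        perror("statx");
                        return 1;
                }

                printf("size=%llu blocks=%llu\n",
                       (unsigned long long)stx.stx_size,
                       (unsigned long long)stx.stx_blocks);

                /* Only believe stx_btime if the bit came back in stx_mask. */
                if (stx.stx_mask & STATX_BTIME)
                        printf("btime=%lld\n", (long long)stx.stx_btime.tv_sec);

                return 0;
        }
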
The bits defined in the stx_attributes field convey information about a file, how it is accessed, where it is and what it does. The following attributes map to FS_*_FL flags and are the same numerical value: STATX_ATTR_COMPRESSED File is compressed by the fs STATX_ATTR_IMMUTABLE File is marked immutable STATX_ATTR_APPEND File is append-only STATX_ATTR_NODUMP File is not to be dumped STATX_ATTR_ENCRYPTED File requires key to decrypt in fs Within the kernel, the supported flags are listed by: KSTAT_ATTR_FS_IOC_FLAGS [Are any other IOC flags of sufficient general interest to be exposed through this interface?] New flags include: STATX_ATTR_AUTOMOUNT Object is an automount trigger These are for the use of GUI tools that might want to mark files specially, depending on what they are. Fields in struct statx come in a number of classes: (0) stx_dev_*, stx_blksize. These are local system information and are always available. (1) stx_mode, stx_nlinks, stx_uid, stx_gid, stx_[amc]time, stx_ino, stx_size, stx_blocks. These will be returned whether the caller asks for them or not. The corresponding bits in stx_mask will be set to indicate whether they actually have valid values. If the caller didn't ask for them, then they may be approximated. For example, NFS won't waste any time updating them from the server, unless as a byproduct of updating something requested. If the values don't actually exist for the underlying object (such as UID or GID on a DOS file), then the bit won't be set in the stx_mask, even if the caller asked for the value. In such a case, the returned value will be a fabrication. Note that there are instances where the type might not be valid, for instance Windows reparse points. (2) stx_rdev_*. This will be set only if stx_mode indicates we're looking at a blockdev or a chardev, otherwise will be 0. (3) stx_btime. Similar to (1), except this will be set to 0 if it doesn't exist. ======= TESTING ======= The following test program can be used to test the statx system call: samples/statx/test-statx.c Just compile and run, passing it paths to the files you want to examine. The file is built automatically if CONFIG_SAMPLES is enabled. Here's some example output. Firstly, an NFS directory that crosses to another FSID. Note that the AUTOMOUNT attribute is set because transiting this directory will cause d_automount to be invoked by the VFS. [root@andromeda ~]# /tmp/test-statx -A /warthog/data statx(/warthog/data) = 0 results=7ff Size: 4096 Blocks: 8 IO Block: 1048576 directory Device: 00:26 Inode: 1703937 Links: 125 Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041 Access: 2016-11-24 09:02:12.219699527+0000 Modify: 2016-11-17 10:44:36.225653653+0000 Change: 2016-11-17 10:44:36.225653653+0000 Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------) Secondly, the result of automounting on that directory. 
[root@andromeda ~]# /tmp/test-statx /warthog/data statx(/warthog/data) = 0 results=7ff Size: 4096 Blocks: 8 IO Block: 1048576 directory Device: 00:27 Inode: 2 Links: 125 Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041 Access: 2016-11-24 09:02:12.219699527+0000 Modify: 2016-11-17 10:44:36.225653653+0000 Change: 2016-11-17 10:44:36.225653653+0000 Signed-off-by: David Howells Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 3 +- Documentation/filesystems/vfs.txt | 3 +- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + drivers/base/devtmpfs.c | 3 +- drivers/block/loop.c | 3 +- drivers/mtd/ubi/build.c | 2 +- drivers/mtd/ubi/kapi.c | 2 +- drivers/staging/lustre/lustre/llite/file.c | 9 +- .../lustre/lustre/llite/llite_internal.h | 3 +- fs/9p/vfs_inode.c | 10 +- fs/9p/vfs_inode_dotl.c | 5 +- fs/afs/inode.c | 8 +- fs/afs/internal.h | 2 +- fs/bad_inode.c | 4 +- fs/btrfs/inode.c | 6 +- fs/ceph/inode.c | 6 +- fs/ceph/super.h | 4 +- fs/cifs/cifsfs.h | 2 +- fs/cifs/inode.c | 5 +- fs/coda/coda_linux.h | 2 +- fs/coda/inode.c | 7 +- fs/ecryptfs/inode.c | 13 +- fs/exportfs/expfs.c | 3 +- fs/ext4/ext4.h | 3 +- fs/ext4/inode.c | 6 +- fs/f2fs/f2fs.h | 4 +- fs/f2fs/file.c | 6 +- fs/fat/fat.h | 4 +- fs/fat/file.c | 5 +- fs/fuse/dir.c | 6 +- fs/gfs2/inode.c | 11 +- fs/kernfs/inode.c | 8 +- fs/kernfs/kernfs-internal.h | 4 +- fs/libfs.c | 12 +- fs/minix/inode.c | 11 +- fs/minix/minix.h | 2 +- fs/nfs/inode.c | 13 +- fs/nfs/namespace.c | 9 +- fs/nfsd/nfs4xdr.c | 4 +- fs/nfsd/vfs.h | 3 +- fs/ocfs2/file.c | 11 +- fs/ocfs2/file.h | 4 +- fs/orangefs/inode.c | 13 +- fs/orangefs/orangefs-kernel.h | 5 +- fs/overlayfs/copy_up.c | 6 +- fs/overlayfs/dir.c | 10 +- fs/overlayfs/inode.c | 7 +- fs/proc/base.c | 12 +- fs/proc/generic.c | 6 +- fs/proc/internal.h | 2 +- fs/proc/proc_net.c | 6 +- fs/proc/proc_sysctl.c | 5 +- fs/proc/root.c | 6 +- fs/stat.c | 214 ++++++++++++--- fs/sysv/itree.c | 7 +- fs/sysv/sysv.h | 2 +- fs/ubifs/dir.c | 6 +- fs/ubifs/ubifs.h | 4 +- fs/udf/symlink.c | 5 +- fs/xfs/xfs_iops.c | 9 +- include/linux/fs.h | 35 ++- include/linux/nfs_fs.h | 2 +- include/linux/stat.h | 24 +- include/linux/syscalls.h | 3 + include/uapi/linux/fcntl.h | 5 + include/uapi/linux/stat.h | 131 +++++++++ mm/shmem.c | 6 +- samples/Kconfig | 6 + samples/Makefile | 2 +- samples/statx/Makefile | 10 + samples/statx/test-statx.c | 254 ++++++++++++++++++ 72 files changed, 822 insertions(+), 214 deletions(-) create mode 100644 samples/statx/Makefile create mode 100644 samples/statx/test-statx.c diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index ace63cd7af8c..fdcfdd79682a 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -58,7 +58,8 @@ prototypes: int (*permission) (struct inode *, int, unsigned int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct dentry *, struct kstat *, + u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index b968084eeac1..569211703721 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -382,7 +382,8 @@ struct inode_operations { int (*permission) (struct inode 
*, int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct dentry *, struct kstat *, + u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); void (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 2b3618542544..9ba050fe47f3 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -389,3 +389,4 @@ 380 i386 pkey_mprotect sys_pkey_mprotect 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free +383 i386 statx sys_statx diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index e93ef0b38db8..5aef183e2f85 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -338,6 +338,7 @@ 329 common pkey_mprotect sys_pkey_mprotect 330 common pkey_alloc sys_pkey_alloc 331 common pkey_free sys_pkey_free +332 common statx sys_statx # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 44a74cf1372c..d2fb9c8ed205 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -309,7 +309,8 @@ static int handle_remove(const char *nodename, struct device *dev) if (d_really_is_positive(dentry)) { struct kstat stat; struct path p = {.mnt = parent.mnt, .dentry = dentry}; - err = vfs_getattr(&p, &stat); + err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, + AT_STATX_SYNC_AS_STAT); if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { struct iattr newattrs; /* diff --git a/drivers/block/loop.c b/drivers/block/loop.c index eeb1db73f44e..8f4051999741 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1175,7 +1175,8 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) if (lo->lo_state != Lo_bound) return -ENXIO; - error = vfs_getattr(&file->f_path, &stat); + error = vfs_getattr(&file->f_path, &stat, + STATX_INO, AT_STATX_SYNC_AS_STAT); if (error) return error; memset(info, 0, sizeof(*info)); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 85d54f37e28f..77513195f50e 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1159,7 +1159,7 @@ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) if (err) return ERR_PTR(err); - err = vfs_getattr(&path, &stat); + err = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); path_put(&path); if (err) return ERR_PTR(err); diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 88b1897aeb40..d4b2e8744498 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -314,7 +314,7 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) if (error) return ERR_PTR(error); - error = vfs_getattr(&path, &stat); + error = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); path_put(&path); if (error) return ERR_PTR(error); diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 10adfcdd7035..481c0d01d4c6 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -2952,15 +2952,16 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) return rc; } -int 
ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) +int ll_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(de); + struct inode *inode = d_inode(path->dentry); struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_inode_info *lli = ll_i2info(inode); int res; - res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE | - MDS_INODELOCK_LOOKUP); + res = ll_inode_revalidate(path->dentry, + MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP); ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1); if (res) diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index ecdfd0c29b7f..55f68acd85d1 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -750,7 +750,8 @@ int ll_file_open(struct inode *inode, struct file *file); int ll_file_release(struct inode *inode, struct file *file); int ll_release_openhandle(struct inode *, struct lookup_intent *); int ll_md_real_close(struct inode *inode, fmode_t fmode); -int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); +int ll_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); struct posix_acl *ll_get_acl(struct inode *inode, int type); int ll_migrate(struct inode *parent, struct file *file, int mdtidx, const char *name, int namelen); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f4f4450119e4..f1d96233670c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1047,16 +1047,18 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, /** * v9fs_vfs_getattr - retrieve file metadata - * @mnt: mount information - * @dentry: file to get attributes on + * @path: Object to query * @stat: metadata structure to populate + * @request_mask: Mask of STATX_xxx flags indicating the caller's interests + * @flags: AT_STATX_xxx setting * */ static int -v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +v9fs_vfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_wstat *st; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 5999bd050678..570e63ee5b71 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -468,9 +468,10 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, } static int -v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_stat_dotl *st; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 86cc7264c21c..1e4897a048d2 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -375,12 +375,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) /* * read the attributes of an inode */ -int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int afs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode; - - inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8acf3670e756..5dfa56903a2d 
100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -533,7 +533,7 @@ extern struct inode *afs_iget(struct super_block *, struct key *, struct afs_callback *); extern void afs_zap_data(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); -extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int afs_setattr(struct dentry *, struct iattr *); extern void afs_evict_inode(struct inode *); extern int afs_drop_inode(struct inode *); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 5f685c819298..bb53728c7a31 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -89,8 +89,8 @@ static int bad_inode_permission(struct inode *inode, int mask) return -EIO; } -static int bad_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int bad_inode_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { return -EIO; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ee6978d80491..c40060cc481f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9413,11 +9413,11 @@ int btrfs_init_cachep(void) return -ENOMEM; } -static int btrfs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) +static int btrfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { u64 delalloc_bytes; - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); u32 blocksize = inode->i_sb->s_blocksize; generic_fillattr(inode, stat); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index fd8f771f99b7..d449e1c03cbd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2187,10 +2187,10 @@ int ceph_permission(struct inode *inode, int mask) * Get all attributes. Hopefully somedata we'll have a statlite() * and can limit the fields we require to be accurate. 
*/ -int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ceph_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct ceph_inode_info *ci = ceph_inode(inode); int err; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e9410bcf4113..fe6b9cfc4013 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -784,8 +784,8 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) extern int ceph_permission(struct inode *inode, int mask); extern int __ceph_setattr(struct inode *inode, struct iattr *attr); extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); -extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +extern int ceph_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); /* xattr.c */ int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index c9c00a862036..da717fee3026 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -83,7 +83,7 @@ extern int cifs_revalidate_dentry(struct dentry *); extern int cifs_invalidate_mapping(struct inode *inode); extern int cifs_revalidate_mapping(struct inode *inode); extern int cifs_zap_mapping(struct inode *inode); -extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int cifs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int cifs_setattr(struct dentry *, struct iattr *); extern const struct inode_operations cifs_file_inode_ops; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7ab5be7944aa..1363fff460b9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1990,9 +1990,10 @@ int cifs_revalidate_dentry(struct dentry *dentry) return cifs_revalidate_mapping(inode); } -int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int cifs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct inode *inode = d_inode(dentry); diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index 5104d84c4f64..d3c361883c28 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -47,7 +47,7 @@ int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); int coda_permission(struct inode *inode, int mask); int coda_revalidate_inode(struct inode *); -int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); +int coda_getattr(const struct path *, struct kstat *, u32, unsigned int); int coda_setattr(struct dentry *, struct iattr *); /* this file: heloers */ diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 71dbe7e287ce..2dea594da199 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -255,11 +255,12 @@ static void coda_evict_inode(struct inode *inode) coda_cache_clear_inode(inode); } -int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int coda_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - int err = coda_revalidate_inode(d_inode(dentry)); + int err = coda_revalidate_inode(d_inode(path->dentry)); if (!err) - generic_fillattr(d_inode(dentry), stat); + generic_fillattr(d_inode(path->dentry), stat); return 
err; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e7413f82d27b..efc2db42d175 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -959,9 +959,10 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) return rc; } -static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ecryptfs_getattr_link(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct ecryptfs_mount_crypt_stat *mount_crypt_stat; int rc = 0; @@ -983,13 +984,15 @@ static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, return rc; } -static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ecryptfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct kstat lower_stat; int rc; - rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat); + rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat, + request_mask, flags); if (!rc) { fsstack_copy_attr_all(d_inode(dentry), ecryptfs_inode_to_lower(d_inode(dentry))); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index a4b531be9168..f2d24bb8d745 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -299,7 +299,8 @@ static int get_name(const struct path *path, char *name, struct dentry *child) * filesystem supports 64-bit inode numbers. So we need to * actually call ->getattr, not just read i_ino: */ - error = vfs_getattr_nosec(&child_path, &stat); + error = vfs_getattr_nosec(&child_path, &stat, + STATX_INO, AT_STATX_SYNC_AS_STAT); if (error) return error; buffer.ino = stat.ino; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2fd17e8e4984..025d2e85f454 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2462,8 +2462,7 @@ extern struct inode *ext4_iget(struct super_block *, unsigned long); extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); -extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); extern int ext4_sync_inode(handle_t *, struct inode *); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 971f66342080..7385e6a6b6cb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5387,13 +5387,13 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) return error; } -int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ext4_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { struct inode *inode; unsigned long long delalloc_blocks; - inode = d_inode(dentry); + inode = d_inode(path->dentry); generic_fillattr(inode, stat); /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d1483136fed6..e849f83d6114 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2040,8 +2040,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void truncate_data_blocks(struct dnode_of_data *dn); int truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); -int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, - 
struct kstat *stat); +int f2fs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int f2fs_setattr(struct dentry *dentry, struct iattr *attr); int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); int truncate_data_blocks_range(struct dnode_of_data *dn, int count); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 78e65288f2b2..5f7317875a67 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -633,10 +633,10 @@ int f2fs_truncate(struct inode *inode) return 0; } -int f2fs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) +int f2fs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); stat->blocks <<= 3; return 0; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e6b764a17a9c..051dac1ce3be 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -364,8 +364,8 @@ extern const struct file_operations fat_file_operations; extern const struct inode_operations fat_file_inode_operations; extern int fat_setattr(struct dentry *dentry, struct iattr *attr); extern void fat_truncate_blocks(struct inode *inode, loff_t offset); -extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +extern int fat_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); extern int fat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/fs/fat/file.c b/fs/fat/file.c index 3d04b124bce0..4724cc9ad650 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -365,9 +365,10 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset) fat_flush_inodes(inode->i_sb, inode, NULL); } -int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int fat_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 811fd8929a18..beb3d64f16e2 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1777,10 +1777,10 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) return ret; } -static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, - struct kstat *stat) +static int fuse_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(entry); + struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); if (!fuse_allow_current_process(fc)) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index eb7724b8578a..288c15f385bd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1959,9 +1959,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) /** * gfs2_getattr - Read out an inode's attributes - * @mnt: The vfsmount the inode is being accessed from - * @dentry: The dentry to stat + * @path: Object to query * @stat: The inode's stats + * @request_mask: Mask of STATX_xxx flags indicating the caller's interests + * @flags: AT_STATX_xxx setting * * This may be called from the VFS directly, or from within GFS2 with the * inode locked, so we look to see if the glock is already locked and only @@ -1972,10 +1973,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) * Returns: errno */ -static 
int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int gfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int error; diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index ac9e108ce1ea..fb4b4a79a0d6 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -200,11 +200,11 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode) set_nlink(inode, kn->dir.subdirs + 2); } -int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int kernfs_iop_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct kernfs_node *kn = dentry->d_fsdata; - struct inode *inode = d_inode(dentry); + struct kernfs_node *kn = path->dentry->d_fsdata; + struct inode *inode = d_inode(path->dentry); mutex_lock(&kernfs_mutex); kernfs_refresh_inode(kn, inode); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 3100987cf8ba..2d5144ab4251 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -80,8 +80,8 @@ extern const struct xattr_handler *kernfs_xattr_handlers[]; void kernfs_evict_inode(struct inode *inode); int kernfs_iop_permission(struct inode *inode, int mask); int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr); -int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int kernfs_iop_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags); ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); /* diff --git a/fs/libfs.c b/fs/libfs.c index 28d6f35feed6..1dfaf8f606c0 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -20,10 +20,10 @@ #include "internal.h" -int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int simple_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); return 0; @@ -1143,10 +1143,10 @@ static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENOENT); } -static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int empty_dir_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); return 0; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index e7d9bf86d975..6ac76b0434e9 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -622,11 +622,14 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc) return err; } -int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int minix_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct super_block *sb = dentry->d_sb; - generic_fillattr(d_inode(dentry), stat); - if (INODE_VERSION(d_inode(dentry)) == MINIX_V1) + struct super_block *sb = path->dentry->d_sb; + struct inode *inode = d_inode(path->dentry); + + generic_fillattr(inode, stat); + if 
(INODE_VERSION(inode) == MINIX_V1) stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb); else stat->blocks = (sb->s_blocksize / 512) * V2_minix_blocks(stat->size, sb); diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 01ad81dcacc5..663d66138d06 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -51,7 +51,7 @@ extern unsigned long minix_count_free_inodes(struct super_block *sb); extern int minix_new_block(struct inode * inode); extern void minix_free_block(struct inode *inode, unsigned long block); extern unsigned long minix_count_free_blocks(struct super_block *sb); -extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int minix_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len); extern void V1_minix_truncate(struct inode *); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ca4d96b1942..b5425315adcc 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -703,9 +703,10 @@ static bool nfs_need_revalidate_inode(struct inode *inode) return false; } -int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int nfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err = 0; @@ -726,17 +727,17 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is * no point in checking those. */ - if ((mnt->mnt_flags & MNT_NOATIME) || - ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) + if ((path->mnt->mnt_flags & MNT_NOATIME) || + ((path->mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) need_atime = 0; if (need_atime || nfs_need_revalidate_inode(inode)) { struct nfs_server *server = NFS_SERVER(inode); - nfs_readdirplus_parent_cache_miss(dentry); + nfs_readdirplus_parent_cache_miss(path->dentry); err = __nfs_revalidate_inode(server, inode); } else - nfs_readdirplus_parent_cache_hit(dentry); + nfs_readdirplus_parent_cache_hit(path->dentry); if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e49d831c4e85..786f17580582 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -178,11 +178,12 @@ struct vfsmount *nfs_d_automount(struct path *path) } static int -nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +nfs_namespace_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - if (NFS_FH(d_inode(dentry))->size != 0) - return nfs_getattr(mnt, dentry, stat); - generic_fillattr(d_inode(dentry), stat); + if (NFS_FH(d_inode(path->dentry))->size != 0) + return nfs_getattr(path, stat, request_mask, query_flags); + generic_fillattr(d_inode(path->dentry), stat); return 0; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 382c1fd05b4c..33017d652b1d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2301,7 +2301,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) if (path.dentry != path.mnt->mnt_root) break; } - err = vfs_getattr(&path, stat); + err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); path_put(&path); return err; } @@ -2385,7 +2385,7 @@ nfsd4_encode_fattr(struct 
xdr_stream *xdr, struct svc_fh *fhp, goto out; } - err = vfs_getattr(&path, &stat); + err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index db98c48c735a..1bbdccecbf3d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -135,7 +135,8 @@ static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) { struct path p = {.mnt = fh->fh_export->ex_path.mnt, .dentry = fh->fh_dentry}; - return nfserrno(vfs_getattr(&p, stat)); + return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT)); } static inline int nfsd_create_is_exclusive(int createmode) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8836305eb378..bfeb647459d9 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1306,16 +1306,15 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) return status; } -int ocfs2_getattr(struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) +int ocfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); - struct super_block *sb = dentry->d_sb; + struct inode *inode = d_inode(path->dentry); + struct super_block *sb = path->dentry->d_sb; struct ocfs2_super *osb = sb->s_fs_info; int err; - err = ocfs2_inode_revalidate(dentry); + err = ocfs2_inode_revalidate(path->dentry); if (err) { if (err != -ENOENT) mlog_errno(err); diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 897fd9a2e51d..1fdc9839cd93 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -68,8 +68,8 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, u32 clusters_to_add, int mark_unwritten); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); -int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int ocfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ocfs2_permission(struct inode *inode, int mask); int ocfs2_should_update_atime(struct inode *inode, diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 5cd617980fbf..a304bf34b212 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -245,25 +245,24 @@ int orangefs_setattr(struct dentry *dentry, struct iattr *iattr) /* * Obtain attributes of an object given a dentry */ -int orangefs_getattr(struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *kstat) +int orangefs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { int ret = -ENOENT; - struct inode *inode = dentry->d_inode; + struct inode *inode = path->dentry->d_inode; struct orangefs_inode_s *orangefs_inode = NULL; gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_getattr: called on %pd\n", - dentry); + path->dentry); ret = orangefs_inode_getattr(inode, 0, 0); if (ret == 0) { - generic_fillattr(inode, kstat); + generic_fillattr(inode, stat); /* override block size reported to stat */ orangefs_inode = ORANGEFS_I(inode); - kstat->blksize = orangefs_inode->blksize; + stat->blksize = orangefs_inode->blksize; } return ret; } diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 70355a9a2596..0c4f03c22ce0 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -439,9 +439,8 @@ struct inode *orangefs_new_inode(struct super_block *sb, int orangefs_setattr(struct 
dentry *dentry, struct iattr *iattr); -int orangefs_getattr(struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *kstat); +int orangefs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int orangefs_permission(struct inode *inode, int mask); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f57043dace62..a6f9ca621e0b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -346,7 +346,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, ovl_path_upper(parent, &parentpath); upperdir = parentpath.dentry; - err = vfs_getattr(&parentpath, &pstat); + err = vfs_getattr(&parentpath, &pstat, + STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); if (err) return err; @@ -409,7 +410,8 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) } ovl_path_lower(next, &lowerpath); - err = vfs_getattr(&lowerpath, &stat); + err = vfs_getattr(&lowerpath, &stat, + STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); /* maybe truncate regular file. this has no effect on dirs */ if (flags & O_TRUNC) stat.size = 0; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 16e06dd89457..6515796460df 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -138,9 +138,10 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) return err; } -static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ovl_dir_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; int err; enum ovl_path_type type; struct path realpath; @@ -148,7 +149,7 @@ static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat); + err = vfs_getattr(&realpath, stat, request_mask, flags); revert_creds(old_cred); if (err) return err; @@ -264,7 +265,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, goto out; ovl_path_upper(dentry, &upperpath); - err = vfs_getattr(&upperpath, &stat); + err = vfs_getattr(&upperpath, &stat, + STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_unlock; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 08643ac44a02..d4bb54f7b6b4 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -56,16 +56,17 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) return err; } -static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct path realpath; const struct cred *old_cred; int err; ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat); + err = vfs_getattr(&realpath, stat, request_mask, flags); revert_creds(old_cred); return err; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 1e1e182d571b..3b5e6aa2a326 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1724,11 +1724,12 @@ struct inode *proc_pid_make_inode(struct super_block * sb, return NULL; } -int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int pid_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct task_struct *task; 
- struct pid_namespace *pid = dentry->d_sb->s_fs_info; + struct pid_namespace *pid = path->dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -3511,9 +3512,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) return 0; } -static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +static int proc_task_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct task_struct *p = get_proc_task(inode); generic_fillattr(inode, stat); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 06c73904d497..ee27feb34cf4 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -118,10 +118,10 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) return 0; } -static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int proc_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct proc_dir_entry *de = PDE(inode); if (de && de->nlink) set_nlink(inode, de->nlink); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5d6960f5f1c0..e93cdc6ddb31 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -149,7 +149,7 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, * base.c */ extern const struct dentry_operations pid_dentry_operations; -extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct dentry *, struct iattr *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern int pid_revalidate(struct dentry *, unsigned int); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index ffd72a6c6e04..9db1df2537fc 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -140,10 +140,10 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir, return de; } -static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int proc_tgid_net_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct net *net; net = get_proc_task_net(inode); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3e64c6502dc8..3d8726445ad1 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -801,9 +801,10 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) return 0; } -static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +static int proc_sys_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; diff --git a/fs/proc/root.c b/fs/proc/root.c index b90da888b81a..fb1955c82274 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -149,10 +149,10 @@ void __init proc_root_init(void) proc_sys_init(); } -static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat -) +static int 
proc_root_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - generic_fillattr(d_inode(dentry), stat); + generic_fillattr(d_inode(path->dentry), stat); stat->nlink = proc_root.nlink + nr_processes(); return 0; } diff --git a/fs/stat.c b/fs/stat.c index 3f14d1ef0868..a3804feadade 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -18,6 +18,15 @@ #include #include +/** + * generic_fillattr - Fill in the basic attributes from the inode struct + * @inode: Inode to use as the source + * @stat: Where to fill in the attributes + * + * Fill in the basic attributes in the kstat structure from data that's to be + * found on the VFS inode structure. This is the default if no getattr inode + * operation is supplied. + */ void generic_fillattr(struct inode *inode, struct kstat *stat) { stat->dev = inode->i_sb->s_dev; @@ -33,81 +42,147 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->ctime = inode->i_ctime; stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; -} + if (IS_NOATIME(inode)) + stat->result_mask &= ~STATX_ATIME; + if (IS_AUTOMOUNT(inode)) + stat->attributes |= STATX_ATTR_AUTOMOUNT; +} EXPORT_SYMBOL(generic_fillattr); /** * vfs_getattr_nosec - getattr without security checks * @path: file to get attributes from * @stat: structure to return attributes in + * @request_mask: STATX_xxx flags indicating what the caller wants + * @query_flags: Query mode (KSTAT_QUERY_FLAGS) * * Get attributes without calling security_inode_getattr. * * Currently the only caller other than vfs_getattr is internal to the - * filehandle lookup code, which uses only the inode number and returns - * no attributes to any user. Any other code probably wants - * vfs_getattr. + * filehandle lookup code, which uses only the inode number and returns no + * attributes to any user. Any other code probably wants vfs_getattr. */ -int vfs_getattr_nosec(struct path *path, struct kstat *stat) +int vfs_getattr_nosec(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { struct inode *inode = d_backing_inode(path->dentry); + memset(stat, 0, sizeof(*stat)); + stat->result_mask |= STATX_BASIC_STATS; + request_mask &= STATX_ALL; + query_flags &= KSTAT_QUERY_FLAGS; if (inode->i_op->getattr) - return inode->i_op->getattr(path->mnt, path->dentry, stat); + return inode->i_op->getattr(path, stat, request_mask, + query_flags); generic_fillattr(inode, stat); return 0; } - EXPORT_SYMBOL(vfs_getattr_nosec); -int vfs_getattr(struct path *path, struct kstat *stat) +/* + * vfs_getattr - Get the enhanced basic attributes of a file + * @path: The file of interest + * @stat: Where to return the statistics + * @request_mask: STATX_xxx flags indicating what the caller wants + * @query_flags: Query mode (KSTAT_QUERY_FLAGS) + * + * Ask the filesystem for a file's attributes. The caller must indicate in + * request_mask and query_flags to indicate what they want. + * + * If the file is remote, the filesystem can be forced to update the attributes + * from the backing store by passing AT_STATX_FORCE_SYNC in query_flags or can + * suppress the update by passing AT_STATX_DONT_SYNC. + * + * Bits must have been set in request_mask to indicate which attributes the + * caller wants retrieving. Any such attribute not requested may be returned + * anyway, but the value may be approximate, and, if remote, may not have been + * synchronised with the server. + * + * 0 will be returned on success, and a -ve error code if unsuccessful. 
+ */ +int vfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { int retval; retval = security_inode_getattr(path); if (retval) return retval; - return vfs_getattr_nosec(path, stat); + return vfs_getattr_nosec(path, stat, request_mask, query_flags); } - EXPORT_SYMBOL(vfs_getattr); -int vfs_fstat(unsigned int fd, struct kstat *stat) +/** + * vfs_statx_fd - Get the enhanced basic attributes by file descriptor + * @fd: The file descriptor referring to the file of interest + * @stat: The result structure to fill in. + * @request_mask: STATX_xxx flags indicating what the caller wants + * @query_flags: Query mode (KSTAT_QUERY_FLAGS) + * + * This function is a wrapper around vfs_getattr(). The main difference is + * that it uses a file descriptor to determine the file location. + * + * 0 will be returned on success, and a -ve error code if unsuccessful. + */ +int vfs_statx_fd(unsigned int fd, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { struct fd f = fdget_raw(fd); int error = -EBADF; if (f.file) { - error = vfs_getattr(&f.file->f_path, stat); + error = vfs_getattr(&f.file->f_path, stat, + request_mask, query_flags); fdput(f); } return error; } -EXPORT_SYMBOL(vfs_fstat); +EXPORT_SYMBOL(vfs_statx_fd); -int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, - int flag) +/** + * vfs_statx - Get basic and extra attributes by filename + * @dfd: A file descriptor representing the base dir for a relative filename + * @filename: The name of the file of interest + * @flags: Flags to control the query + * @stat: The result structure to fill in. + * @request_mask: STATX_xxx flags indicating what the caller wants + * + * This function is a wrapper around vfs_getattr(). The main difference is + * that it uses a filename and base directory to determine the file location. + * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink + * at the given name from being referenced. + * + * The caller must have preset stat->request_mask as for vfs_getattr(). The + * flags are also used to load up stat->query_flags. + * + * 0 will be returned on success, and a -ve error code if unsuccessful. 
+ */ +int vfs_statx(int dfd, const char __user *filename, int flags, + struct kstat *stat, u32 request_mask) { struct path path; int error = -EINVAL; - unsigned int lookup_flags = 0; + unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT; - if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | - AT_EMPTY_PATH)) != 0) - goto out; + if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | + AT_EMPTY_PATH | KSTAT_QUERY_FLAGS)) != 0) + return -EINVAL; - if (!(flag & AT_SYMLINK_NOFOLLOW)) - lookup_flags |= LOOKUP_FOLLOW; - if (flag & AT_EMPTY_PATH) + if (flags & AT_SYMLINK_NOFOLLOW) + lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & AT_NO_AUTOMOUNT) + lookup_flags &= ~LOOKUP_AUTOMOUNT; + if (flags & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; + retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; - error = vfs_getattr(&path, stat); + error = vfs_getattr(&path, stat, request_mask, flags); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -116,19 +191,7 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, out: return error; } -EXPORT_SYMBOL(vfs_fstatat); - -int vfs_stat(const char __user *name, struct kstat *stat) -{ - return vfs_fstatat(AT_FDCWD, name, stat, 0); -} -EXPORT_SYMBOL(vfs_stat); - -int vfs_lstat(const char __user *name, struct kstat *stat) -{ - return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); -} -EXPORT_SYMBOL(vfs_lstat); +EXPORT_SYMBOL(vfs_statx); #ifdef __ARCH_WANT_OLD_STAT @@ -141,7 +204,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta { static int warncount = 5; struct __old_kernel_stat tmp; - + if (warncount > 0) { warncount--; printk(KERN_WARNING "VFS: Warning: %s using old stat() call. Recompile your binary.\n", @@ -166,7 +229,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; -#endif +#endif tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; @@ -445,6 +508,81 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ +static inline int __put_timestamp(struct timespec *kts, + struct statx_timestamp __user *uts) +{ + return (__put_user(kts->tv_sec, &uts->tv_sec ) || + __put_user(kts->tv_nsec, &uts->tv_nsec ) || + __put_user(0, &uts->__reserved )); +} + +/* + * Set the statx results. 
+ */ +static long statx_set_result(struct kstat *stat, struct statx __user *buffer) +{ + uid_t uid = from_kuid_munged(current_user_ns(), stat->uid); + gid_t gid = from_kgid_munged(current_user_ns(), stat->gid); + + if (__put_user(stat->result_mask, &buffer->stx_mask ) || + __put_user(stat->mode, &buffer->stx_mode ) || + __clear_user(&buffer->__spare0, sizeof(buffer->__spare0)) || + __put_user(stat->nlink, &buffer->stx_nlink ) || + __put_user(uid, &buffer->stx_uid ) || + __put_user(gid, &buffer->stx_gid ) || + __put_user(stat->attributes, &buffer->stx_attributes ) || + __put_user(stat->blksize, &buffer->stx_blksize ) || + __put_user(MAJOR(stat->rdev), &buffer->stx_rdev_major ) || + __put_user(MINOR(stat->rdev), &buffer->stx_rdev_minor ) || + __put_user(MAJOR(stat->dev), &buffer->stx_dev_major ) || + __put_user(MINOR(stat->dev), &buffer->stx_dev_minor ) || + __put_timestamp(&stat->atime, &buffer->stx_atime ) || + __put_timestamp(&stat->btime, &buffer->stx_btime ) || + __put_timestamp(&stat->ctime, &buffer->stx_ctime ) || + __put_timestamp(&stat->mtime, &buffer->stx_mtime ) || + __put_user(stat->ino, &buffer->stx_ino ) || + __put_user(stat->size, &buffer->stx_size ) || + __put_user(stat->blocks, &buffer->stx_blocks ) || + __clear_user(&buffer->__spare1, sizeof(buffer->__spare1)) || + __clear_user(&buffer->__spare2, sizeof(buffer->__spare2))) + return -EFAULT; + + return 0; +} + +/** + * sys_statx - System call to get enhanced stats + * @dfd: Base directory to pathwalk from *or* fd to stat. + * @filename: File to stat *or* NULL. + * @flags: AT_* flags to control pathwalk. + * @mask: Parts of statx struct actually required. + * @buffer: Result buffer. + * + * Note that if filename is NULL, then it does the equivalent of fstat() using + * dfd to indicate the file of interest. + */ +SYSCALL_DEFINE5(statx, + int, dfd, const char __user *, filename, unsigned, flags, + unsigned int, mask, + struct statx __user *, buffer) +{ + struct kstat stat; + int error; + + if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) + return -EINVAL; + if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer))) + return -EFAULT; + + if (filename) + error = vfs_statx(dfd, filename, flags, &stat, mask); + else + error = vfs_statx_fd(dfd, &stat, mask, flags); + if (error) + return error; + return statx_set_result(&stat, buffer); +} + /* Caller is here responsible for sufficient locking (ie. 
inode->i_lock) */ void __inode_add_bytes(struct inode *inode, loff_t bytes) { diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 08d3e630b49c..83809f5b5eca 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -440,10 +440,11 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size) return blocks; } -int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int sysv_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct super_block *s = dentry->d_sb; - generic_fillattr(d_inode(dentry), stat); + struct super_block *s = path->dentry->d_sb; + generic_fillattr(d_inode(path->dentry), stat); stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size); stat->blksize = s->s_blocksize; return 0; diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 6c212288adcb..1e7e27c729af 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -142,7 +142,7 @@ extern struct inode *sysv_iget(struct super_block *, unsigned int); extern int sysv_write_inode(struct inode *, struct writeback_control *wbc); extern int sysv_sync_inode(struct inode *); extern void sysv_set_inode(struct inode *, dev_t); -extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int sysv_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int sysv_init_icache(void); extern void sysv_destroy_icache(void); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 528369f3e472..30825d882aa9 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1622,11 +1622,11 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } -int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ubifs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { loff_t size; - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index f0c86f076535..4d57e488038e 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1749,8 +1749,8 @@ int ubifs_update_time(struct inode *inode, struct timespec *time, int flags); /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, umode_t mode); -int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int ubifs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ubifs_check_dir_empty(struct inode *dir); /* xattr.c */ diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index f7dfef53f739..6023c97c6da2 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -152,9 +152,10 @@ static int udf_symlink_filler(struct file *file, struct page *page) return err; } -static int udf_symlink_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int udf_symlink_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct inode *inode = d_backing_inode(dentry); struct page *page; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 22c16155f1b4..229cc6a6d8ef 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -489,11 +489,12 @@ xfs_vn_get_link_inline( STATIC int xfs_vn_getattr( - struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) + const struct path *path, + struct kstat *stat, + u32 
request_mask, + unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; diff --git a/include/linux/fs.h b/include/linux/fs.h index 52350947c670..aad3fd0ff5f8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1709,7 +1709,7 @@ struct inode_operations { int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); @@ -2902,8 +2902,8 @@ extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); extern void generic_fillattr(struct inode *, struct kstat *); -int vfs_getattr_nosec(struct path *path, struct kstat *stat); -extern int vfs_getattr(struct path *, struct kstat *); +extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); +extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); @@ -2916,10 +2916,29 @@ extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); -extern int vfs_stat(const char __user *, struct kstat *); -extern int vfs_lstat(const char __user *, struct kstat *); -extern int vfs_fstat(unsigned int, struct kstat *); -extern int vfs_fstatat(int , const char __user *, struct kstat *, int); +extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); +extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); + +static inline int vfs_stat(const char __user *filename, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS); +} +static inline int vfs_lstat(const char __user *name, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW, + stat, STATX_BASIC_STATS); +} +static inline int vfs_fstatat(int dfd, const char __user *filename, + struct kstat *stat, int flags) +{ + return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); +} +static inline int vfs_fstat(int fd, struct kstat *stat) +{ + return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0); +} + + extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); @@ -2949,7 +2968,7 @@ extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct dentry *, struct iattr *); -extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int simple_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 
f1da8c8dd473..287f34161086 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -335,7 +335,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); -extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int nfs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct inode *, int); diff --git a/include/linux/stat.h b/include/linux/stat.h index 075cb0c7eb2a..c76e524fb34b 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -18,20 +18,32 @@ #include #include +#define KSTAT_QUERY_FLAGS (AT_STATX_SYNC_TYPE) + struct kstat { - u64 ino; - dev_t dev; + u32 result_mask; /* What fields the user got */ umode_t mode; unsigned int nlink; + uint32_t blksize; /* Preferred I/O size */ + u64 attributes; +#define KSTAT_ATTR_FS_IOC_FLAGS \ + (STATX_ATTR_COMPRESSED | \ + STATX_ATTR_IMMUTABLE | \ + STATX_ATTR_APPEND | \ + STATX_ATTR_NODUMP | \ + STATX_ATTR_ENCRYPTED \ + )/* Attrs corresponding to FS_*_FL flags */ + u64 ino; + dev_t dev; + dev_t rdev; kuid_t uid; kgid_t gid; - dev_t rdev; loff_t size; - struct timespec atime; + struct timespec atime; struct timespec mtime; struct timespec ctime; - unsigned long blksize; - unsigned long long blocks; + struct timespec btime; /* File creation time */ + u64 blocks; }; #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 91a740f6b884..980c3c9b06f8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -48,6 +48,7 @@ struct stat; struct stat64; struct statfs; struct statfs64; +struct statx; struct __sysctl_args; struct sysinfo; struct timespec; @@ -902,5 +903,7 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); +asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, + unsigned mask, struct statx __user *buffer); #endif diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index beed138bd359..813afd6eee71 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -63,5 +63,10 @@ #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ #define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 7fec7e36d921..51a6b86e3700 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -1,6 +1,7 @@ #ifndef _UAPI_LINUX_STAT_H #define _UAPI_LINUX_STAT_H +#include #if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) @@ -41,5 +42,135 @@ #endif +/* + * Timestamp structure for the timestamps in struct statx. 
+ * + * tv_sec holds the number of seconds before (negative) or after (positive) + * 00:00:00 1st January 1970 UTC. + * + * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is + * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time. + * + * Note that if both tv_sec and tv_nsec are non-zero, then the two values must + * either be both positive or both negative. + * + * __reserved is held in case we need a yet finer resolution. + */ +struct statx_timestamp { + __s64 tv_sec; + __s32 tv_nsec; + __s32 __reserved; +}; + +/* + * Structures for the extended file attribute retrieval system call + * (statx()). + * + * The caller passes a mask of what they're specifically interested in as a + * parameter to statx(). What statx() actually got will be indicated in + * st_mask upon return. + * + * For each bit in the mask argument: + * + * - if the datum is not supported: + * + * - the bit will be cleared, and + * + * - the datum will be set to an appropriate fabricated value if one is + * available (eg. CIFS can take a default uid and gid), otherwise + * + * - the field will be cleared; + * + * - otherwise, if explicitly requested: + * + * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is + * set or if the datum is considered out of date, and + * + * - the field will be filled in and the bit will be set; + * + * - otherwise, if not requested, but available in approximate form without any + * effort, it will be filled in anyway, and the bit will be set upon return + * (it might not be up to date, however, and no attempt will be made to + * synchronise the internal state first); + * + * - otherwise the field and the bit will be cleared before returning. + * + * Items in STATX_BASIC_STATS may be marked unavailable on return, but they + * will have values installed for compatibility purposes so that stat() and + * co. can be emulated in userspace. + */ +struct statx { + /* 0x00 */ + __u32 stx_mask; /* What results were written [uncond] */ + __u32 stx_blksize; /* Preferred general I/O size [uncond] */ + __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* 0x10 */ + __u32 stx_nlink; /* Number of hard links */ + __u32 stx_uid; /* User ID of owner */ + __u32 stx_gid; /* Group ID of owner */ + __u16 stx_mode; /* File mode */ + __u16 __spare0[1]; + /* 0x20 */ + __u64 stx_ino; /* Inode number */ + __u64 stx_size; /* File size */ + __u64 stx_blocks; /* Number of 512-byte blocks allocated */ + __u64 __spare1[1]; + /* 0x40 */ + struct statx_timestamp stx_atime; /* Last access time */ + struct statx_timestamp stx_btime; /* File creation time */ + struct statx_timestamp stx_ctime; /* Last attribute change time */ + struct statx_timestamp stx_mtime; /* Last data modification time */ + /* 0x80 */ + __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_minor; + __u32 stx_dev_major; /* ID of device containing file [uncond] */ + __u32 stx_dev_minor; + /* 0x90 */ + __u64 __spare2[14]; /* Spare space for future expansion */ + /* 0x100 */ +}; + +/* + * Flags to be stx_mask + * + * Query request/result mask for statx() and struct statx::stx_mask. + * + * These bits should be set in the mask argument of statx() to request + * particular items when calling statx(). 
+ */ +#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */ +#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */ +#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */ +#define STATX_UID 0x00000008U /* Want/got stx_uid */ +#define STATX_GID 0x00000010U /* Want/got stx_gid */ +#define STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define STATX_INO 0x00000100U /* Want/got stx_ino */ +#define STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_ALL 0x00000fffU /* All currently supported flags */ + +/* + * Attributes to be found in stx_attributes + * + * These give information about the features or the state of a file that might + * be of use to ordinary userspace programs such as GUIs or ls rather than + * specialised tools. + * + * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * semantically. Where possible, the numerical value is picked to correspond + * also. + */ +#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ +#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ +#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ +#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ +#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ + +#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ + #endif /* _UAPI_LINUX_STAT_H */ diff --git a/mm/shmem.c b/mm/shmem.c index a26649a6633f..e07728f716b2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -958,10 +958,10 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) } EXPORT_SYMBOL_GPL(shmem_truncate_range); -static int shmem_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int shmem_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = dentry->d_inode; + struct inode *inode = path->dentry->d_inode; struct shmem_inode_info *info = SHMEM_I(inode); if (info->alloced - info->swapped != inode->i_mapping->nrpages) { diff --git a/samples/Kconfig b/samples/Kconfig index b124f62ed6cb..9cb63188d3ef 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -112,4 +112,10 @@ config SAMPLE_VFIO_MDEV_MTTY Build a virtual tty sample driver for use as a VFIO mediated device +config SAMPLE_STATX + bool "Build example extended-stat using code" + depends on BROKEN + help + Build example userspace program to use the new extended-stat syscall. + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 86a137e451d9..db54e766ddb1 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -3,4 +3,4 @@ obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ configfs/ connector/ v4l/ trace_printk/ blackfin/ \ - vfio-mdev/ + vfio-mdev/ statx/ diff --git a/samples/statx/Makefile b/samples/statx/Makefile new file mode 100644 index 000000000000..1f80a3d8cf45 --- /dev/null +++ b/samples/statx/Makefile @@ -0,0 +1,10 @@ +# kbuild trick to avoid linker error. Can be omitted if a module is built. 
+obj- := dummy.o + +# List of programs to build +hostprogs-$(CONFIG_SAMPLE_STATX) := test-statx + +# Tell kbuild to always build the programs +always := $(hostprogs-y) + +HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include diff --git a/samples/statx/test-statx.c b/samples/statx/test-statx.c new file mode 100644 index 000000000000..8571d766331d --- /dev/null +++ b/samples/statx/test-statx.c @@ -0,0 +1,254 @@ +/* Test the statx() system call. + * + * Note that the output of this program is intended to look like the output of + * /bin/stat where possible. + * + * Copyright (C) 2015 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define _GNU_SOURCE +#define _ATFILE_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define AT_STATX_SYNC_TYPE 0x6000 +#define AT_STATX_SYNC_AS_STAT 0x0000 +#define AT_STATX_FORCE_SYNC 0x2000 +#define AT_STATX_DONT_SYNC 0x4000 + +static __attribute__((unused)) +ssize_t statx(int dfd, const char *filename, unsigned flags, + unsigned int mask, struct statx *buffer) +{ + return syscall(__NR_statx, dfd, filename, flags, mask, buffer); +} + +static void print_time(const char *field, struct statx_timestamp *ts) +{ + struct tm tm; + time_t tim; + char buffer[100]; + int len; + + tim = ts->tv_sec; + if (!localtime_r(&tim, &tm)) { + perror("localtime_r"); + exit(1); + } + len = strftime(buffer, 100, "%F %T", &tm); + if (len == 0) { + perror("strftime"); + exit(1); + } + printf("%s", field); + fwrite(buffer, 1, len, stdout); + printf(".%09u", ts->tv_nsec); + len = strftime(buffer, 100, "%z", &tm); + if (len == 0) { + perror("strftime2"); + exit(1); + } + fwrite(buffer, 1, len, stdout); + printf("\n"); +} + +static void dump_statx(struct statx *stx) +{ + char buffer[256], ft = '?'; + + printf("results=%x\n", stx->stx_mask); + + printf(" "); + if (stx->stx_mask & STATX_SIZE) + printf(" Size: %-15llu", (unsigned long long)stx->stx_size); + if (stx->stx_mask & STATX_BLOCKS) + printf(" Blocks: %-10llu", (unsigned long long)stx->stx_blocks); + printf(" IO Block: %-6llu", (unsigned long long)stx->stx_blksize); + if (stx->stx_mask & STATX_TYPE) { + switch (stx->stx_mode & S_IFMT) { + case S_IFIFO: printf(" FIFO\n"); ft = 'p'; break; + case S_IFCHR: printf(" character special file\n"); ft = 'c'; break; + case S_IFDIR: printf(" directory\n"); ft = 'd'; break; + case S_IFBLK: printf(" block special file\n"); ft = 'b'; break; + case S_IFREG: printf(" regular file\n"); ft = '-'; break; + case S_IFLNK: printf(" symbolic link\n"); ft = 'l'; break; + case S_IFSOCK: printf(" socket\n"); ft = 's'; break; + default: + printf(" unknown type (%o)\n", stx->stx_mode & S_IFMT); + break; + } + } else { + printf(" no type\n"); + } + + sprintf(buffer, "%02x:%02x", stx->stx_dev_major, stx->stx_dev_minor); + printf("Device: %-15s", buffer); + if (stx->stx_mask & STATX_INO) + printf(" Inode: %-11llu", (unsigned long long) stx->stx_ino); + if (stx->stx_mask & STATX_NLINK) + printf(" Links: %-5u", stx->stx_nlink); + if (stx->stx_mask & STATX_TYPE) { + switch (stx->stx_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + printf(" Device type: %u,%u", + stx->stx_rdev_major, stx->stx_rdev_minor); + break; + } + } + printf("\n"); + + 
if (stx->stx_mask & STATX_MODE) + printf("Access: (%04o/%c%c%c%c%c%c%c%c%c%c) ", + stx->stx_mode & 07777, + ft, + stx->stx_mode & S_IRUSR ? 'r' : '-', + stx->stx_mode & S_IWUSR ? 'w' : '-', + stx->stx_mode & S_IXUSR ? 'x' : '-', + stx->stx_mode & S_IRGRP ? 'r' : '-', + stx->stx_mode & S_IWGRP ? 'w' : '-', + stx->stx_mode & S_IXGRP ? 'x' : '-', + stx->stx_mode & S_IROTH ? 'r' : '-', + stx->stx_mode & S_IWOTH ? 'w' : '-', + stx->stx_mode & S_IXOTH ? 'x' : '-'); + if (stx->stx_mask & STATX_UID) + printf("Uid: %5d ", stx->stx_uid); + if (stx->stx_mask & STATX_GID) + printf("Gid: %5d\n", stx->stx_gid); + + if (stx->stx_mask & STATX_ATIME) + print_time("Access: ", &stx->stx_atime); + if (stx->stx_mask & STATX_MTIME) + print_time("Modify: ", &stx->stx_mtime); + if (stx->stx_mask & STATX_CTIME) + print_time("Change: ", &stx->stx_ctime); + if (stx->stx_mask & STATX_BTIME) + print_time(" Birth: ", &stx->stx_btime); + + if (stx->stx_attributes) { + unsigned char bits; + int loop, byte; + + static char attr_representation[64 + 1] = + /* STATX_ATTR_ flags: */ + "????????" /* 63-56 */ + "????????" /* 55-48 */ + "????????" /* 47-40 */ + "????????" /* 39-32 */ + "????????" /* 31-24 0x00000000-ff000000 */ + "????????" /* 23-16 0x00000000-00ff0000 */ + "???me???" /* 15- 8 0x00000000-0000ff00 */ + "?dai?c??" /* 7- 0 0x00000000-000000ff */ + ; + + printf("Attributes: %016llx (", stx->stx_attributes); + for (byte = 64 - 8; byte >= 0; byte -= 8) { + bits = stx->stx_attributes >> byte; + for (loop = 7; loop >= 0; loop--) { + int bit = byte + loop; + + if (bits & 0x80) + putchar(attr_representation[63 - bit]); + else + putchar('-'); + bits <<= 1; + } + if (byte) + putchar(' '); + } + printf(")\n"); + } +} + +static void dump_hex(unsigned long long *data, int from, int to) +{ + unsigned offset, print_offset = 1, col = 0; + + from /= 8; + to = (to + 7) / 8; + + for (offset = from; offset < to; offset++) { + if (print_offset) { + printf("%04x: ", offset * 8); + print_offset = 0; + } + printf("%016llx", data[offset]); + col++; + if ((col & 3) == 0) { + printf("\n"); + print_offset = 1; + } else { + printf(" "); + } + } + + if (!print_offset) + printf("\n"); +} + +int main(int argc, char **argv) +{ + struct statx stx; + int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW; + + unsigned int mask = STATX_ALL; + + for (argv++; *argv; argv++) { + if (strcmp(*argv, "-F") == 0) { + atflag &= ~AT_STATX_SYNC_TYPE; + atflag |= AT_STATX_FORCE_SYNC; + continue; + } + if (strcmp(*argv, "-D") == 0) { + atflag &= ~AT_STATX_SYNC_TYPE; + atflag |= AT_STATX_DONT_SYNC; + continue; + } + if (strcmp(*argv, "-L") == 0) { + atflag &= ~AT_SYMLINK_NOFOLLOW; + continue; + } + if (strcmp(*argv, "-O") == 0) { + mask &= ~STATX_BASIC_STATS; + continue; + } + if (strcmp(*argv, "-A") == 0) { + atflag |= AT_NO_AUTOMOUNT; + continue; + } + if (strcmp(*argv, "-R") == 0) { + raw = 1; + continue; + } + + memset(&stx, 0xbf, sizeof(stx)); + ret = statx(AT_FDCWD, *argv, atflag, mask, &stx); + printf("statx(%s) = %d\n", *argv, ret); + if (ret < 0) { + perror(*argv); + exit(1); + } + + if (raw) + dump_hex((unsigned long long *)&stx, 0, sizeof(stx)); + + dump_statx(&stx); + } + return 0; +} -- GitLab From c74042f3b3ca982652af99cad85252a2655c6064 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 3 Feb 2017 09:19:40 -0800 Subject: [PATCH 1054/1084] ixgbe: Limit use of 2K buffers on architectures with 256B or larger cache lines On architectures that have a cache line size larger than 64 Bytes we start running into issues where the amount of headroom for the 
frame starts shrinking. The size of skb_shared_info on a system with a 64B L1 cache line size is 320. This increases to 384 with a 128B cache line, and 512 with a 256B cache line. In addition the NET_SKB_PAD value increases as well consistent with the cache line size. As a result when we get to a 256B cache line as seen on the s390 we end up 768 bytes used by padding and shared info leaving us with only 1280 bytes to use for data storage. On architectures such as this we should default to using 3K Rx buffers out of a 8K page instead of trying to do 1.5K buffers out of a 4K page. To take all of this into account I have added one small check so that we compare the max_frame to the amount of actual data we can store. This was already occurring for igb, but I had overlooked it for ixgbe as it doesn't have strict limits for 82599 once we enable jumbo frames. By adding this check we will automatically enable 3K Rx buffers as soon as the maximum frame size we can handle drops below the standard Ethernet MTU. I also went through and fixed one small typo that I found where I had left an IGB in a variable name due to a copy/paste error. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 7a951b116821..b1ecc2627a5a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -96,7 +96,7 @@ #define IXGBE_MAX_FRAME_BUILD_SKB \ (SKB_WITH_OVERHEAD(IXGBE_RXBUFFER_2K) - IXGBE_SKB_PAD) #else -#define IGB_MAX_FRAME_BUILD_SKB IXGBE_RXBUFFER_2K +#define IXGBE_MAX_FRAME_BUILD_SKB IXGBE_RXBUFFER_2K #endif /* diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 67ab13fd163c..a7a430a7be2c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3972,7 +3972,8 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); - if (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)) + if ((max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)) || + (max_frame > IXGBE_MAX_FRAME_BUILD_SKB)) set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); #endif } -- GitLab From 284316dd42a2027afe37df34c5199eb4eabed8fd Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 2 Mar 2017 15:42:48 -0600 Subject: [PATCH 1055/1084] CIFS: Fix sparse warnings Fix two minor sparse compile check warnings Signed-off-by: Steve French Acked-by: Pavel Shilovsky Reviewed-by: Aurelien Aptel --- fs/cifs/cifs_unicode.h | 6 +++--- fs/cifs/cifssmb.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 479bc0a941f3..3d7298cc0aeb 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -130,10 +130,10 @@ wchar_t cifs_toupper(wchar_t in); * Returns: * Address of the first string */ -static inline wchar_t * -UniStrcat(wchar_t *ucs1, const wchar_t *ucs2) +static inline __le16 * +UniStrcat(__le16 *ucs1, const __le16 *ucs2) { - wchar_t *anchor = ucs1; /* save a pointer to start of ucs1 */ + __le16 *anchor = ucs1; /* save a pointer to start of ucs1 */ while (*ucs1++) ; /* To end of first string */ ucs1--; /* Return to the null */ diff --git a/fs/cifs/cifssmb.c 
b/fs/cifs/cifssmb.c index 5005c7995b6a..066950671929 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4886,7 +4886,7 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, le16_to_cpu(pSMBr->t2.DataCount), num_of_nodes, target_nodes, nls_codepage, remap, search_name, - pSMBr->hdr.Flags2 & SMBFLG2_UNICODE); + (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) != 0); GetDFSRefExit: cifs_buf_release(pSMB); -- GitLab From ef65aaede23f75977af56a8c330bb9be8c6e125c Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Wed, 18 Jan 2017 15:35:57 +0530 Subject: [PATCH 1056/1084] smb2: Enforce sec= mount option If the security type specified using a mount option is not supported, the SMB2 session setup code changes the security type to RawNTLMSSP. We should instead fail the mount and return an error. The patch changes the code for SMB2 to make it similar to the code used for SMB1. Like in SMB1, we now use the global security flags to select the security method to be used when no security method is specified and to return an error when the requested auth method is not available. For SMB2, we also use ntlmv2 as a synonym for nltmssp. Signed-off-by: Sachin Prabhu Acked-by: Pavel Shilovsky Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 3 +++ fs/cifs/cifsproto.h | 2 ++ fs/cifs/connect.c | 3 ++- fs/cifs/sess.c | 4 ++-- fs/cifs/smb1ops.c | 1 + fs/cifs/smb2ops.c | 4 ++++ fs/cifs/smb2pdu.c | 37 +++++++++++++++++++++++++++++++++---- fs/cifs/smb2proto.h | 2 ++ 8 files changed, 49 insertions(+), 7 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index af224cda8697..d42dd3288647 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -443,6 +443,9 @@ struct smb_version_operations { int (*is_transform_hdr)(void *buf); int (*receive_transform)(struct TCP_Server_Info *, struct mid_q_entry **); + enum securityEnum (*select_sectype)(struct TCP_Server_Info *, + enum securityEnum); + }; struct smb_version_values { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 9ee46c1c3ebd..97e5d236d265 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -533,4 +533,6 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, char *signature, struct shash_desc *shash); +enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, + enum securityEnum); #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 777ad9f4fc3c..de4c56e8fb37 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2073,7 +2073,8 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol) * that was specified, or "Unspecified" if that sectype was not * compatible with the given NEGOTIATE request. 
*/ - if (select_sectype(server, vol->sectype) == Unspecified) + if (server->ops->select_sectype(server, vol->sectype) + == Unspecified) return false; /* diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index dcbcc927399a..8b0502cd39af 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -498,7 +498,7 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer, } enum securityEnum -select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) +cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) { switch (server->negflavor) { case CIFS_NEGFLAVOR_EXTENDED: @@ -1391,7 +1391,7 @@ static int select_sec(struct cifs_ses *ses, struct sess_data *sess_data) { int type; - type = select_sectype(ses->server, ses->sectype); + type = cifs_select_sectype(ses->server, ses->sectype); cifs_dbg(FYI, "sess setup type %d\n", type); if (type == Unspecified) { cifs_dbg(VFS, diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 67a987e4d026..cc93ba4da9b5 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1087,6 +1087,7 @@ struct smb_version_operations smb1_operations = { .is_read_op = cifs_is_read_op, .wp_retry_size = cifs_wp_retry_size, .dir_needs_close = cifs_dir_needs_close, + .select_sectype = cifs_select_sectype, #ifdef CONFIG_CIFS_XATTR .query_all_EAs = CIFSSMBQAllEAs, .set_EA = CIFSSMBSetEA, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b360c381b00e..0231108d9387 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2381,6 +2381,7 @@ struct smb_version_operations smb20_operations = { .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, .get_dfs_refer = smb2_get_dfs_refer, + .select_sectype = smb2_select_sectype, }; struct smb_version_operations smb21_operations = { @@ -2463,6 +2464,7 @@ struct smb_version_operations smb21_operations = { .dir_needs_close = smb2_dir_needs_close, .enum_snapshots = smb3_enum_snapshots, .get_dfs_refer = smb2_get_dfs_refer, + .select_sectype = smb2_select_sectype, }; struct smb_version_operations smb30_operations = { @@ -2555,6 +2557,7 @@ struct smb_version_operations smb30_operations = { .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, .get_dfs_refer = smb2_get_dfs_refer, + .select_sectype = smb2_select_sectype, }; #ifdef CONFIG_CIFS_SMB311 @@ -2648,6 +2651,7 @@ struct smb_version_operations smb311_operations = { .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, .get_dfs_refer = smb2_get_dfs_refer, + .select_sectype = smb2_select_sectype, }; #endif /* CIFS_SMB311 */ diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2069431b32e3..7446496850a3 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -657,6 +657,28 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) return -EIO; } +enum securityEnum +smb2_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested) +{ + switch (requested) { + case Kerberos: + case RawNTLMSSP: + return requested; + case NTLMv2: + return RawNTLMSSP; + case Unspecified: + if (server->sec_ntlmssp && + (global_secflags & CIFSSEC_MAY_NTLMSSP)) + return RawNTLMSSP; + if ((server->sec_kerberos || server->sec_mskerberos) && + (global_secflags & CIFSSEC_MAY_KRB5)) + return Kerberos; + /* Fallthrough */ + default: + return Unspecified; + } +} + struct SMB2_sess_data { unsigned int xid; struct cifs_ses *ses; @@ -1009,10 +1031,17 @@ SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data) static int SMB2_select_sec(struct cifs_ses *ses, 
struct SMB2_sess_data *sess_data) { - if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP) - ses->sectype = RawNTLMSSP; + int type; + + type = smb2_select_sectype(ses->server, ses->sectype); + cifs_dbg(FYI, "sess setup type %d\n", type); + if (type == Unspecified) { + cifs_dbg(VFS, + "Unable to select appropriate authentication method!"); + return -EINVAL; + } - switch (ses->sectype) { + switch (type) { case Kerberos: sess_data->func = SMB2_auth_kerberos; break; @@ -1020,7 +1049,7 @@ SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data) sess_data->func = SMB2_sess_auth_rawntlmssp_negotiate; break; default: - cifs_dbg(VFS, "secType %d not supported!\n", ses->sectype); + cifs_dbg(VFS, "secType %d not supported!\n", type); return -EOPNOTSUPP; } diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 11d9f3013db8..69e35873b1de 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -181,4 +181,6 @@ extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, __u8 *lease_key, const __le32 lease_state); extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); +extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, + enum securityEnum); #endif /* _SMB2PROTO_H */ -- GitLab From c919a3069c775c1c876bec55e00b2305d5125caa Mon Sep 17 00:00:00 2001 From: Ethan Zonca Date: Fri, 24 Feb 2017 11:27:36 -0500 Subject: [PATCH 1057/1084] can: gs_usb: Don't use stack memory for USB transfers Fixes: 05ca5270005c can: gs_usb: add ethtool set_phys_id callback to locate physical device The gs_usb driver is performing USB transfers using buffers allocated on the stack. This causes the driver to not function with vmapped stacks. Instead, allocate memory for the transfer buffers. Signed-off-by: Ethan Zonca Cc: linux-stable # >= v4.8 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 40 ++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 77e3cc06a30c..a0dabd4038ba 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -908,10 +908,14 @@ static int gs_usb_probe(struct usb_interface *intf, struct gs_usb *dev; int rc = -ENOMEM; unsigned int icount, i; - struct gs_host_config hconf = { - .byte_order = 0x0000beef, - }; - struct gs_device_config dconf; + struct gs_host_config *hconf; + struct gs_device_config *dconf; + + hconf = kmalloc(sizeof(*hconf), GFP_KERNEL); + if (!hconf) + return -ENOMEM; + + hconf->byte_order = 0x0000beef; /* send host config */ rc = usb_control_msg(interface_to_usbdev(intf), @@ -920,16 +924,22 @@ static int gs_usb_probe(struct usb_interface *intf, USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, - &hconf, - sizeof(hconf), + hconf, + sizeof(*hconf), 1000); + kfree(hconf); + if (rc < 0) { dev_err(&intf->dev, "Couldn't send data format (err=%d)\n", rc); return rc; } + dconf = kmalloc(sizeof(*dconf), GFP_KERNEL); + if (!dconf) + return -ENOMEM; + /* read device config */ rc = usb_control_msg(interface_to_usbdev(intf), usb_rcvctrlpipe(interface_to_usbdev(intf), 0), @@ -937,28 +947,33 @@ static int gs_usb_probe(struct usb_interface *intf, USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, - &dconf, - sizeof(dconf), + dconf, + sizeof(*dconf), 1000); if (rc < 0) { dev_err(&intf->dev, "Couldn't get device config: (err=%d)\n", rc); + kfree(dconf); return rc; } - icount = dconf.icount 
+ 1; + icount = dconf->icount + 1; dev_info(&intf->dev, "Configuring for %d interfaces\n", icount); if (icount > GS_MAX_INTF) { dev_err(&intf->dev, "Driver cannot handle more that %d CAN interfaces\n", GS_MAX_INTF); + kfree(dconf); return -EINVAL; } dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) + if (!dev) { + kfree(dconf); return -ENOMEM; + } + init_usb_anchor(&dev->rx_submitted); atomic_set(&dev->active_channels, 0); @@ -967,7 +982,7 @@ static int gs_usb_probe(struct usb_interface *intf, dev->udev = interface_to_usbdev(intf); for (i = 0; i < icount; i++) { - dev->canch[i] = gs_make_candev(i, intf, &dconf); + dev->canch[i] = gs_make_candev(i, intf, dconf); if (IS_ERR_OR_NULL(dev->canch[i])) { /* save error code to return later */ rc = PTR_ERR(dev->canch[i]); @@ -978,12 +993,15 @@ static int gs_usb_probe(struct usb_interface *intf, gs_destroy_candev(dev->canch[i]); usb_kill_anchored_urbs(&dev->rx_submitted); + kfree(dconf); kfree(dev); return rc; } dev->canch[i]->parent = dev; } + kfree(dconf); + return 0; } -- GitLab From 540a27aef355e3fd8c598600d4a3c8f92127ee05 Mon Sep 17 00:00:00 2001 From: Ethan Zonca Date: Fri, 24 Feb 2017 11:00:34 -0500 Subject: [PATCH 1058/1084] can: gs_usb: fix coding style This patch fixes five minor style issues, spaces are between bitwise OR operators. Signed-off-by: Ethan Zonca Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index a0dabd4038ba..300349fe8dc0 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -258,7 +258,7 @@ static int gs_cmd_reset(struct gs_usb *gsusb, struct gs_can *gsdev) rc = usb_control_msg(interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), GS_USB_BREQ_MODE, - USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, gsdev->channel, 0, dm, @@ -432,7 +432,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev) rc = usb_control_msg(interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), GS_USB_BREQ_BITTIMING, - USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, dev->channel, 0, dbt, @@ -546,7 +546,6 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, hf, urb->transfer_dma); - if (rc == -ENODEV) { netif_device_detach(netdev); } else { @@ -804,7 +803,7 @@ static struct gs_can *gs_make_candev(unsigned int channel, rc = usb_control_msg(interface_to_usbdev(intf), usb_rcvctrlpipe(interface_to_usbdev(intf), 0), GS_USB_BREQ_BT_CONST, - USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, channel, 0, bt_const, @@ -921,7 +920,7 @@ static int gs_usb_probe(struct usb_interface *intf, rc = usb_control_msg(interface_to_usbdev(intf), usb_sndctrlpipe(interface_to_usbdev(intf), 0), GS_USB_BREQ_HOST_FORMAT, - USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, hconf, @@ -944,7 +943,7 @@ static int gs_usb_probe(struct usb_interface *intf, rc = usb_control_msg(interface_to_usbdev(intf), usb_rcvctrlpipe(interface_to_usbdev(intf), 0), GS_USB_BREQ_DEVICE_CONFIG, - USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 1, intf->altsetting[0].desc.bInterfaceNumber, dconf, -- GitLab From 7c42631376306fb3f34d51fda546b50a9b6dd6ec Mon Sep 
17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 2 Mar 2017 12:03:40 +0100 Subject: [PATCH 1059/1084] can: usb_8dev: Fix memory leak of priv->cmd_msg_buffer The priv->cmd_msg_buffer is allocated in the probe function, but never kfree()ed. This patch converts the kzalloc() to resource-managed kzalloc. Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/usb_8dev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 108a30e15097..d000cb62d6ae 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -951,8 +951,8 @@ static int usb_8dev_probe(struct usb_interface *intf, for (i = 0; i < MAX_TX_URBS; i++) priv->tx_contexts[i].echo_index = MAX_TX_URBS; - priv->cmd_msg_buffer = kzalloc(sizeof(struct usb_8dev_cmd_msg), - GFP_KERNEL); + priv->cmd_msg_buffer = devm_kzalloc(&intf->dev, sizeof(struct usb_8dev_cmd_msg), + GFP_KERNEL); if (!priv->cmd_msg_buffer) goto cleanup_candev; @@ -966,7 +966,7 @@ static int usb_8dev_probe(struct usb_interface *intf, if (err) { netdev_err(netdev, "couldn't register CAN device: %d\n", err); - goto cleanup_cmd_msg_buffer; + goto cleanup_candev; } err = usb_8dev_cmd_version(priv, &version); @@ -987,9 +987,6 @@ static int usb_8dev_probe(struct usb_interface *intf, cleanup_unregister_candev: unregister_netdev(priv->netdev); -cleanup_cmd_msg_buffer: - kfree(priv->cmd_msg_buffer); - cleanup_candev: free_candev(netdev); -- GitLab From 66ddb82129df66a94219844c509074adb4330a28 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 2 Mar 2017 15:42:49 +0100 Subject: [PATCH 1060/1084] can: flexcan: fix typo in comment This patch fixes the typo "Disble" -> "Disable". Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index ea57fed375c6..13f0f219d8aa 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -196,7 +196,7 @@ #define FLEXCAN_QUIRK_BROKEN_ERR_STATE BIT(1) /* [TR]WRN_INT not connected */ #define FLEXCAN_QUIRK_DISABLE_RXFG BIT(2) /* Disable RX FIFO Global mask */ #define FLEXCAN_QUIRK_ENABLE_EACEN_RRS BIT(3) /* Enable EACEN and RRS bit in ctrl2 */ -#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disble Memory error detection */ +#define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */ #define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */ /* Structure of the message buffer */ -- GitLab From da2f27e9e615d1c799c9582b15262458da61fddc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 1 Mar 2017 15:33:26 +0100 Subject: [PATCH 1061/1084] netfilter: nf_conntrack_sip: fix wrong memory initialisation In commit 82de0be6862cd ("netfilter: Add helper array register/unregister functions"), struct nf_conntrack_helper sip[MAX_PORTS][4] was changed to sip[MAX_PORTS * 4], so the memory init should have been changed to memset(&sip[4 * i], 0, 4 * sizeof(sip[i])); But as the sip[] table is allocated in the BSS, it is already set to 0 Fixes: 82de0be6862cd ("netfilter: Add helper array register/unregister functions") Signed-off-by: Christophe Leroy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 24174c520239..0d17894798b5 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ 
b/net/netfilter/nf_conntrack_sip.c @@ -1628,8 +1628,6 @@ static int __init nf_conntrack_sip_init(void) ports[ports_c++] = SIP_PORT; for (i = 0; i < ports_c; i++) { - memset(&sip[i], 0, sizeof(sip[i])); - nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip", SIP_PORT, ports[i], i, sip_exp_policy, SIP_EXPECT_MAX, -- GitLab From f9121355eb6f9babadb97bf5b34ab0cce7764406 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Mar 2017 18:15:11 +0100 Subject: [PATCH 1062/1084] netfilter: nft_set_rbtree: incorrect assumption on lower interval lookups In case of adjacent ranges, we may indeed see either the high part of the range in first place or the low part of it. Remove this incorrect assumption, let's make sure we annotate the low part of the interval in case of we have adjacent interva intervals so we hit a matching in lookups. Reported-by: Simon Hanisch Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 71e8fb886a73..78dfbf9588b3 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -60,11 +60,10 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, d = memcmp(this, key, set->klen); if (d < 0) { parent = parent->rb_left; - /* In case of adjacent ranges, we always see the high - * part of the range in first place, before the low one. - * So don't update interval if the keys are equal. - */ - if (interval && nft_rbtree_equal(set, this, interval)) + if (interval && + nft_rbtree_equal(set, this, interval) && + nft_rbtree_interval_end(this) && + !nft_rbtree_interval_end(interval)) continue; interval = rbe; } else if (d > 0) -- GitLab From 25e94a997b324b5f167f56d56d7106d38b78c9de Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 1 Mar 2017 12:52:31 +0100 Subject: [PATCH 1063/1084] netfilter: nf_tables: don't call nfnetlink_set_err() if nfnetlink_send() fails The underlying nlmsg_multicast() already sets sk->sk_err for us to notify socket overruns, so we should not do anything with this return value. So we just call nfnetlink_set_err() if: 1) We fail to allocate the netlink message. or 2) We don't have enough space in the netlink message to place attributes, which means that we likely need to allocate a larger message. Before this patch, the internal ESRCH netlink error code was propagated to userspace, which is quite misleading. Netlink semantics mandate that listeners just hit ENOBUFS if the socket buffer overruns. 
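As an illustrative aside (not part of the original patch), every notify helper converted below reduces to the following shape: nfnetlink_set_err() is reached only on the allocation/attribute-space failure path, and the return value of nfnetlink_send() is deliberately ignored because nlmsg_multicast() already records overruns in sk->sk_err. The fill routine nf_tables_fill_example_info() is a hypothetical stand-in for the per-object fill functions used in the real code:

static void nf_tables_example_notify(const struct nft_ctx *ctx, int event)
{
	struct sk_buff *skb;

	if (!ctx->report &&
	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
		return;

	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto err;

	/* Attributes did not fit: free the message and report -ENOBUFS. */
	if (nf_tables_fill_example_info(skb, ctx, event) < 0) {
		kfree_skb(skb);
		goto err;
	}

	/* nlmsg_multicast() sets sk->sk_err on overruns, so ignore the result. */
	nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
		       ctx->report, GFP_KERNEL);
	return;
err:
	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
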
Reported-by: Alexander Alemayhu Tested-by: Alexander Alemayhu Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 +- net/netfilter/nf_tables_api.c | 133 ++++++++++++------------------ 2 files changed, 58 insertions(+), 81 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ac84686aaafb..2aa8a9d80fbe 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -988,9 +988,9 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask); -int nft_obj_notify(struct net *net, struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, - int event, int family, int report, gfp_t gfp); +void nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, + int event, int family, int report, gfp_t gfp); /** * struct nft_object_type - stateful object type diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ff7304ae58ac..5e0ccfd5bb37 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -461,16 +461,15 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, return -1; } -static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) +static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -482,14 +481,11 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; } - err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_tables(struct sk_buff *skb, @@ -1050,16 +1046,15 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, return -1; } -static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) +static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; int err; if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -1072,14 +1067,11 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_chains(struct sk_buff *skb, @@ -1934,18 +1926,16 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, return -1; } -static int nf_tables_rule_notify(const struct nft_ctx *ctx, - const struct nft_rule *rule, - int event) +static void nf_tables_rule_notify(const struct nft_ctx *ctx, + const struct nft_rule *rule, int event) { struct sk_buff *skb; int err; if 
(!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -1958,14 +1948,11 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, + ctx->report, GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); } struct nft_rule_dump_ctx { @@ -2696,9 +2683,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, return -1; } -static int nf_tables_set_notify(const struct nft_ctx *ctx, - const struct nft_set *set, - int event, gfp_t gfp_flags) +static void nf_tables_set_notify(const struct nft_ctx *ctx, + const struct nft_set *set, int event, + gfp_t gfp_flags) { struct sk_buff *skb; u32 portid = ctx->portid; @@ -2706,9 +2693,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags); if (skb == NULL) goto err; @@ -2719,12 +2705,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, - ctx->report, gfp_flags); + nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report, + gfp_flags); + return; err: - if (err < 0) - nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); - return err; + nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) @@ -3504,10 +3489,10 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, return -1; } -static int nf_tables_setelem_notify(const struct nft_ctx *ctx, - const struct nft_set *set, - const struct nft_set_elem *elem, - int event, u16 flags) +static void nf_tables_setelem_notify(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_elem *elem, + int event, u16 flags) { struct net *net = ctx->net; u32 portid = ctx->portid; @@ -3515,9 +3500,8 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx, int err; if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) goto err; @@ -3529,12 +3513,11 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, - GFP_KERNEL); + nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, + GFP_KERNEL); + return; err: - if (err < 0) - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); - return err; + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, @@ -4476,18 +4459,17 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk, return nft_delobj(&ctx, obj); } -int nft_obj_notify(struct net *net, struct nft_table *table, - struct nft_object *obj, u32 portid, u32 seq, int event, - int family, int report, gfp_t gfp) +void nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, int event, + int family, int report, gfp_t gfp) { 
struct sk_buff *skb; int err; if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb = nlmsg_new(NLMSG_GOODSIZE, gfp); if (skb == NULL) goto err; @@ -4499,21 +4481,18 @@ int nft_obj_notify(struct net *net, struct nft_table *table, goto err; } - err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + return; err: - if (err < 0) { - nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); - } - return err; + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); } EXPORT_SYMBOL_GPL(nft_obj_notify); -static int nf_tables_obj_notify(const struct nft_ctx *ctx, - struct nft_object *obj, int event) +static void nf_tables_obj_notify(const struct nft_ctx *ctx, + struct nft_object *obj, int event) { - return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, - ctx->seq, event, ctx->afi->family, ctx->report, - GFP_KERNEL); + nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event, + ctx->afi->family, ctx->report, GFP_KERNEL); } static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, @@ -4543,7 +4522,8 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, return -EMSGSIZE; } -static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) +static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, + int event) { struct nlmsghdr *nlh = nlmsg_hdr(skb); struct sk_buff *skb2; @@ -4551,9 +4531,8 @@ static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) if (nlmsg_report(nlh) && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) - return 0; + return; - err = -ENOBUFS; skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb2 == NULL) goto err; @@ -4565,14 +4544,12 @@ static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event) goto err; } - err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, - NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL); + nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, + nlmsg_report(nlh), GFP_KERNEL); + return; err: - if (err < 0) { - nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, - err); - } - return err; + nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES, + -ENOBUFS); } static int nf_tables_getgen(struct net *net, struct sock *nlsk, -- GitLab From d67ce7da3b1eda838db7bdca86d7ec28ef37068e Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Thu, 2 Mar 2017 12:54:25 +0000 Subject: [PATCH 1064/1084] xen-netback: keep a local pointer for vif in backend_disconnect() This patch replaces use of 'be->vif' with 'vif' and hence generally makes the function look tidier. No semantic change. Signed-off-by: Paul Durrant Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/xenbus.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index bb854f92f5a5..d82ddc913b4c 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -492,24 +492,28 @@ static int backend_create_xenvif(struct backend_info *be) static void backend_disconnect(struct backend_info *be) { - if (be->vif) { + struct xenvif *vif = be->vif; + + if (vif) { unsigned int queue_index; - xen_unregister_watchers(be->vif); + xen_unregister_watchers(vif); #ifdef CONFIG_DEBUG_FS - xenvif_debugfs_delif(be->vif); + xenvif_debugfs_delif(vif); #endif /* CONFIG_DEBUG_FS */ - xenvif_disconnect_data(be->vif); - for (queue_index = 0; queue_index < be->vif->num_queues; ++queue_index) - xenvif_deinit_queue(&be->vif->queues[queue_index]); - - spin_lock(&be->vif->lock); - vfree(be->vif->queues); - be->vif->num_queues = 0; - be->vif->queues = NULL; - spin_unlock(&be->vif->lock); - - xenvif_disconnect_ctrl(be->vif); + xenvif_disconnect_data(vif); + for (queue_index = 0; + queue_index < vif->num_queues; + ++queue_index) + xenvif_deinit_queue(&vif->queues[queue_index]); + + spin_lock(&vif->lock); + vfree(vif->queues); + vif->num_queues = 0; + vif->queues = NULL; + spin_unlock(&vif->lock); + + xenvif_disconnect_ctrl(vif); } } -- GitLab From a254d8f9a8a928772ef4608342125ccb35b79d5d Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Thu, 2 Mar 2017 12:54:26 +0000 Subject: [PATCH 1065/1084] xen-netback: don't vfree() queues under spinlock This leads to a BUG of the following form: [ 174.512861] switch: port 2(vif3.0) entered disabled state [ 174.522735] BUG: sleeping function called from invalid context at /home/build/linux-linus/mm/vmalloc.c:1441 [ 174.523451] in_atomic(): 1, irqs_disabled(): 0, pid: 28, name: xenwatch [ 174.524131] CPU: 1 PID: 28 Comm: xenwatch Tainted: G W 4.10.0upstream-11073-g4977ab6-dirty #1 [ 174.524819] Hardware name: MSI MS-7680/H61M-P23 (MS-7680), BIOS V17.0 03/14/2011 [ 174.525517] Call Trace: [ 174.526217] show_stack+0x23/0x60 [ 174.526899] dump_stack+0x5b/0x88 [ 174.527562] ___might_sleep+0xde/0x130 [ 174.528208] __might_sleep+0x35/0xa0 [ 174.528840] ? _raw_spin_unlock_irqrestore+0x13/0x20 [ 174.529463] ? __wake_up+0x40/0x50 [ 174.530089] remove_vm_area+0x20/0x90 [ 174.530724] __vunmap+0x1d/0xc0 [ 174.531346] ? delete_object_full+0x13/0x20 [ 174.531973] vfree+0x40/0x80 [ 174.532594] set_backend_state+0x18a/0xa90 [ 174.533221] ? dwc_scan_descriptors+0x24d/0x430 [ 174.533850] ? kfree+0x5b/0xc0 [ 174.534476] ? xenbus_read+0x3d/0x50 [ 174.535101] ? xenbus_read+0x3d/0x50 [ 174.535718] ? xenbus_gather+0x31/0x90 [ 174.536332] ? ___might_sleep+0xf6/0x130 [ 174.536945] frontend_changed+0x6b/0xd0 [ 174.537565] xenbus_otherend_changed+0x7d/0x80 [ 174.538185] frontend_changed+0x12/0x20 [ 174.538803] xenwatch_thread+0x74/0x110 [ 174.539417] ? woken_wake_function+0x20/0x20 [ 174.540049] kthread+0xe5/0x120 [ 174.540663] ? xenbus_printf+0x50/0x50 [ 174.541278] ? __kthread_init_worker+0x40/0x40 [ 174.541898] ret_from_fork+0x21/0x2c [ 174.548635] switch: port 2(vif3.0) entered disabled state This patch defers the vfree() until after the spinlock is released. Reported-by: Juergen Gross Signed-off-by: Paul Durrant Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/xenbus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index d82ddc913b4c..d2d7cd9145b1 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -496,6 +496,7 @@ static void backend_disconnect(struct backend_info *be) if (vif) { unsigned int queue_index; + struct xenvif_queue *queues; xen_unregister_watchers(vif); #ifdef CONFIG_DEBUG_FS @@ -508,11 +509,13 @@ static void backend_disconnect(struct backend_info *be) xenvif_deinit_queue(&vif->queues[queue_index]); spin_lock(&vif->lock); - vfree(vif->queues); + queues = vif->queues; vif->num_queues = 0; vif->queues = NULL; spin_unlock(&vif->lock); + vfree(queues); + xenvif_disconnect_ctrl(vif); } } -- GitLab From 16206524f6ea57d6dcd56fe46f9f4a06d4a1b113 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Thu, 2 Mar 2017 17:59:56 -0500 Subject: [PATCH 1066/1084] net: ethernet: bgmac: init sequence bug Fix a bug in the 'bgmac' driver init sequence that blind writes for init sequence where it should preserve most bits other than the ones it is deliberately manipulating. The code now checks to see if the adapter needs to be brought out of reset (where as before it was doing an IDM write to bring it out of reset regardless of whether it was in reset or not). Also, removed unnecessary usleeps (as there is already a read present to flush the IDM writes). Signed-off-by: Zac Schroff Signed-off-by: Jon Mason Fixes: f6a95a24957 ("net: ethernet: bgmac: Add platform device support") Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bgmac-platform.c | 27 ++++++++++++------- drivers/net/ethernet/broadcom/bgmac.h | 16 +++++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index 7b1af950f312..da1b8b225eb9 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -51,8 +51,7 @@ static void platform_bgmac_idm_write(struct bgmac *bgmac, u16 offset, u32 value) static bool platform_bgmac_clk_enabled(struct bgmac *bgmac) { - if ((bgmac_idm_read(bgmac, BCMA_IOCTL) & - (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC)) != BCMA_IOCTL_CLK) + if ((bgmac_idm_read(bgmac, BCMA_IOCTL) & BGMAC_CLK_EN) != BGMAC_CLK_EN) return false; if (bgmac_idm_read(bgmac, BCMA_RESET_CTL) & BCMA_RESET_CTL_RESET) return false; @@ -61,15 +60,25 @@ static bool platform_bgmac_clk_enabled(struct bgmac *bgmac) static void platform_bgmac_clk_enable(struct bgmac *bgmac, u32 flags) { - bgmac_idm_write(bgmac, BCMA_IOCTL, - (BCMA_IOCTL_CLK | BCMA_IOCTL_FGC | flags)); - bgmac_idm_read(bgmac, BCMA_IOCTL); + u32 val; - bgmac_idm_write(bgmac, BCMA_RESET_CTL, 0); - bgmac_idm_read(bgmac, BCMA_RESET_CTL); - udelay(1); + /* The Reset Control register only contains a single bit to show if the + * controller is currently in reset. Do a sanity check here, just in + * case the bootloader happened to leave the device in reset. 
+ */ + val = bgmac_idm_read(bgmac, BCMA_RESET_CTL); + if (val) { + bgmac_idm_write(bgmac, BCMA_RESET_CTL, 0); + bgmac_idm_read(bgmac, BCMA_RESET_CTL); + udelay(1); + } - bgmac_idm_write(bgmac, BCMA_IOCTL, (BCMA_IOCTL_CLK | flags)); + val = bgmac_idm_read(bgmac, BCMA_IOCTL); + /* Some bits of BCMA_IOCTL set by HW/ATF and should not change */ + val |= flags & ~(BGMAC_AWCACHE | BGMAC_ARCACHE | BGMAC_AWUSER | + BGMAC_ARUSER); + val |= BGMAC_CLK_EN; + bgmac_idm_write(bgmac, BCMA_IOCTL, val); bgmac_idm_read(bgmac, BCMA_IOCTL); udelay(1); } diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index 248727dc62f2..6d1c6ff1ed96 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -213,6 +213,22 @@ /* BCMA GMAC core specific IO Control (BCMA_IOCTL) flags */ #define BGMAC_BCMA_IOCTL_SW_CLKEN 0x00000004 /* PHY Clock Enable */ #define BGMAC_BCMA_IOCTL_SW_RESET 0x00000008 /* PHY Reset */ +/* The IOCTL values appear to be different in NS, NSP, and NS2, and do not match + * the values directly above + */ +#define BGMAC_CLK_EN BIT(0) +#define BGMAC_RESERVED_0 BIT(1) +#define BGMAC_SOURCE_SYNC_MODE_EN BIT(2) +#define BGMAC_DEST_SYNC_MODE_EN BIT(3) +#define BGMAC_TX_CLK_OUT_INVERT_EN BIT(4) +#define BGMAC_DIRECT_GMII_MODE BIT(5) +#define BGMAC_CLK_250_SEL BIT(6) +#define BGMAC_AWCACHE (0xf << 7) +#define BGMAC_RESERVED_1 (0x1f << 11) +#define BGMAC_ARCACHE (0xf << 16) +#define BGMAC_AWUSER (0x3f << 20) +#define BGMAC_ARUSER (0x3f << 26) +#define BGMAC_RESERVED BIT(31) /* BCMA GMAC core specific IO status (BCMA_IOST) flags */ #define BGMAC_BCMA_IOST_ATTACHED 0x00000800 -- GitLab From fa42245dff4a5f2f8f208da542acbd80c22f7c65 Mon Sep 17 00:00:00 2001 From: Hari Vyas Date: Thu, 2 Mar 2017 17:59:57 -0500 Subject: [PATCH 1067/1084] net: ethernet: bgmac: mac address change bug ndo_set_mac_address() passes struct sockaddr * as 2nd parameter to bgmac_set_mac_address() but code assumed u8 *. This caused two bytes chopping and the wrong mac address was configured. Signed-off-by: Hari Vyas Signed-off-by: Jon Mason Fixes: 4e209001b86 ("bgmac: write mac address to hardware in ndo_set_mac_address") Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 415046750bb4..fd66fca00e01 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1223,12 +1223,16 @@ static netdev_tx_t bgmac_start_xmit(struct sk_buff *skb, static int bgmac_set_mac_address(struct net_device *net_dev, void *addr) { struct bgmac *bgmac = netdev_priv(net_dev); + struct sockaddr *sa = addr; int ret; ret = eth_prepare_mac_addr_change(net_dev, addr); if (ret < 0) return ret; - bgmac_write_mac_address(bgmac, (u8 *)addr); + + ether_addr_copy(net_dev->dev_addr, sa->sa_data); + bgmac_write_mac_address(bgmac, net_dev->dev_addr); + eth_commit_mac_addr_change(net_dev, addr); return 0; } -- GitLab From 9383b33771e566fa547daa2d09c6e0f1aaa298c3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Mar 2017 15:26:20 -0800 Subject: [PATCH 1068/1084] nfp: don't tell FW about the reserved buffer space Since commit c0f031bc8866 ("nfp_net: use alloc_frag() and build_skb()") we are allocating buffers which have to hold both the data and skb to be created in place by build_skb(). 
FW should only be told about the buffer space it can DMA to, that is without the build_skb() headroom and tailroom. Note: firmware applications should validate the buffers against both MTU and free list buffer size so oversized packets would not pass through the NIC anyway. Fixes: c0f031bc8866 ("nfp: use alloc_frag() and build_skb()") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 074259cc8e06..00a83218857a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2198,7 +2198,8 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn) nfp_net_write_mac_addr(nn); nn_writel(nn, NFP_NET_CFG_MTU, nn->netdev->mtu); - nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz); + nn_writel(nn, NFP_NET_CFG_FLBUFSZ, + nn->fl_bufsz - NFP_NET_RX_BUF_NON_DATA); /* Enable device */ new_ctrl |= NFP_NET_CFG_CTRL_ENABLE; -- GitLab From d58cebb79b62ff84b537a35423b8d6b7f0746985 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Mar 2017 15:26:21 -0800 Subject: [PATCH 1069/1084] nfp: correct DMA direction in XDP DMA sync dma_sync_single_for_*() takes the direction in which the buffer was mapped, not the direction of the sync. We should sync XDP buffers bidirectionally. Fixes: ecd63a0217d5 ("nfp: add XDP support in the driver") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 00a83218857a..9179a99563af 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1498,7 +1498,7 @@ nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring, txbuf->real_len = pkt_len; dma_sync_single_for_device(&nn->pdev->dev, rxbuf->dma_addr + pkt_off, - pkt_len, DMA_TO_DEVICE); + pkt_len, DMA_BIDIRECTIONAL); /* Build TX descriptor */ txd = &tx_ring->txds[wr_idx]; @@ -1611,7 +1611,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) dma_sync_single_for_cpu(&nn->pdev->dev, rxbuf->dma_addr + pkt_off, - pkt_len, DMA_FROM_DEVICE); + pkt_len, DMA_BIDIRECTIONAL); act = nfp_net_run_xdp(xdp_prog, rxbuf->frag + data_off, pkt_len); switch (act) { -- GitLab From 37411cad633f5e41f8a13007654909d21b19363a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Mar 2017 23:48:52 +0000 Subject: [PATCH 1070/1084] rxrpc: Fix potential NULL-pointer exception Fix a potential NULL-pointer exception in rxrpc_do_sendmsg(). The call state check that I added should have gone into the else-body of the if-statement where we actually have a call to check. Found by CoverityScan CID#1414316 ("Dereference after null check"). Fixes: 540b1c48c37a ("rxrpc: Fix deadlock between call creation and sendmsg/recvmsg") Reported-by: Colin Ian King Signed-off-by: David Howells Signed-off-by: David S. 
Miller --- net/rxrpc/sendmsg.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 31c1538c1a8d..27685d8cba1a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -517,13 +517,6 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) ret = -EBADSLT; if (cmd != RXRPC_CMD_SEND_DATA) goto error_release_sock; - ret = -EBUSY; - if (call->state == RXRPC_CALL_UNINITIALISED || - call->state == RXRPC_CALL_CLIENT_AWAIT_CONN || - call->state == RXRPC_CALL_SERVER_PREALLOC || - call->state == RXRPC_CALL_SERVER_SECURING || - call->state == RXRPC_CALL_SERVER_ACCEPTING) - goto error_release_sock; call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, exclusive); /* The socket is now unlocked... */ @@ -531,6 +524,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); /* ... and we have the call lock. */ } else { + ret = -EBUSY; + if (call->state == RXRPC_CALL_UNINITIALISED || + call->state == RXRPC_CALL_CLIENT_AWAIT_CONN || + call->state == RXRPC_CALL_SERVER_PREALLOC || + call->state == RXRPC_CALL_SERVER_SECURING || + call->state == RXRPC_CALL_SERVER_ACCEPTING) + goto error_release_sock; + ret = mutex_lock_interruptible(&call->user_mutex); release_sock(&rx->sk); if (ret < 0) { -- GitLab From a8d63a53b3eac8626f62336dcc327c18f1bbca78 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 3 Mar 2017 00:44:26 -0500 Subject: [PATCH 1071/1084] rds: remove unnecessary returned value check The function rds_trans_register always returns 0. As such, it is not necessary to check the returned value. Cc: Joe Jin Cc: Junxiao Bi Signed-off-by: Zhu Yanjun Reviewed-by: Yuval Shaia Acked-by: Santosh Shilimkar Signed-off-by: David S. 
Miller --- net/rds/ib.c | 6 +----- net/rds/rds.h | 2 +- net/rds/tcp.c | 6 +----- net/rds/transport.c | 4 +--- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index 0f557b243311..7a64c8db81ab 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -438,16 +438,12 @@ int rds_ib_init(void) if (ret) goto out_sysctl; - ret = rds_trans_register(&rds_ib_transport); - if (ret) - goto out_recv; + rds_trans_register(&rds_ib_transport); rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); goto out; -out_recv: - rds_ib_recv_exit(); out_sysctl: rds_ib_sysctl_exit(); out_ibreg: diff --git a/net/rds/rds.h b/net/rds/rds.h index 07fff73dd4f3..6f523ddfe1fb 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -910,7 +910,7 @@ void rds_connect_path_complete(struct rds_conn_path *conn, int curr); void rds_connect_complete(struct rds_connection *conn); /* transport.c */ -int rds_trans_register(struct rds_transport *trans); +void rds_trans_register(struct rds_transport *trans); void rds_trans_unregister(struct rds_transport *trans); struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr); void rds_trans_put(struct rds_transport *trans); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5438f6725092..a973d3b4dff0 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -652,16 +652,12 @@ static int rds_tcp_init(void) if (ret) goto out_pernet; - ret = rds_trans_register(&rds_tcp_transport); - if (ret) - goto out_recv; + rds_trans_register(&rds_tcp_transport); rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); goto out; -out_recv: - rds_tcp_recv_exit(); out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); out_notifier: diff --git a/net/rds/transport.c b/net/rds/transport.c index 2ffd3e30c643..0b188dd0a344 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -40,7 +40,7 @@ static struct rds_transport *transports[RDS_TRANS_COUNT]; static DECLARE_RWSEM(rds_trans_sem); -int rds_trans_register(struct rds_transport *trans) +void rds_trans_register(struct rds_transport *trans) { BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ); @@ -55,8 +55,6 @@ int rds_trans_register(struct rds_transport *trans) } up_write(&rds_trans_sem); - - return 0; } EXPORT_SYMBOL_GPL(rds_trans_register); -- GitLab From d0346b033899b9affa4da8c32bfb574dfb89859e Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 3 Mar 2017 15:22:09 +0000 Subject: [PATCH 1072/1084] sfc: avoid max() in array size It confuses sparse, which thinks the size isn't constant. Let's achieve the same thing with a BUILD_BUG_ON, since we know which one should be bigger and don't expect them ever to change. Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/ef10.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 92e1c6d8b293..4d88e8532d3e 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -828,9 +828,7 @@ static int efx_ef10_alloc_piobufs(struct efx_nic *efx, unsigned int n) static int efx_ef10_link_piobufs(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; - _MCDI_DECLARE_BUF(inbuf, - max(MC_CMD_LINK_PIOBUF_IN_LEN, - MC_CMD_UNLINK_PIOBUF_IN_LEN)); + MCDI_DECLARE_BUF(inbuf, MC_CMD_LINK_PIOBUF_IN_LEN); struct efx_channel *channel; struct efx_tx_queue *tx_queue; unsigned int offset, index; @@ -839,8 +837,6 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) BUILD_BUG_ON(MC_CMD_LINK_PIOBUF_OUT_LEN != 0); BUILD_BUG_ON(MC_CMD_UNLINK_PIOBUF_OUT_LEN != 0); - memset(inbuf, 0, sizeof(inbuf)); - /* Link a buffer to each VI in the write-combining mapping */ for (index = 0; index < nic_data->n_piobufs; ++index) { MCDI_SET_DWORD(inbuf, LINK_PIOBUF_IN_PIOBUF_HANDLE, @@ -920,6 +916,10 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) return 0; fail: + /* inbuf was defined for MC_CMD_LINK_PIOBUF. We can use the same + * buffer for MC_CMD_UNLINK_PIOBUF because it's shorter. + */ + BUILD_BUG_ON(MC_CMD_LINK_PIOBUF_IN_LEN < MC_CMD_UNLINK_PIOBUF_IN_LEN); while (index--) { MCDI_SET_DWORD(inbuf, UNLINK_PIOBUF_IN_TXQ_INSTANCE, nic_data->pio_write_vi_base + index); -- GitLab From 6d43131c158f42e2138d83495d19c70ed5cc0ffe Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 3 Mar 2017 15:22:27 +0000 Subject: [PATCH 1073/1084] sfc: fix IPID endianness in TSOv2 The value we read from the header is in network byte order, whereas EFX_POPULATE_QWORD_* takes values in host byte order (which it then converts to little-endian, as MCDI is little-endian). Fixes: e9117e5099ea ("sfc: Firmware-Assisted TSO version 2") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 4d88e8532d3e..c60c2d4c646a 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2183,7 +2183,7 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, /* Modify IPv4 header if needed. */ ip->tot_len = 0; ip->check = 0; - ipv4_id = ip->id; + ipv4_id = ntohs(ip->id); } else { /* Modify IPv6 header if needed. */ struct ipv6hdr *ipv6 = ipv6_hdr(skb); -- GitLab From d3c1a297b6fe60fe7be637cb067b27fca46be504 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 24 Feb 2017 10:42:14 +0200 Subject: [PATCH 1074/1084] Documentation: Update path to sysrq.txt Commit 9d85025b0418 ("docs-rst: create an user's manual book") moved the sysrq.txt leaving old paths in the kernel docs. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- Documentation/media/v4l-drivers/bttv.rst | 2 +- Documentation/s390/Debugging390.txt | 2 +- Documentation/sysctl/kernel.txt | 2 +- Documentation/virtual/uml/UserModeLinux-HOWTO.txt | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/media/v4l-drivers/bttv.rst b/Documentation/media/v4l-drivers/bttv.rst index bc63b12efafd..195ccaac2816 100644 --- a/Documentation/media/v4l-drivers/bttv.rst +++ b/Documentation/media/v4l-drivers/bttv.rst @@ -312,7 +312,7 @@ information out of a register+stack dump printed by the kernel on protection faults (so-called "kernel oops"). If you run into some kind of deadlock, you can try to dump a call trace -for each process using sysrq-t (see Documentation/sysrq.txt). +for each process using sysrq-t (see Documentation/admin-guide/sysrq.rst). This way it is possible to figure where *exactly* some process in "D" state is stuck. diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt index 3df8babcdc41..5ae7f868a007 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/Debugging390.txt @@ -2116,7 +2116,7 @@ The sysrq key reading is very picky ( I have to type the keys in an This is particularly useful for syncing disks unmounting & rebooting if the machine gets partially hung. -Read Documentation/sysrq.txt for more info +Read Documentation/admin-guide/sysrq.rst for more info References: =========== diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index a32b4b748644..bac23c198360 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -85,7 +85,7 @@ show up in /proc/sys/kernel: - softlockup_all_cpu_backtrace - soft_watchdog - stop-a [ SPARC only ] -- sysrq ==> Documentation/sysrq.txt +- sysrq ==> Documentation/admin-guide/sysrq.rst - sysctl_writes_strict - tainted - threads-max diff --git a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt index f4099ca6b483..87b80f589e1c 100644 --- a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt +++ b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt @@ -2401,9 +2401,9 @@ This takes one argument, which is a single letter. It calls the generic kernel's SysRq driver, which does whatever is called for by - that argument. See the SysRq documentation in Documentation/sysrq.txt - in your favorite kernel tree to see what letters are valid and what - they do. + that argument. See the SysRq documentation in + Documentation/admin-guide/sysrq.rst in your favorite kernel tree to + see what letters are valid and what they do. -- GitLab From 2eb6a4b26d13c51bdb8d5aefdfc846c0624b8ce4 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Wed, 1 Mar 2017 17:05:28 +0800 Subject: [PATCH 1075/1084] pcieaer doc: update the link The original link is empty, replace it. 
Signed-off-by: Cao jin Signed-off-by: Jonathan Corbet --- Documentation/PCI/pcieaer-howto.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt index ea8cafba255c..acd0dddd6bb8 100644 --- a/Documentation/PCI/pcieaer-howto.txt +++ b/Documentation/PCI/pcieaer-howto.txt @@ -256,7 +256,7 @@ After reboot with new kernel or insert the module, a device file named Then, you need a user space tool named aer-inject, which can be gotten from: - http://www.kernel.org/pub/linux/utils/pci/aer-inject/ + https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ More information about aer-inject can be found in the document comes with its source code. -- GitLab From 9857b1ad4740ec2429e8ec0a39946d45a8d2cff9 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Mar 2017 15:44:02 +0900 Subject: [PATCH 1076/1084] doc/ko_KR/memory-barriers: Update control-dependencies section This commit applies upstream change, commit c8241f8553e8 ("doc: Update control-dependencies section of memory-barriers.txt"), to Korean translation. Signed-off-by: SeongJae Park Signed-off-by: Jonathan Corbet --- .../translations/ko_KR/memory-barriers.txt | 68 ++++++++++--------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index a3228a676cc1..ce0b48d69eaa 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -662,6 +662,10 @@ include/linux/rcupdate.h 의 rcu_assign_pointer() 와 rcu_dereference() 를 컨트롤 의존성 ------------- +현재의 컴파일러들은 컨트롤 의존성을 이해하고 있지 않기 때문에 컨트롤 의존성은 +약간 다루기 어려울 수 있습니다. 이 섹션의 목적은 여러분이 컴파일러의 무시로 +인해 여러분의 코드가 망가지는 걸 막을 수 있도록 돕는겁니다. + 로드-로드 컨트롤 의존성은 데이터 의존성 배리어만으로는 정확히 동작할 수가 없어서 읽기 메모리 배리어를 필요로 합니다. 아래의 코드를 봅시다: @@ -689,20 +693,21 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q) { - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); } 컨트롤 의존성은 보통 다른 타입의 배리어들과 짝을 맞춰 사용됩니다. 그렇다곤 -하나, READ_ONCE() 는 반드시 사용해야 함을 부디 명심하세요! READ_ONCE() 가 -없다면, 컴파일러가 'a' 로부터의 로드를 'a' 로부터의 또다른 로드와, 'b' 로의 -스토어를 'b' 로의 또다른 스토어와 조합해 버려 매우 비직관적인 결과를 초래할 수 -있습니다. +하나, READ_ONCE() 도 WRITE_ONCE() 도 선택사항이 아니라 필수사항임을 부디 +명심하세요! READ_ONCE() 가 없다면, 컴파일러는 'a' 로부터의 로드를 'a' 로부터의 +또다른 로드와 조합할 수 있습니다. WRITE_ONCE() 가 없다면, 컴파일러는 'b' 로의 +스토어를 'b' 로의 또라느 스토어들과 조합할 수 있습니다. 두 경우 모두 순서에 +있어 상당히 비직관적인 결과를 초래할 수 있습니다. 이걸로 끝이 아닌게, 컴파일러가 변수 'a' 의 값이 항상 0이 아니라고 증명할 수 있다면, 앞의 예에서 "if" 문을 없애서 다음과 같이 최적화 할 수도 있습니다: q = a; - b = p; /* BUG: Compiler and CPU can both reorder!!! */ + b = 1; /* BUG: Compiler and CPU can both reorder!!! */ 그러니 READ_ONCE() 를 반드시 사용하세요. @@ -712,11 +717,11 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q) { barrier(); - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something(); } else { barrier(); - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something_else(); } @@ -725,12 +730,12 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); barrier(); - WRITE_ONCE(b, p); /* BUG: No ordering vs. load from a!!! */ + WRITE_ONCE(b, 1); /* BUG: No ordering vs. load from a!!! */ if (q) { - /* WRITE_ONCE(b, p); -- moved up, BUG!!! */ + /* WRITE_ONCE(b, 1); -- moved up, BUG!!! */ do_something(); } else { - /* WRITE_ONCE(b, p); -- moved up, BUG!!! */ + /* WRITE_ONCE(b, 1); -- moved up, BUG!!! 
*/ do_something_else(); } @@ -742,10 +747,10 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q) { - smp_store_release(&b, p); + smp_store_release(&b, 1); do_something(); } else { - smp_store_release(&b, p); + smp_store_release(&b, 1); do_something_else(); } @@ -754,10 +759,10 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q) { - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something(); } else { - WRITE_ONCE(b, r); + WRITE_ONCE(b, 2); do_something_else(); } @@ -770,10 +775,10 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q % MAX) { - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something(); } else { - WRITE_ONCE(b, r); + WRITE_ONCE(b, 2); do_something_else(); } @@ -781,7 +786,7 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 위의 코드를 아래와 같이 바꿔버릴 수 있습니다: q = READ_ONCE(a); - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something_else(); 이렇게 되면, CPU 는 변수 'a' 로부터의 로드와 변수 'b' 로의 스토어 사이의 순서를 @@ -793,10 +798,10 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */ if (q % MAX) { - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); do_something(); } else { - WRITE_ONCE(b, r); + WRITE_ONCE(b, 2); do_something_else(); } @@ -828,35 +833,33 @@ CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레 q = READ_ONCE(a); if (q) { - WRITE_ONCE(b, p); + WRITE_ONCE(b, 1); } else { - WRITE_ONCE(b, r); + WRITE_ONCE(b, 2); } - WRITE_ONCE(c, 1); /* BUG: No ordering against the read from "a". */ + WRITE_ONCE(c, 1); /* BUG: No ordering against the read from 'a'. */ -컴파일러는 volatile 타입에 대한 액세스를 재배치 할 수 없고 이 조건 하의 "b" +컴파일러는 volatile 타입에 대한 액세스를 재배치 할 수 없고 이 조건 하의 'b' 로의 쓰기를 재배치 할 수 없기 때문에 여기에 순서 규칙이 존재한다고 주장하고 싶을 겁니다. 불행히도 이 경우에, 컴파일러는 다음의 가상의 pseudo-assembly 언어 -코드처럼 "b" 로의 두개의 쓰기 오퍼레이션을 conditional-move 인스트럭션으로 +코드처럼 'b' 로의 두개의 쓰기 오퍼레이션을 conditional-move 인스트럭션으로 번역할 수 있습니다: ld r1,a - ld r2,p - ld r3,r cmp r1,$0 - cmov,ne r4,r2 - cmov,eq r4,r3 + cmov,ne r4,$1 + cmov,eq r4,$2 st r4,b st $1,c -완화된 순서 규칙의 CPU 는 "a" 로부터의 로드와 "c" 로의 스토어 사이에 어떤 +완화된 순서 규칙의 CPU 는 'a' 로부터의 로드와 'c' 로의 스토어 사이에 어떤 종류의 의존성도 갖지 않을 겁니다. 이 컨트롤 의존성은 두개의 cmov 인스트럭션과 거기에 의존하는 스토어 에게만 적용될 겁니다. 짧게 말하자면, 컨트롤 의존성은 주어진 if 문의 then 절과 else 절에게만 (그리고 이 두 절 내에서 호출되는 함수들에게까지) 적용되지, 이 if 문을 뒤따르는 코드에는 적용되지 않습니다. 마지막으로, 컨트롤 의존성은 이행성 (transitivity) 을 제공하지 -않습니다-. 이건 -x 와 y 가 둘 다 0 이라는 초기값을 가졌다는 가정 하의 두개의 예제로 +'x' 와 'y' 가 둘 다 0 이라는 초기값을 가졌다는 가정 하의 두개의 예제로 보이겠습니다: CPU 0 CPU 1 @@ -924,6 +927,9 @@ http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf 와 (*) 컨트롤 의존성은 이행성을 제공하지 -않습니다-. 이행성이 필요하다면, smp_mb() 를 사용하세요. + (*) 컴파일러는 컨트롤 의존성을 이해하고 있지 않습니다. 따라서 컴파일러가 + 여러분의 코드를 망가뜨리지 않도록 하는건 여러분이 해야 하는 일입니다. + SMP 배리어 짝맞추기 -------------------- -- GitLab From f3fc83e55533b9fddac1d4eda79956768df569ea Mon Sep 17 00:00:00 2001 From: Martyn Welch Date: Fri, 3 Mar 2017 22:43:30 +0000 Subject: [PATCH 1077/1084] docs: Fix htmldocs build failure Build of HTML docs failing due to conversion of deviceiobook.tmpl in 8a8a602f and regulator.tmpl in 028f2533 to RST without removing from DOCBOOKS in Makefile, resulting (in the case of deviceiobook) the following error: make[1]: *** No rule to make target 'Documentation/DocBook/deviceiobook.xml', needed by 'Documentation/DocBook/deviceiobook.aux.xml'. Stop. Makefile:1452: recipe for target 'htmldocs' failed make: *** [htmldocs] Error 2 Update DOCBOOKS to reflect available books. 
Signed-off-by: Martyn Welch Signed-off-by: Jonathan Corbet --- Documentation/DocBook/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 7b8831fb4e37..c339cf59ad69 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -7,12 +7,12 @@ # list of DOCBOOKS. DOCBOOKS := z8530book.xml \ - kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ + kernel-hacking.xml kernel-locking.xml \ writing_usb_driver.xml networking.xml \ kernel-api.xml filesystems.xml lsm.xml kgdb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ - sh.xml regulator.xml w1.xml \ + sh.xml w1.xml \ writing_musb_glue_layer.xml ifeq ($(DOCBOOKS),) -- GitLab From fd5d666932d51b2552ecc0280047d6b35d9b6cd1 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Fri, 3 Mar 2017 12:24:05 +0000 Subject: [PATCH 1078/1084] Documentation/sphinx: fix primary_domain configuration With Sphinx 1.5.3 I get the warning: WARNING: primary_domain 'C' not found, ignored. It seems that domain names in Sphinx are case-sensitive and for the C domain the name must be lower case. Signed-off-by: John Keeping Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index f6823cf01275..7fadb3b83293 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -135,7 +135,7 @@ pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False -primary_domain = 'C' +primary_domain = 'c' highlight_language = 'none' # -- Options for HTML output ---------------------------------------------- -- GitLab From f78ef7cd9a0686b979679d0de061c6dbfd8d649e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 3 Mar 2017 12:21:14 -0800 Subject: [PATCH 1079/1084] strparser: destroy workqueue on module exit Fixes: 43a0c6751a32 ("strparser: Stream parser for messages") Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/strparser/strparser.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 41adf362936d..b5c279b22680 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -504,6 +504,7 @@ static int __init strp_mod_init(void) static void __exit strp_mod_exit(void) { + destroy_workqueue(strp_wq); } module_init(strp_mod_init); module_exit(strp_mod_exit); -- GitLab From c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Mar 2017 12:59:56 -0800 Subject: [PATCH 1080/1084] Linux 4.11-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4cb6b0a1152b..165cf9783a5d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 -PATCHLEVEL = 10 +PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Fearless Coyote # *DOCUMENTATION* -- GitLab From 077dbaee9df53c597df532a08d721d03f4570f3d Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Thu, 23 Feb 2017 10:48:55 +0100 Subject: [PATCH 1081/1084] irqchip/crossbar: Fix incorrect type of local variables The max and entry variables are unsigned according to the dt-bindings. 
Fix following 3 sparse issues (-Wtypesign): drivers/irqchip/irq-crossbar.c:222:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:222:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:222:52: got int * drivers/irqchip/irq-crossbar.c:245:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:245:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:245:56: got int * drivers/irqchip/irq-crossbar.c:263:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:263:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:263:56: got int * Signed-off-by: Franck Demathieu Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-crossbar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 1eef56a89b1f..05bbf171df37 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { - int i, size, max = 0, reserved = 0, entry; + int i, size, reserved = 0; + u32 max = 0, entry; const __be32 *irqsr; int ret = -ENOMEM; -- GitLab From b3e228473e6cec7cf83b4025b4570c8066ab2dd8 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Thu, 2 Mar 2017 16:11:47 +0100 Subject: [PATCH 1082/1084] irqdomain: Add empty irq_domain_check_msi_remap Fix following build error for s390: drivers/vfio/vfio_iommu_type1.c: In function 'vfio_iommu_type1_attach_group': drivers/vfio/vfio_iommu_type1.c:1290:25: error: implicit declaration of function 'irq_domain_check_msi_remap' Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 188eced6813e..9f3616085423 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode( { return NULL; } +static inline bool irq_domain_check_msi_remap(void) +{ + return false; +} #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */ -- GitLab From 90922a2d03d84de36bf8a9979d62580102f31a92 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Tue, 7 Mar 2017 08:20:38 -0600 Subject: [PATCH 1083/1084] irqchip/gicv3-its: Add workaround for QDF2400 ITS erratum 0065 On Qualcomm Datacenter Technologies QDF2400 SoCs, the ITS hardware implementation uses 16Bytes for Interrupt Translation Entry (ITE), but reports an incorrect value of 8Bytes in GITS_TYPER.ITTE_size. It might cause kernel memory corruption depending on the number of MSI(x) that are configured and the amount of memory that has been allocated for ITEs in its_create_device(). This patch fixes the potential memory corruption by setting the correct ITE size to 16Bytes. 
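To make the failure mode concrete, here is a minimal userspace sketch of why an under-reported entry size leads to an undersized translation table. The constants and names below are illustrative only and are not taken from its_create_device():

/*
 * Illustrative sketch: sizing a table from the entry size reported in
 * GITS_TYPER while the hardware actually writes 16 bytes per entry.
 * Values are made up for demonstration.
 */
#include <stdio.h>

#define REPORTED_ITE_SIZE   8   /* what GITS_TYPER.ITT_Entry_size claims */
#define ACTUAL_ITE_SIZE    16   /* what the QDF2400 ITS writes per entry */

int main(void)
{
        unsigned int nr_ites = 32;  /* e.g. MSI-X vectors for one device */
        unsigned long alloc = nr_ites * REPORTED_ITE_SIZE; /* software allocation */
        unsigned long used  = nr_ites * ACTUAL_ITE_SIZE;   /* hardware usage */

        printf("allocated %lu bytes, hardware may write %lu bytes\n", alloc, used);
        if (used > alloc)
                printf("%lu bytes land past the allocation\n", used - alloc);
        return 0;
}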
Cc: stable@vger.kernel.org Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 10 ++++++++++ drivers/irqchip/irq-gic-v3-its.c | 16 ++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index a71b8095dbd8..2f66683500b8 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -68,3 +68,4 @@ stable kernels. | | | | | | Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | +| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a39029b5414e..8c7c244247b6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009 If unsure, say Y. +config QCOM_QDF2400_ERRATUM_0065 + bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size" + default y + help + On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports + ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have + been indicated as 16Bytes (0xf), not 8Bytes (0x7). + + If unsure, say Y. + endmenu diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 23201004fd7a..f77f840d2b5f 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1601,6 +1601,14 @@ static void __maybe_unused its_enable_quirk_cavium_23144(void *data) its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144; } +static void __maybe_unused its_enable_quirk_qdf2400_e0065(void *data) +{ + struct its_node *its = data; + + /* On QDF2400, the size of the ITE is 16Bytes */ + its->ite_size = 16; +} + static const struct gic_quirk its_quirks[] = { #ifdef CONFIG_CAVIUM_ERRATUM_22375 { @@ -1617,6 +1625,14 @@ static const struct gic_quirk its_quirks[] = { .mask = 0xffff0fff, .init = its_enable_quirk_cavium_23144, }, +#endif +#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065 + { + .desc = "ITS: QDF2400 erratum 0065", + .iidr = 0x00001070, /* QDF2400 ITS rev 1.x */ + .mask = 0xffffffff, + .init = its_enable_quirk_qdf2400_e0065, + }, #endif { } -- GitLab From 4b9de5da7e120c7f02395da729f0ec77ce7a6044 Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Mon, 6 Mar 2017 14:41:06 +0100 Subject: [PATCH 1084/1084] irqchip/crossbar: Fix incorrect type of register size The 'size' variable is unsigned according to the dt-bindings. 
As this variable is used as integer in other places, create a new variable that allows to fix the following sparse issue (-Wtypesign): drivers/irqchip/irq-crossbar.c:279:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:279:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:279:52: got int * Signed-off-by: Franck Demathieu Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-crossbar.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 05bbf171df37..1070b7b959f2 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -199,7 +199,7 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { int i, size, reserved = 0; - u32 max = 0, entry; + u32 max = 0, entry, reg_size; const __be32 *irqsr; int ret = -ENOMEM; @@ -276,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node) if (!cb->register_offsets) goto err_irq_map; - of_property_read_u32(node, "ti,reg-size", &size); + of_property_read_u32(node, "ti,reg-size", ®_size); - switch (size) { + switch (reg_size) { case 1: cb->write = crossbar_writeb; break; @@ -304,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node) continue; cb->register_offsets[i] = reserved; - reserved += size; + reserved += reg_size; } of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map); -- GitLab
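For reference, the signedness mismatch that sparse's -Wtypesign reports in the two crossbar patches above follows a simple pattern: an int * is passed where the callee expects a pointer to unsigned int. The standalone sketch below illustrates it; read_u32_prop() is a hypothetical stand-in for of_property_read_u32() and is not kernel API:

/*
 * Illustrative sketch of the -Wtypesign pattern.  read_u32_prop() is a
 * hypothetical stand-in for of_property_read_u32().
 */
#include <stdint.h>
#include <stdio.h>

static int read_u32_prop(const char *name, uint32_t *out_value)
{
        (void)name;
        *out_value = 4;         /* pretend the "ti,reg-size" property is 4 */
        return 0;
}

int main(void)
{
        int size;               /* signed, as in the old code */
        uint32_t reg_size;      /* unsigned, matching the dt-binding */

        /* read_u32_prop("ti,reg-size", &size);  -- int * vs u32 *, flagged */
        read_u32_prop("ti,reg-size", &reg_size); /* types match, no warning */

        size = (int)reg_size;   /* explicit when a plain int is still needed */
        printf("reg-size = %u (as int: %d)\n", reg_size, size);
        return 0;
}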